From 3102abf1aabdfff798cc0d2020c07c7138b59648 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Fri, 24 Nov 2023 10:57:18 +0000 Subject: mem zero rather than store to a union where the variant is of size zero --- src/llvm_backend_utility.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index d8dbfd736..be3ae9c8a 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -57,6 +57,10 @@ gb_internal lbValue lb_correct_endianness(lbProcedure *p, lbValue value) { return value; } +gb_internal LLVMValueRef lb_mem_zero_ptr_internal(lbProcedure *p, LLVMValueRef ptr, usize len, unsigned alignment, bool is_volatile) { + return lb_mem_zero_ptr_internal(p, ptr, LLVMConstInt(lb_type(p->module, t_uint), len, false), alignment, is_volatile); +} + gb_internal LLVMValueRef lb_mem_zero_ptr_internal(lbProcedure *p, LLVMValueRef ptr, LLVMValueRef len, unsigned alignment, bool is_volatile) { bool is_inlinable = false; -- cgit v1.2.3 From c1d853a24e69689a40668c4aa036312bc871540c Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 28 Jan 2024 17:32:34 +0000 Subject: Remove dead code --- core/runtime/internal.odin | 74 -------------------------------------------- src/checker.cpp | 10 ++++-- src/llvm_backend_proc.cpp | 6 ++-- src/llvm_backend_utility.cpp | 26 ++++------------ 4 files changed, 16 insertions(+), 100 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/core/runtime/internal.odin b/core/runtime/internal.odin index d4c43ed7e..a03c2a701 100644 --- a/core/runtime/internal.odin +++ b/core/runtime/internal.odin @@ -22,50 +22,6 @@ byte_slice :: #force_inline proc "contextless" (data: rawptr, len: int) -> []byt return ([^]byte)(data)[:max(len, 0)] } -bswap_16 :: proc "contextless" (x: u16) -> u16 { - return x>>8 | x<<8 -} - -bswap_32 :: proc "contextless" (x: u32) -> u32 { - return x>>24 | (x>>8)&0xff00 | (x<<8)&0xff0000 | x<<24 -} - -bswap_64 :: proc "contextless" (x: u64) -> u64 { - z := x - z = (z & 0x00000000ffffffff) << 32 | (z & 0xffffffff00000000) >> 32 - z = (z & 0x0000ffff0000ffff) << 16 | (z & 0xffff0000ffff0000) >> 16 - z = (z & 0x00ff00ff00ff00ff) << 8 | (z & 0xff00ff00ff00ff00) >> 8 - return z -} - -bswap_128 :: proc "contextless" (x: u128) -> u128 { - z := transmute([4]u32)x - z[0], z[3] = bswap_32(z[3]), bswap_32(z[0]) - z[1], z[2] = bswap_32(z[2]), bswap_32(z[1]) - return transmute(u128)z -} - -bswap_f16 :: proc "contextless" (f: f16) -> f16 { - x := transmute(u16)f - z := bswap_16(x) - return transmute(f16)z - -} - -bswap_f32 :: proc "contextless" (f: f32) -> f32 { - x := transmute(u32)f - z := bswap_32(x) - return transmute(f32)z - -} - -bswap_f64 :: proc "contextless" (f: f64) -> f64 { - x := transmute(u64)f - z := bswap_64(x) - return transmute(f64)z -} - - is_power_of_two_int :: #force_inline proc(x: int) -> bool { if x <= 0 { return false @@ -608,36 +564,6 @@ string_decode_last_rune :: proc "contextless" (s: string) -> (rune, int) { return r, size } - -abs_f16 :: #force_inline proc "contextless" (x: f16) -> f16 { - return -x if x < 0 else x -} -abs_f32 :: #force_inline proc "contextless" (x: f32) -> f32 { - return -x if x < 0 else x -} -abs_f64 :: #force_inline proc "contextless" (x: f64) -> f64 { - return -x if x < 0 else x -} - -min_f16 :: #force_inline proc "contextless" (a, b: f16) -> f16 { - return a if a < b else b -} -min_f32 :: #force_inline proc "contextless" (a, b: f32) -> f32 { - return a if a < b else b -} -min_f64 :: #force_inline proc "contextless" (a, b: f64) -> f64 { - return a if a < b else b -} -max_f16 :: #force_inline proc "contextless" (a, b: f16) -> f16 { - return a if a > b else b -} -max_f32 :: #force_inline proc "contextless" (a, b: f32) -> f32 { - return a if a > b else b -} -max_f64 :: #force_inline proc "contextless" (a, b: f64) -> f64 { - return a if a > b else b -} - abs_complex32 :: #force_inline proc "contextless" (x: complex32) -> f16 { p, q := abs(real(x)), abs(imag(x)) if p < q { diff --git a/src/checker.cpp b/src/checker.cpp index 4d7514d0b..498fce7d2 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -2517,13 +2517,11 @@ gb_internal void generate_minimum_dependency_set(Checker *c, Entity *start) { // Odin internal procedures str_lit("__init_context"), - str_lit("cstring_to_string"), + // str_lit("cstring_to_string"), str_lit("_cleanup_runtime"), // Pseudo-CRT required procedures str_lit("memset"), - str_lit("memcpy"), - str_lit("memmove"), // Utility procedures str_lit("memory_equal"), @@ -2531,6 +2529,12 @@ gb_internal void generate_minimum_dependency_set(Checker *c, Entity *start) { str_lit("memory_compare_zero"), ); + // Only required if no CRT is present + FORCE_ADD_RUNTIME_ENTITIES(build_context.no_crt, + str_lit("memcpy"), + str_lit("memmove"), + ); + FORCE_ADD_RUNTIME_ENTITIES(!build_context.tilde_backend, // Extended data type internal procedures str_lit("umodti3"), diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 09bebd0cf..e0aca2c10 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -2033,9 +2033,9 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu case BuiltinProc_clamp: return lb_emit_clamp(p, type_of_expr(expr), - lb_build_expr(p, ce->args[0]), - lb_build_expr(p, ce->args[1]), - lb_build_expr(p, ce->args[2])); + lb_build_expr(p, ce->args[0]), + lb_build_expr(p, ce->args[1]), + lb_build_expr(p, ce->args[2])); case BuiltinProc_soa_zip: diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index be3ae9c8a..bc5106601 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -83,27 +83,13 @@ gb_internal LLVMValueRef lb_mem_zero_ptr_internal(lbProcedure *p, LLVMValueRef p lb_type(p->module, t_rawptr), lb_type(p->module, t_int) }; - if (true || is_inlinable) { + LLVMValueRef args[4] = {}; + args[0] = LLVMBuildPointerCast(p->builder, ptr, types[0], ""); + args[1] = LLVMConstInt(LLVMInt8TypeInContext(p->module->ctx), 0, false); + args[2] = LLVMBuildIntCast2(p->builder, len, types[1], /*signed*/false, ""); + args[3] = LLVMConstInt(LLVMInt1TypeInContext(p->module->ctx), is_volatile, false); - LLVMValueRef args[4] = {}; - args[0] = LLVMBuildPointerCast(p->builder, ptr, types[0], ""); - args[1] = LLVMConstInt(LLVMInt8TypeInContext(p->module->ctx), 0, false); - args[2] = LLVMBuildIntCast2(p->builder, len, types[1], /*signed*/false, ""); - args[3] = LLVMConstInt(LLVMInt1TypeInContext(p->module->ctx), is_volatile, false); - - return lb_call_intrinsic(p, name, args, gb_count_of(args), types, gb_count_of(types)); - } else { - lbValue pr = lb_lookup_runtime_procedure(p->module, str_lit("memset")); - - LLVMValueRef args[3] = {}; - args[0] = LLVMBuildPointerCast(p->builder, ptr, types[0], ""); - args[1] = LLVMConstInt(LLVMInt32TypeInContext(p->module->ctx), 0, false); - args[2] = LLVMBuildIntCast2(p->builder, len, types[1], /*signed*/false, ""); - - // We always get the function pointer type rather than the function and there is apparently no way around that? - LLVMTypeRef type = lb_type_internal_for_procedures_raw(p->module, pr.type); - return LLVMBuildCall2(p->builder, type, pr.value, args, gb_count_of(args), ""); - } + return lb_call_intrinsic(p, name, args, gb_count_of(args), types, gb_count_of(types)); } -- cgit v1.2.3 From c14b9d461a5c58d4b80957682f00205714063435 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 22 Feb 2024 19:14:16 +0000 Subject: Support `using` of a `bit_field` within a `struct` --- src/check_type.cpp | 2 ++ src/llvm_backend_expr.cpp | 14 +++++++-- src/llvm_backend_utility.cpp | 2 +- src/types.cpp | 71 ++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 83 insertions(+), 6 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/check_type.cpp b/src/check_type.cpp index 41eae2178..74828f97f 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -89,6 +89,8 @@ gb_internal bool does_field_type_allow_using(Type *t) { return true; } else if (is_type_array(t)) { return t->Array.count <= 4; + } else if (is_type_bit_field(t)) { + return true; } return false; } diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 7e000c9e8..5bf2642e6 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -4679,12 +4679,20 @@ gb_internal lbAddr lb_build_addr_internal(lbProcedure *p, Ast *expr) { GB_ASSERT(sel.entity != nullptr); if (sel.is_bit_field) { lbAddr addr = lb_build_addr(p, se->expr); - Type *bf_type = base_type(type_deref(lb_addr_type(addr))); - GB_ASSERT(bf_type->kind == Type_BitField); - lbValue a = lb_addr_get_ptr(p, addr); + Selection sub_sel = sel; sub_sel.index.count -= 1; + + Type *bf_type = type_from_selection(type, sub_sel); + bf_type = base_type(type_deref(bf_type)); + GB_ASSERT(bf_type->kind == Type_BitField); + + lbValue a = lb_addr_get_ptr(p, addr); + if (sub_sel.index.count > 0) { + a = lb_emit_deep_field_gep(p, a, sub_sel); + } + i32 index = sel.index[sel.index.count-1]; Entity *f = bf_type->BitField.fields[index]; diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index bc5106601..5bd3cd8e2 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1332,7 +1332,7 @@ gb_internal lbValue lb_emit_deep_field_gep(lbProcedure *p, lbValue e, Selection if (index == 0) { type = t_rawptr; } else if (index == 1) { - type = t_type_info_ptr; + type = t_typeid; } e = lb_emit_struct_ep(p, e, index); break; diff --git a/src/types.cpp b/src/types.cpp index be4b8944b..3945c7111 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -385,6 +385,9 @@ enum : int { gb_internal bool is_type_comparable(Type *t); gb_internal bool is_type_simple_compare(Type *t); +gb_internal Type *type_deref(Type *t, bool allow_multi_pointer=false); +gb_internal Type *base_type(Type *t); +gb_internal Type *alloc_type_multi_pointer(Type *elem); gb_internal u32 type_info_flags_of_type(Type *type) { if (type == nullptr) { @@ -762,7 +765,6 @@ gb_internal bool is_type_proc(Type *t); gb_internal bool is_type_slice(Type *t); gb_internal bool is_type_integer(Type *t); gb_internal bool type_set_offsets(Type *t); -gb_internal Type *base_type(Type *t); gb_internal i64 type_size_of_internal(Type *t, TypePath *path); gb_internal i64 type_align_of_internal(Type *t, TypePath *path); @@ -1157,7 +1159,7 @@ gb_internal Type *alloc_type_simd_vector(i64 count, Type *elem, Type *generic_co //////////////////////////////////////////////////////////////// -gb_internal Type *type_deref(Type *t, bool allow_multi_pointer=false) { +gb_internal Type *type_deref(Type *t, bool allow_multi_pointer) { if (t != nullptr) { Type *bt = base_type(t); if (bt == nullptr) { @@ -4261,6 +4263,71 @@ gb_internal Type *alloc_type_proc_from_types(Type **param_types, unsigned param_ } +gb_internal Type *type_from_selection(Type *type, Selection const &sel) { + for (i32 index : sel.index) { + Type *bt = base_type(type_deref(type)); + switch (bt->kind) { + case Type_Struct: + type = bt->Struct.fields[index]->type; + break; + case Type_Tuple: + type = bt->Tuple.variables[index]->type; + break; + case Type_BitField: + type = bt->BitField.fields[index]->type; + break; + case Type_Array: + type = bt->Array.elem; + break; + case Type_EnumeratedArray: + type = bt->Array.elem; + break; + case Type_Slice: + switch (index) { + case 0: type = alloc_type_multi_pointer(bt->Slice.elem); break; + case 1: type = t_int; break; + } + break; + case Type_DynamicArray: + switch (index) { + case 0: type = alloc_type_multi_pointer(bt->DynamicArray.elem); break; + case 1: type = t_int; break; + case 2: type = t_int; break; + case 3: type = t_allocator; break; + } + break; + case Type_Map: + switch (index) { + case 0: type = t_uintptr; break; + case 1: type = t_int; break; + case 2: type = t_allocator; break; + } + break; + case Type_Basic: + if (is_type_complex_or_quaternion(bt)) { + type = base_complex_elem_type(bt); + } else { + switch (type->Basic.kind) { + case Basic_any: + switch (index) { + case 0: type = t_rawptr; break; + case 1: type = t_typeid; break; + } + break; + case Basic_string: + switch (index) { + case 0: type = t_u8_multi_ptr; break; + case 1: type = t_int; break; + } + break; + } + } + break; + } + } + return type; +} + gb_internal gbString write_type_to_string(gbString str, Type *type, bool shorthand=false) { if (type == nullptr) { -- cgit v1.2.3 From a1ee9e70352c6ab0402f98c3b84f8b3f0c86e6f9 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 6 Mar 2024 15:04:46 +0000 Subject: Change min/max runtime behaviour to match IEEE 754-2019 --- src/llvm_backend_utility.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 5bd3cd8e2..0f5d7fb43 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -124,11 +124,29 @@ gb_internal lbValue lb_emit_select(lbProcedure *p, lbValue cond, lbValue x, lbVa gb_internal lbValue lb_emit_min(lbProcedure *p, Type *t, lbValue x, lbValue y) { x = lb_emit_conv(p, x, t); y = lb_emit_conv(p, y, t); + if (is_type_float(t)) { + // NOTE(bill): f either operand is a NaN, returns NaN. Otherwise returns the lesser of the two arguments. + // -0.0 is considered to be less than +0.0 for this intrinsic. + // These semantics are specified by IEEE 754-2019. + LLVMValueRef args[2] = {x.value, y.value}; + LLVMTypeRef types[1] = {lb_type(p->module, t)}; + LLVMValueRef v = lb_call_intrinsic(p, "llvm.minimum", args, gb_count_of(args), types, gb_count_of(types)); + return {v, t}; + } return lb_emit_select(p, lb_emit_comp(p, Token_Lt, x, y), x, y); } gb_internal lbValue lb_emit_max(lbProcedure *p, Type *t, lbValue x, lbValue y) { x = lb_emit_conv(p, x, t); y = lb_emit_conv(p, y, t); + if (is_type_float(t)) { + // NOTE(bill): If either operand is a NaN, returns NaN. Otherwise returns the greater of the two arguments. + // -0.0 is considered to be less than +0.0 for this intrinsic. + // These semantics are specified by IEEE 754-2019. + LLVMValueRef args[2] = {x.value, y.value}; + LLVMTypeRef types[1] = {lb_type(p->module, t)}; + LLVMValueRef v = lb_call_intrinsic(p, "llvm.maximum", args, gb_count_of(args), types, gb_count_of(types)); + return {v, t}; + } return lb_emit_select(p, lb_emit_comp(p, Token_Gt, x, y), x, y); } -- cgit v1.2.3 From a7bab89c934d181ddbfa6e17103b2587581ee5e9 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 6 Mar 2024 15:07:21 +0000 Subject: Unify min/max semantics for simd_(min|max) --- src/llvm_backend_proc.cpp | 6 ++---- src/llvm_backend_utility.cpp | 6 ++++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 2c94222cf..fba7eb381 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1399,8 +1399,7 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn return res; case BuiltinProc_simd_min: if (is_float) { - LLVMValueRef cond = LLVMBuildFCmp(p->builder, LLVMRealOLT, arg0.value, arg1.value, ""); - res.value = LLVMBuildSelect(p->builder, cond, arg0.value, arg1.value, ""); + return lb_emit_min(p, res.type, arg0, arg1); } else { LLVMValueRef cond = LLVMBuildICmp(p->builder, is_signed ? LLVMIntSLT : LLVMIntULT, arg0.value, arg1.value, ""); res.value = LLVMBuildSelect(p->builder, cond, arg0.value, arg1.value, ""); @@ -1408,8 +1407,7 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn return res; case BuiltinProc_simd_max: if (is_float) { - LLVMValueRef cond = LLVMBuildFCmp(p->builder, LLVMRealOGT, arg0.value, arg1.value, ""); - res.value = LLVMBuildSelect(p->builder, cond, arg0.value, arg1.value, ""); + return lb_emit_max(p, res.type, arg0, arg1); } else { LLVMValueRef cond = LLVMBuildICmp(p->builder, is_signed ? LLVMIntSGT : LLVMIntUGT, arg0.value, arg1.value, ""); res.value = LLVMBuildSelect(p->builder, cond, arg0.value, arg1.value, ""); diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 0f5d7fb43..2c80f9c6a 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -124,7 +124,8 @@ gb_internal lbValue lb_emit_select(lbProcedure *p, lbValue cond, lbValue x, lbVa gb_internal lbValue lb_emit_min(lbProcedure *p, Type *t, lbValue x, lbValue y) { x = lb_emit_conv(p, x, t); y = lb_emit_conv(p, y, t); - if (is_type_float(t)) { + bool use_llvm_intrinsic = is_type_float(t) || (is_type_simd_vector(t) && is_type_float(base_array_type(t))); + if (use_llvm_intrinsic) { // NOTE(bill): f either operand is a NaN, returns NaN. Otherwise returns the lesser of the two arguments. // -0.0 is considered to be less than +0.0 for this intrinsic. // These semantics are specified by IEEE 754-2019. @@ -138,7 +139,8 @@ gb_internal lbValue lb_emit_min(lbProcedure *p, Type *t, lbValue x, lbValue y) { gb_internal lbValue lb_emit_max(lbProcedure *p, Type *t, lbValue x, lbValue y) { x = lb_emit_conv(p, x, t); y = lb_emit_conv(p, y, t); - if (is_type_float(t)) { + bool use_llvm_intrinsic = is_type_float(t) || (is_type_simd_vector(t) && is_type_float(base_array_type(t))); + if (use_llvm_intrinsic) { // NOTE(bill): If either operand is a NaN, returns NaN. Otherwise returns the greater of the two arguments. // -0.0 is considered to be less than +0.0 for this intrinsic. // These semantics are specified by IEEE 754-2019. -- cgit v1.2.3 From b1903b915bbfe4e325589fc70bbf1c1663fdae7d Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 6 Mar 2024 15:16:11 +0000 Subject: Change to IEEE 754-2008 conformance for `min`/`max` runtime operations. --- src/llvm_backend_utility.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 2c80f9c6a..f18aa5521 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -126,12 +126,13 @@ gb_internal lbValue lb_emit_min(lbProcedure *p, Type *t, lbValue x, lbValue y) { y = lb_emit_conv(p, y, t); bool use_llvm_intrinsic = is_type_float(t) || (is_type_simd_vector(t) && is_type_float(base_array_type(t))); if (use_llvm_intrinsic) { - // NOTE(bill): f either operand is a NaN, returns NaN. Otherwise returns the lesser of the two arguments. - // -0.0 is considered to be less than +0.0 for this intrinsic. - // These semantics are specified by IEEE 754-2019. LLVMValueRef args[2] = {x.value, y.value}; LLVMTypeRef types[1] = {lb_type(p->module, t)}; - LLVMValueRef v = lb_call_intrinsic(p, "llvm.minimum", args, gb_count_of(args), types, gb_count_of(types)); + + // NOTE(bill): f either operand is a NaN, returns NaN. Otherwise returns the lesser of the two arguments. + // -0.0 is considered to be less than +0.0 for this intrinsic. + // These semantics are specified by IEEE 754-2008. + LLVMValueRef v = lb_call_intrinsic(p, "llvm.minnum", args, gb_count_of(args), types, gb_count_of(types)); return {v, t}; } return lb_emit_select(p, lb_emit_comp(p, Token_Lt, x, y), x, y); @@ -141,12 +142,13 @@ gb_internal lbValue lb_emit_max(lbProcedure *p, Type *t, lbValue x, lbValue y) { y = lb_emit_conv(p, y, t); bool use_llvm_intrinsic = is_type_float(t) || (is_type_simd_vector(t) && is_type_float(base_array_type(t))); if (use_llvm_intrinsic) { - // NOTE(bill): If either operand is a NaN, returns NaN. Otherwise returns the greater of the two arguments. - // -0.0 is considered to be less than +0.0 for this intrinsic. - // These semantics are specified by IEEE 754-2019. LLVMValueRef args[2] = {x.value, y.value}; LLVMTypeRef types[1] = {lb_type(p->module, t)}; - LLVMValueRef v = lb_call_intrinsic(p, "llvm.maximum", args, gb_count_of(args), types, gb_count_of(types)); + + // NOTE(bill): If either operand is a NaN, returns NaN. Otherwise returns the greater of the two arguments. + // -0.0 is considered to be less than +0.0 for this intrinsic. + // These semantics are specified by IEEE 754-2008. + LLVMValueRef v = lb_call_intrinsic(p, "llvm.maxnum", args, gb_count_of(args), types, gb_count_of(types)); return {v, t}; } return lb_emit_select(p, lb_emit_comp(p, Token_Gt, x, y), x, y); -- cgit v1.2.3 From a750fc0ba63c9f1461bba4cc0446b1b4c2d2b3a9 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 19 Mar 2024 21:05:23 +0000 Subject: Add `#row_major matrix[R, C]T` As well as `#column_major matrix[R, C]T` as an alias for just `matrix[R, C]T`. This is because some libraries require a row_major internal layout but still want to be used with row or major oriented vectors. --- base/runtime/core.odin | 4 ++++ core/fmt/fmt.odin | 12 ++++++++++-- core/reflect/types.odin | 4 ++++ src/check_expr.cpp | 12 +++++++++--- src/check_type.cpp | 2 +- src/llvm_backend_const.cpp | 4 ++-- src/llvm_backend_expr.cpp | 39 +++++++++++++++++++++++++++++---------- src/llvm_backend_type.cpp | 3 ++- src/llvm_backend_utility.cpp | 22 +++++++++++++++------- src/parser.cpp | 13 +++++++++++++ src/parser.hpp | 1 + src/types.cpp | 19 +++++++++++++++---- 12 files changed, 105 insertions(+), 30 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/base/runtime/core.odin b/base/runtime/core.odin index 8f27ca674..7ad3ef1d6 100644 --- a/base/runtime/core.odin +++ b/base/runtime/core.odin @@ -177,6 +177,10 @@ Type_Info_Matrix :: struct { row_count: int, column_count: int, // Total element count = column_count * elem_stride + layout: enum u8 { + Column_Major, // array of column vectors + Row_Major, // array of row vectors + }, } Type_Info_Soa_Pointer :: struct { elem: ^Type_Info, diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index 02803f882..6f9801bc8 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -2396,7 +2396,11 @@ fmt_matrix :: proc(fi: ^Info, v: any, verb: rune, info: runtime.Type_Info_Matrix for col in 0.. 0 { io.write_string(fi.writer, ", ", &fi.n) } - offset := (row + col*info.elem_stride)*info.elem_size + offset: int + switch info.layout { + case .Column_Major: offset = (row + col*info.elem_stride)*info.elem_size + case .Row_Major: offset = (col + row*info.elem_stride)*info.elem_size + } data := uintptr(v.data) + uintptr(offset) fmt_arg(fi, any{rawptr(data), info.elem.id}, verb) @@ -2410,7 +2414,11 @@ fmt_matrix :: proc(fi: ^Info, v: any, verb: rune, info: runtime.Type_Info_Matrix for col in 0.. 0 { io.write_string(fi.writer, ", ", &fi.n) } - offset := (row + col*info.elem_stride)*info.elem_size + offset: int + switch info.layout { + case .Column_Major: offset = (row + col*info.elem_stride)*info.elem_size + case .Row_Major: offset = (col + row*info.elem_stride)*info.elem_size + } data := uintptr(v.data) + uintptr(offset) fmt_arg(fi, any{rawptr(data), info.elem.id}, verb) diff --git a/core/reflect/types.odin b/core/reflect/types.odin index 2b96dd4fb..9cff46a00 100644 --- a/core/reflect/types.odin +++ b/core/reflect/types.odin @@ -173,6 +173,7 @@ are_types_identical :: proc(a, b: ^Type_Info) -> bool { y := b.variant.(Type_Info_Matrix) or_return if x.row_count != y.row_count { return false } if x.column_count != y.column_count { return false } + if x.layout != y.layout { return false } return are_types_identical(x.elem, y.elem) case Type_Info_Bit_Field: @@ -689,6 +690,9 @@ write_type_writer :: proc(w: io.Writer, ti: ^Type_Info, n_written: ^int = nil) - write_type(w, info.pointer, &n) or_return case Type_Info_Matrix: + if info.layout == .Row_Major { + io.write_string(w, "#row_major ", &n) or_return + } io.write_string(w, "matrix[", &n) or_return io.write_i64(w, i64(info.row_count), 10, &n) or_return io.write_string(w, ", ", &n) or_return diff --git a/src/check_expr.cpp b/src/check_expr.cpp index f359d5a54..67c7f1a04 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -3431,6 +3431,11 @@ gb_internal void check_binary_matrix(CheckerContext *c, Token const &op, Operand if (xt->Matrix.column_count != yt->Matrix.row_count) { goto matrix_error; } + + if (xt->Matrix.is_row_major != yt->Matrix.is_row_major) { + goto matrix_error; + } + x->mode = Addressing_Value; if (are_types_identical(xt, yt)) { if (!is_type_named(x->type) && is_type_named(y->type)) { @@ -3438,7 +3443,8 @@ gb_internal void check_binary_matrix(CheckerContext *c, Token const &op, Operand x->type = y->type; } } else { - x->type = alloc_type_matrix(xt->Matrix.elem, xt->Matrix.row_count, yt->Matrix.column_count); + bool is_row_major = xt->Matrix.is_row_major && yt->Matrix.is_row_major; + x->type = alloc_type_matrix(xt->Matrix.elem, xt->Matrix.row_count, yt->Matrix.column_count, nullptr, nullptr, is_row_major); } goto matrix_success; } else if (yt->kind == Type_Array) { @@ -3452,7 +3458,7 @@ gb_internal void check_binary_matrix(CheckerContext *c, Token const &op, Operand // Treat arrays as column vectors x->mode = Addressing_Value; - if (type_hint == nullptr && xt->Matrix.row_count == yt->Array.count) { + if (xt->Matrix.row_count == yt->Array.count) { x->type = y->type; } else { x->type = alloc_type_matrix(xt->Matrix.elem, xt->Matrix.row_count, 1); @@ -3483,7 +3489,7 @@ gb_internal void check_binary_matrix(CheckerContext *c, Token const &op, Operand // Treat arrays as row vectors x->mode = Addressing_Value; - if (type_hint == nullptr && yt->Matrix.column_count == xt->Array.count) { + if (yt->Matrix.column_count == xt->Array.count) { x->type = x->type; } else { x->type = alloc_type_matrix(yt->Matrix.elem, 1, yt->Matrix.column_count); diff --git a/src/check_type.cpp b/src/check_type.cpp index d5cf187a4..3fe633892 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -2658,7 +2658,7 @@ gb_internal void check_matrix_type(CheckerContext *ctx, Type **type, Ast *node) } type_assign:; - *type = alloc_type_matrix(elem, row_count, column_count, generic_row, generic_column); + *type = alloc_type_matrix(elem, row_count, column_count, generic_row, generic_column, mt->is_row_major); return; } diff --git a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp index 2291f24ac..bbb0b8387 100644 --- a/src/llvm_backend_const.cpp +++ b/src/llvm_backend_const.cpp @@ -1302,11 +1302,11 @@ gb_internal lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, bo GB_ASSERT_MSG(elem_count == max_count, "%td != %td", elem_count, max_count); LLVMValueRef *values = gb_alloc_array(temporary_allocator(), LLVMValueRef, cast(isize)total_count); - for_array(i, cl->elems) { TypeAndValue tav = cl->elems[i]->tav; GB_ASSERT(tav.mode != Addressing_Invalid); - i64 offset = matrix_row_major_index_to_offset(type, i); + i64 offset = 0; + offset = matrix_row_major_index_to_offset(type, i); values[offset] = lb_const_value(m, elem_type, tav.value, allow_local).value; } for (isize i = 0; i < total_count; i++) { diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 98618798b..6eb8fdcc6 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -684,12 +684,6 @@ gb_internal lbValue lb_emit_matrix_flatten(lbProcedure *p, lbValue m, Type *type Type *mt = base_type(m.type); GB_ASSERT(mt->kind == Type_Matrix); - // TODO(bill): Determine why this fails on Windows sometimes - if (false && lb_is_matrix_simdable(mt)) { - LLVMValueRef vector = lb_matrix_to_trimmed_vector(p, m); - return lb_matrix_cast_vector_to_type(p, vector, type); - } - lbAddr res = lb_add_local_generated(p, type, true); i64 row_count = mt->Matrix.row_count; @@ -763,6 +757,7 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, GB_ASSERT(is_type_matrix(yt)); GB_ASSERT(xt->Matrix.column_count == yt->Matrix.row_count); GB_ASSERT(are_types_identical(xt->Matrix.elem, yt->Matrix.elem)); + GB_ASSERT(xt->Matrix.is_row_major == yt->Matrix.is_row_major); Type *elem = xt->Matrix.elem; @@ -770,7 +765,7 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, unsigned inner = cast(unsigned)xt->Matrix.column_count; unsigned outer_columns = cast(unsigned)yt->Matrix.column_count; - if (lb_is_matrix_simdable(xt)) { + if (!xt->Matrix.is_row_major && lb_is_matrix_simdable(xt)) { unsigned x_stride = cast(unsigned)matrix_type_stride_in_elems(xt); unsigned y_stride = cast(unsigned)matrix_type_stride_in_elems(yt); @@ -812,7 +807,7 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, return lb_addr_load(p, res); } - { + if (!xt->Matrix.is_row_major) { lbAddr res = lb_add_local_generated(p, type, true); auto inners = slice_make(permanent_allocator(), inner); @@ -835,6 +830,30 @@ gb_internal lbValue lb_emit_matrix_mul(lbProcedure *p, lbValue lhs, lbValue rhs, } } + return lb_addr_load(p, res); + } else { + lbAddr res = lb_add_local_generated(p, type, true); + + auto inners = slice_make(permanent_allocator(), inner); + + for (unsigned i = 0; i < outer_rows; i++) { + for (unsigned j = 0; j < outer_columns; j++) { + lbValue dst = lb_emit_matrix_epi(p, res.addr, i, j); + for (unsigned k = 0; k < inner; k++) { + inners[k][0] = lb_emit_matrix_ev(p, lhs, i, k); + inners[k][1] = lb_emit_matrix_ev(p, rhs, k, j); + } + + lbValue sum = lb_const_nil(p->module, elem); + for (unsigned k = 0; k < inner; k++) { + lbValue a = inners[k][0]; + lbValue b = inners[k][1]; + sum = lb_emit_mul_add(p, a, b, sum, elem); + } + lb_emit_store(p, dst, sum); + } + } + return lb_addr_load(p, res); } } @@ -855,7 +874,7 @@ gb_internal lbValue lb_emit_matrix_mul_vector(lbProcedure *p, lbValue lhs, lbVal Type *elem = mt->Matrix.elem; - if (lb_is_matrix_simdable(mt)) { + if (!mt->Matrix.is_row_major && lb_is_matrix_simdable(mt)) { unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); unsigned row_count = cast(unsigned)mt->Matrix.row_count; @@ -924,7 +943,7 @@ gb_internal lbValue lb_emit_vector_mul_matrix(lbProcedure *p, lbValue lhs, lbVal Type *elem = mt->Matrix.elem; - if (lb_is_matrix_simdable(mt)) { + if (!mt->Matrix.is_row_major && lb_is_matrix_simdable(mt)) { unsigned stride = cast(unsigned)matrix_type_stride_in_elems(mt); unsigned row_count = cast(unsigned)mt->Matrix.row_count; diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index aec1fb201..de83f5058 100644 --- a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -979,12 +979,13 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ tag_type = t_type_info_matrix; i64 ez = type_size_of(t->Matrix.elem); - LLVMValueRef vals[5] = { + LLVMValueRef vals[6] = { get_type_info_ptr(m, t->Matrix.elem), lb_const_int(m, t_int, ez).value, lb_const_int(m, t_int, matrix_type_stride_in_elems(t)).value, lb_const_int(m, t_int, t->Matrix.row_count).value, lb_const_int(m, t_int, t->Matrix.column_count).value, + lb_const_int(m, t_u8, cast(u8)t->Matrix.is_row_major).value, }; variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index f18aa5521..fb61c41c3 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1464,14 +1464,16 @@ gb_internal lbValue lb_emit_matrix_epi(lbProcedure *p, lbValue s, isize row, isi Type *t = s.type; GB_ASSERT(is_type_pointer(t)); Type *mt = base_type(type_deref(t)); - if (column == 0) { - GB_ASSERT_MSG(is_type_matrix(mt) || is_type_array_like(mt), "%s", type_to_string(mt)); - return lb_emit_epi(p, s, row); - } else if (row == 0 && is_type_array_like(mt)) { - return lb_emit_epi(p, s, column); + + if (!mt->Matrix.is_row_major) { + if (column == 0) { + GB_ASSERT_MSG(is_type_matrix(mt) || is_type_array_like(mt), "%s", type_to_string(mt)); + return lb_emit_epi(p, s, row); + } else if (row == 0 && is_type_array_like(mt)) { + return lb_emit_epi(p, s, column); + } } - GB_ASSERT_MSG(is_type_matrix(mt), "%s", type_to_string(mt)); isize offset = matrix_indices_to_offset(mt, row, column); @@ -1491,7 +1493,13 @@ gb_internal lbValue lb_emit_matrix_ep(lbProcedure *p, lbValue s, lbValue row, lb row = lb_emit_conv(p, row, t_int); column = lb_emit_conv(p, column, t_int); - LLVMValueRef index = LLVMBuildAdd(p->builder, row.value, LLVMBuildMul(p->builder, column.value, stride_elems, ""), ""); + LLVMValueRef index = nullptr; + + if (mt->Matrix.is_row_major) { + index = LLVMBuildAdd(p->builder, column.value, LLVMBuildMul(p->builder, row.value, stride_elems, ""), ""); + } else { + index = LLVMBuildAdd(p->builder, row.value, LLVMBuildMul(p->builder, column.value, stride_elems, ""), ""); + } LLVMValueRef indices[2] = { LLVMConstInt(lb_type(p->module, t_int), 0, false), diff --git a/src/parser.cpp b/src/parser.cpp index 1aa40ccbf..eb9e73342 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -2329,6 +2329,19 @@ gb_internal Ast *parse_operand(AstFile *f, bool lhs) { break; } return original_type; + } else if (name.string == "row_major" || + name.string == "column_major") { + Ast *original_type = parse_type(f); + Ast *type = unparen_expr(original_type); + switch (type->kind) { + case Ast_MatrixType: + type->MatrixType.is_row_major = (name.string == "row_major"); + break; + default: + syntax_error(type, "Expected a matrix type after #%.*s, got %.*s", LIT(name.string), LIT(ast_strings[type->kind])); + break; + } + return original_type; } else if (name.string == "partial") { Ast *tag = ast_basic_directive(f, token, name); Ast *original_expr = parse_expr(f, lhs); diff --git a/src/parser.hpp b/src/parser.hpp index f5997c4bd..ff3c5eb34 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -772,6 +772,7 @@ AST_KIND(_TypeBegin, "", bool) \ Ast *row_count; \ Ast *column_count; \ Ast *elem; \ + bool is_row_major; \ }) \ AST_KIND(_TypeEnd, "", bool) diff --git a/src/types.cpp b/src/types.cpp index 5a3ad5d6b..ab5e4de03 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -281,6 +281,7 @@ struct TypeProc { Type *generic_row_count; \ Type *generic_column_count; \ i64 stride_in_bytes; \ + bool is_row_major; \ }) \ TYPE_KIND(BitField, struct { \ Scope * scope; \ @@ -1002,7 +1003,7 @@ gb_internal Type *alloc_type_array(Type *elem, i64 count, Type *generic_count = return t; } -gb_internal Type *alloc_type_matrix(Type *elem, i64 row_count, i64 column_count, Type *generic_row_count = nullptr, Type *generic_column_count = nullptr) { +gb_internal Type *alloc_type_matrix(Type *elem, i64 row_count, i64 column_count, Type *generic_row_count = nullptr, Type *generic_column_count = nullptr, bool is_row_major = false) { if (generic_row_count != nullptr || generic_column_count != nullptr) { Type *t = alloc_type(Type_Matrix); t->Matrix.elem = elem; @@ -1010,12 +1011,14 @@ gb_internal Type *alloc_type_matrix(Type *elem, i64 row_count, i64 column_count, t->Matrix.column_count = column_count; t->Matrix.generic_row_count = generic_row_count; t->Matrix.generic_column_count = generic_column_count; + t->Matrix.is_row_major = is_row_major; return t; } Type *t = alloc_type(Type_Matrix); t->Matrix.elem = elem; t->Matrix.row_count = row_count; t->Matrix.column_count = column_count; + t->Matrix.is_row_major = is_row_major; return t; } @@ -1512,14 +1515,18 @@ gb_internal i64 matrix_indices_to_offset(Type *t, i64 row_index, i64 column_inde GB_ASSERT(0 <= row_index && row_index < t->Matrix.row_count); GB_ASSERT(0 <= column_index && column_index < t->Matrix.column_count); i64 stride_elems = matrix_type_stride_in_elems(t); - // NOTE(bill): Column-major layout internally - return row_index + stride_elems*column_index; + if (t->Matrix.is_row_major) { + return column_index + stride_elems*row_index; + } else { + // NOTE(bill): Column-major layout internally + return row_index + stride_elems*column_index; + } } gb_internal i64 matrix_row_major_index_to_offset(Type *t, i64 index) { t = base_type(t); GB_ASSERT(t->kind == Type_Matrix); - + i64 row_index = index/t->Matrix.column_count; i64 column_index = index%t->Matrix.column_count; return matrix_indices_to_offset(t, row_index, column_index); @@ -2690,6 +2697,7 @@ gb_internal bool are_types_identical_internal(Type *x, Type *y, bool check_tuple case Type_Matrix: return x->Matrix.row_count == y->Matrix.row_count && x->Matrix.column_count == y->Matrix.column_count && + x->Matrix.is_row_major == y->Matrix.is_row_major && are_types_identical(x->Matrix.elem, y->Matrix.elem); case Type_DynamicArray: @@ -4735,6 +4743,9 @@ gb_internal gbString write_type_to_string(gbString str, Type *type, bool shortha break; case Type_Matrix: + if (type->Matrix.is_row_major) { + str = gb_string_appendc(str, "#row_major "); + } str = gb_string_appendc(str, gb_bprintf("matrix[%d, %d]", cast(int)type->Matrix.row_count, cast(int)type->Matrix.column_count)); str = write_type_to_string(str, type->Matrix.elem); break; -- cgit v1.2.3 From df526549e215e9d2ccda3f4eddcf8014c9ba8ebd Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 26 Mar 2024 14:31:28 +0000 Subject: Fix `min`/`max` for wasm --- src/llvm_backend_utility.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index fb61c41c3..865c3f1ec 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -124,7 +124,7 @@ gb_internal lbValue lb_emit_select(lbProcedure *p, lbValue cond, lbValue x, lbVa gb_internal lbValue lb_emit_min(lbProcedure *p, Type *t, lbValue x, lbValue y) { x = lb_emit_conv(p, x, t); y = lb_emit_conv(p, y, t); - bool use_llvm_intrinsic = is_type_float(t) || (is_type_simd_vector(t) && is_type_float(base_array_type(t))); + bool use_llvm_intrinsic = !is_arch_wasm() && (is_type_float(t) || (is_type_simd_vector(t) && is_type_float(base_array_type(t)))); if (use_llvm_intrinsic) { LLVMValueRef args[2] = {x.value, y.value}; LLVMTypeRef types[1] = {lb_type(p->module, t)}; @@ -140,7 +140,7 @@ gb_internal lbValue lb_emit_min(lbProcedure *p, Type *t, lbValue x, lbValue y) { gb_internal lbValue lb_emit_max(lbProcedure *p, Type *t, lbValue x, lbValue y) { x = lb_emit_conv(p, x, t); y = lb_emit_conv(p, y, t); - bool use_llvm_intrinsic = is_type_float(t) || (is_type_simd_vector(t) && is_type_float(base_array_type(t))); + bool use_llvm_intrinsic = !is_arch_wasm() && (is_type_float(t) || (is_type_simd_vector(t) && is_type_float(base_array_type(t)))); if (use_llvm_intrinsic) { LLVMValueRef args[2] = {x.value, y.value}; LLVMTypeRef types[1] = {lb_type(p->module, t)}; -- cgit v1.2.3 From a61ae7c861fa301684ee1582507061317b11426b Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 16 Apr 2024 13:31:49 +0100 Subject: Fix #3427 --- src/check_type.cpp | 20 ++++++++++++++------ src/checker.hpp | 5 ++++- src/llvm_backend_general.cpp | 2 +- src/llvm_backend_type.cpp | 2 +- src/llvm_backend_utility.cpp | 4 ++-- src/types.cpp | 1 - 6 files changed, 22 insertions(+), 12 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/check_type.cpp b/src/check_type.cpp index f4e5d7c96..3bb1a4fd1 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -2495,18 +2495,16 @@ gb_internal Type *get_map_cell_type(Type *type) { return s; } -gb_internal void init_map_internal_types(Type *type) { +gb_internal void init_map_internal_debug_types(Type *type) { GB_ASSERT(type->kind == Type_Map); GB_ASSERT(t_allocator != nullptr); - if (type->Map.lookup_result_type != nullptr) return; + if (type->Map.debug_metadata_type != nullptr) return; Type *key = type->Map.key; Type *value = type->Map.value; GB_ASSERT(key != nullptr); GB_ASSERT(value != nullptr); - - Type *key_cell = get_map_cell_type(key); Type *value_cell = get_map_cell_type(value); @@ -2541,6 +2539,18 @@ gb_internal void init_map_internal_types(Type *type) { gb_unused(type_size_of(debug_type)); type->Map.debug_metadata_type = debug_type; +} + + +gb_internal void init_map_internal_types(Type *type) { + GB_ASSERT(type->kind == Type_Map); + GB_ASSERT(t_allocator != nullptr); + if (type->Map.lookup_result_type != nullptr) return; + + Type *key = type->Map.key; + Type *value = type->Map.value; + GB_ASSERT(key != nullptr); + GB_ASSERT(value != nullptr); type->Map.lookup_result_type = make_optional_ok_type(value); } @@ -2613,8 +2623,6 @@ gb_internal void check_map_type(CheckerContext *ctx, Type *type, Ast *node) { init_core_map_type(ctx->checker); init_map_internal_types(type); - - // error(node, "'map' types are not yet implemented"); } gb_internal void check_matrix_type(CheckerContext *ctx, Type **type, Ast *node) { diff --git a/src/checker.hpp b/src/checker.hpp index 1701da58d..2ade9312e 100644 --- a/src/checker.hpp +++ b/src/checker.hpp @@ -563,4 +563,7 @@ gb_internal void init_mem_allocator(Checker *c); gb_internal void add_untyped_expressions(CheckerInfo *cinfo, UntypedExprInfoMap *untyped); -gb_internal GenTypesData *ensure_polymorphic_record_entity_has_gen_types(CheckerContext *ctx, Type *original_type); \ No newline at end of file +gb_internal GenTypesData *ensure_polymorphic_record_entity_has_gen_types(CheckerContext *ctx, Type *original_type); + + +gb_internal void init_map_internal_types(Type *type); \ No newline at end of file diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 73e4a00e6..7a5ed5635 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -2070,7 +2070,7 @@ gb_internal LLVMTypeRef lb_type_internal(lbModule *m, Type *type) { break; case Type_Map: - init_map_internal_types(type); + init_map_internal_debug_types(type); GB_ASSERT(t_raw_map != nullptr); return lb_type_internal(m, t_raw_map); diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index 93e2874a5..0bac2f732 100644 --- a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -903,7 +903,7 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ case Type_Map: { tag_type = t_type_info_map; - init_map_internal_types(t); + init_map_internal_debug_types(t); LLVMValueRef vals[3] = { get_type_info_ptr(m, t->Map.key), diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 865c3f1ec..0d1db2cbf 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1125,7 +1125,7 @@ gb_internal lbValue lb_emit_struct_ep(lbProcedure *p, lbValue s, i32 index) { case 3: result_type = t_allocator; break; } } else if (is_type_map(t)) { - init_map_internal_types(t); + init_map_internal_debug_types(t); Type *itp = alloc_type_pointer(t_raw_map); s = lb_emit_transmute(p, s, itp); @@ -1264,7 +1264,7 @@ gb_internal lbValue lb_emit_struct_ev(lbProcedure *p, lbValue s, i32 index) { case Type_Map: { - init_map_internal_types(t); + init_map_internal_debug_types(t); switch (index) { case 0: result_type = get_struct_field_type(t_raw_map, 0); break; case 1: result_type = get_struct_field_type(t_raw_map, 1); break; diff --git a/src/types.cpp b/src/types.cpp index 97512d29b..18cb12ea1 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -769,7 +769,6 @@ gb_internal gbString type_to_string (Type *type, bool shorthand=true); gb_internal gbString type_to_string (Type *type, gbAllocator allocator, bool shorthand=true); gb_internal i64 type_size_of_internal(Type *t, TypePath *path); gb_internal i64 type_align_of_internal(Type *t, TypePath *path); -gb_internal void init_map_internal_types(Type *type); gb_internal Type * bit_set_to_int(Type *t); gb_internal bool are_types_identical(Type *x, Type *y); -- cgit v1.2.3 From ece78d22d2b549116a0884d3578972b8f389f983 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 18 Apr 2024 11:22:31 +0100 Subject: Add `-no-type-assert` and `ODIN_NO_TYPE_ASSERT` --- src/build_settings.cpp | 1 + src/checker.cpp | 1 + src/llvm_backend_expr.cpp | 4 +-- src/llvm_backend_utility.cpp | 72 +++++++++++++++++++++++--------------------- src/main.cpp | 9 ++++++ 5 files changed, 51 insertions(+), 36 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 106ae8a28..b806adcd6 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -382,6 +382,7 @@ struct BuildContext { bool keep_temp_files; bool ignore_unknown_attributes; bool no_bounds_check; + bool no_type_assert; bool no_dynamic_literals; bool no_output_files; bool no_crt; diff --git a/src/checker.cpp b/src/checker.cpp index e82836b2a..b7fe2b903 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -1112,6 +1112,7 @@ gb_internal void init_universal(void) { add_global_bool_constant("ODIN_DISABLE_ASSERT", bc->ODIN_DISABLE_ASSERT); add_global_bool_constant("ODIN_DEFAULT_TO_NIL_ALLOCATOR", bc->ODIN_DEFAULT_TO_NIL_ALLOCATOR); add_global_bool_constant("ODIN_NO_BOUNDS_CHECK", build_context.no_bounds_check); + add_global_bool_constant("ODIN_NO_TYPE_ASSERT", build_context.no_type_assert); add_global_bool_constant("ODIN_DEFAULT_TO_PANIC_ALLOCATOR", bc->ODIN_DEFAULT_TO_PANIC_ALLOCATOR); add_global_bool_constant("ODIN_NO_DYNAMIC_LITERALS", bc->no_dynamic_literals); add_global_bool_constant("ODIN_NO_CRT", bc->no_crt); diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index ad28f2e5e..edd5daeca 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -3116,7 +3116,7 @@ gb_internal lbValue lb_build_unary_and(lbProcedure *p, Ast *expr) { Type *dst_type = type; - if ((p->state_flags & StateFlag_no_type_assert) == 0) { + if (!build_context.no_type_assert && (p->state_flags & StateFlag_no_type_assert) == 0) { lbValue src_tag = {}; lbValue dst_tag = {}; if (is_type_union_maybe_pointer(src_type)) { @@ -3156,7 +3156,7 @@ gb_internal lbValue lb_build_unary_and(lbProcedure *p, Ast *expr) { v = lb_emit_load(p, v); } lbValue data_ptr = lb_emit_struct_ev(p, v, 0); - if ((p->state_flags & StateFlag_no_type_assert) == 0) { + if (!build_context.no_type_assert && (p->state_flags & StateFlag_no_type_assert) == 0) { GB_ASSERT(!build_context.no_rtti); lbValue any_id = lb_emit_struct_ev(p, v, 1); diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 0d1db2cbf..2dd7fbc38 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -728,29 +728,31 @@ gb_internal lbValue lb_emit_union_cast(lbProcedure *p, lbValue value, Type *type lb_start_block(p, end_block); if (!is_tuple) { - GB_ASSERT((p->state_flags & StateFlag_no_type_assert) == 0); - // NOTE(bill): Panic on invalid conversion - Type *dst_type = tuple->Tuple.variables[0]->type; - - isize arg_count = 7; - if (build_context.no_rtti) { - arg_count = 4; - } + if (!build_context.no_type_assert) { + GB_ASSERT((p->state_flags & StateFlag_no_type_assert) == 0); + // NOTE(bill): Panic on invalid conversion + Type *dst_type = tuple->Tuple.variables[0]->type; + + isize arg_count = 7; + if (build_context.no_rtti) { + arg_count = 4; + } - lbValue ok = lb_emit_load(p, lb_emit_struct_ep(p, v.addr, 1)); - auto args = array_make(permanent_allocator(), arg_count); - args[0] = ok; + lbValue ok = lb_emit_load(p, lb_emit_struct_ep(p, v.addr, 1)); + auto args = array_make(permanent_allocator(), arg_count); + args[0] = ok; - args[1] = lb_const_string(m, get_file_path_string(pos.file_id)); - args[2] = lb_const_int(m, t_i32, pos.line); - args[3] = lb_const_int(m, t_i32, pos.column); + args[1] = lb_const_string(m, get_file_path_string(pos.file_id)); + args[2] = lb_const_int(m, t_i32, pos.line); + args[3] = lb_const_int(m, t_i32, pos.column); - if (!build_context.no_rtti) { - args[4] = lb_typeid(m, src_type); - args[5] = lb_typeid(m, dst_type); - args[6] = lb_emit_conv(p, value_, t_rawptr); + if (!build_context.no_rtti) { + args[4] = lb_typeid(m, src_type); + args[5] = lb_typeid(m, dst_type); + args[6] = lb_emit_conv(p, value_, t_rawptr); + } + lb_emit_runtime_call(p, "type_assertion_check2", args); } - lb_emit_runtime_call(p, "type_assertion_check2", args); return lb_emit_load(p, lb_emit_struct_ep(p, v.addr, 0)); } @@ -806,25 +808,27 @@ gb_internal lbAddr lb_emit_any_cast_addr(lbProcedure *p, lbValue value, Type *ty if (!is_tuple) { // NOTE(bill): Panic on invalid conversion - lbValue ok = lb_emit_load(p, lb_emit_struct_ep(p, v.addr, 1)); + if (!build_context.no_type_assert) { + lbValue ok = lb_emit_load(p, lb_emit_struct_ep(p, v.addr, 1)); - isize arg_count = 7; - if (build_context.no_rtti) { - arg_count = 4; - } - auto args = array_make(permanent_allocator(), arg_count); - args[0] = ok; + isize arg_count = 7; + if (build_context.no_rtti) { + arg_count = 4; + } + auto args = array_make(permanent_allocator(), arg_count); + args[0] = ok; - args[1] = lb_const_string(m, get_file_path_string(pos.file_id)); - args[2] = lb_const_int(m, t_i32, pos.line); - args[3] = lb_const_int(m, t_i32, pos.column); + args[1] = lb_const_string(m, get_file_path_string(pos.file_id)); + args[2] = lb_const_int(m, t_i32, pos.line); + args[3] = lb_const_int(m, t_i32, pos.column); - if (!build_context.no_rtti) { - args[4] = any_typeid; - args[5] = dst_typeid; - args[6] = lb_emit_struct_ev(p, value, 0); + if (!build_context.no_rtti) { + args[4] = any_typeid; + args[5] = dst_typeid; + args[6] = lb_emit_struct_ev(p, value, 0); + } + lb_emit_runtime_call(p, "type_assertion_check2", args); } - lb_emit_runtime_call(p, "type_assertion_check2", args); return lb_addr(lb_emit_struct_ep(p, v.addr, 0)); } diff --git a/src/main.cpp b/src/main.cpp index 063b6c8b3..53103ce3a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -243,6 +243,7 @@ enum BuildFlagKind { BuildFlag_Debug, BuildFlag_DisableAssert, BuildFlag_NoBoundsCheck, + BuildFlag_NoTypeAssert, BuildFlag_NoDynamicLiterals, BuildFlag_NoCRT, BuildFlag_NoEntryPoint, @@ -436,6 +437,7 @@ gb_internal bool parse_build_flags(Array args) { add_flag(&build_flags, BuildFlag_Debug, str_lit("debug"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_DisableAssert, str_lit("disable-assert"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_NoBoundsCheck, str_lit("no-bounds-check"), BuildFlagParam_None, Command__does_check); + add_flag(&build_flags, BuildFlag_NoTypeAssert, str_lit("no-type-assert"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_NoThreadLocal, str_lit("no-thread-local"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_NoDynamicLiterals, str_lit("no-dynamic-literals"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_NoCRT, str_lit("no-crt"), BuildFlagParam_None, Command__does_build); @@ -1013,6 +1015,9 @@ gb_internal bool parse_build_flags(Array args) { case BuildFlag_NoBoundsCheck: build_context.no_bounds_check = true; break; + case BuildFlag_NoTypeAssert: + build_context.no_type_assert = true; + break; case BuildFlag_NoDynamicLiterals: build_context.no_dynamic_literals = true; break; @@ -1850,6 +1855,10 @@ gb_internal void print_show_help(String const arg0, String const &command) { print_usage_line(2, "Disables bounds checking program wide."); print_usage_line(0, ""); + print_usage_line(1, "-no-type-assert"); + print_usage_line(2, "Disables type assertion checking program wide."); + print_usage_line(0, ""); + print_usage_line(1, "-no-crt"); print_usage_line(2, "Disables automatic linking with the C Run Time."); print_usage_line(0, ""); -- cgit v1.2.3 From c330e5b5c1b512e1b0ca7181941057e5f2e085e4 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 24 Apr 2024 14:46:34 +0100 Subject: Improve codegen for `bit_field` compound literals with an integer backing --- src/llvm_backend.hpp | 1 - src/llvm_backend_expr.cpp | 111 +++++++++++++++++++++++++++++++++++++------ src/llvm_backend_general.cpp | 3 +- src/llvm_backend_utility.cpp | 9 ++-- 4 files changed, 101 insertions(+), 23 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index c4bf2691d..7dc5f6b63 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -122,7 +122,6 @@ struct lbAddr { } swizzle_large; struct { Type *type; - i64 index; i64 bit_offset; i64 bit_size; } bitfield; diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index edd5daeca..4209ba1ea 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -4296,7 +4296,19 @@ gb_internal lbAddr lb_build_addr_compound_lit(lbProcedure *p, Ast *expr) { switch (bt->kind) { default: GB_PANIC("Unknown CompoundLit type: %s", type_to_string(type)); break; - case Type_BitField: + case Type_BitField: { + TEMPORARY_ALLOCATOR_GUARD(); + + // Type *backing_type = core_type(bt->BitField.backing_type); + + struct FieldData { + Type *field_type; + u64 bit_offset; + u64 bit_size; + }; + auto values = array_make(temporary_allocator(), 0, cl->elems.count); + auto fields = array_make(temporary_allocator(), 0, cl->elems.count); + for (Ast *elem : cl->elems) { ast_node(fv, FieldValue, elem); String name = fv->field->Ident.token.string; @@ -4307,26 +4319,97 @@ gb_internal lbAddr lb_build_addr_compound_lit(lbProcedure *p, Ast *expr) { GB_ASSERT(sel.entity != nullptr); i64 index = sel.index[0]; - i64 bit_offset = 0; - i64 bit_size = -1; - for_array(i, bt->BitField.fields) { - Entity *f = bt->BitField.fields[i]; - if (f == sel.entity) { - bit_offset = bt->BitField.bit_offsets[i]; - bit_size = bt->BitField.bit_sizes[i]; - break; - } - } + Entity *f = bt->BitField.fields[index]; + GB_ASSERT(f == sel.entity); + i64 bit_offset = bt->BitField.bit_offsets[index]; + i64 bit_size = bt->BitField.bit_sizes[index]; GB_ASSERT(bit_size > 0); Type *field_type = sel.entity->type; lbValue field_expr = lb_build_expr(p, fv->value); field_expr = lb_emit_conv(p, field_expr, field_type); + array_add(&values, field_expr); + array_add(&fields, FieldData{field_type, cast(u64)bit_offset, cast(u64)bit_size}); + } + + // NOTE(bill): inline insertion sort should be good enough, right? + for (isize i = 1; i < values.count; i++) { + for (isize j = i; + j > 0 && fields[i].bit_offset < fields[j].bit_offset; + j--) { + auto vtmp = values[j]; + values[j] = values[j-1]; + values[j-1] = vtmp; + + auto ftmp = fields[j]; + fields[j] = fields[j-1]; + fields[j-1] = ftmp; + } + } + + if (fields.count == bt->BitField.fields.count) { + Type *backing_type = core_type(bt->BitField.backing_type); + GB_ASSERT(is_type_integer(backing_type) || + (is_type_array(backing_type) && is_type_integer(backing_type->Array.elem))); + + // NOTE(bill): all fields are present + // this means no masking is necessary since on write, the bits will be overridden + + lbValue dst_byte_ptr = lb_emit_conv(p, v.addr, t_u8_ptr); + u64 total_bit_size = cast(u64)(8*type_size_of(bt)); + + if (is_type_integer(backing_type)) { + LLVMTypeRef lbt = lb_type(p->module, backing_type); + + LLVMValueRef res = LLVMConstInt(lbt, 0, false); + + for (isize i = 0; i < fields.count; i++) { + auto const &f = fields[i]; + + LLVMValueRef mask = LLVMConstInt(lbt, 1, false); + mask = LLVMConstShl(mask, LLVMConstInt(lbt, f.bit_size, false)); + mask = LLVMConstSub(mask, LLVMConstInt(lbt, 1, false)); - lbAddr field_addr = lb_addr_bit_field(v.addr, field_type, index, bit_offset, bit_size); - lb_addr_store(p, field_addr, field_expr); + LLVMValueRef elem = values[i].value; + elem = LLVMBuildZExt(p->builder, elem, lbt, ""); + elem = LLVMBuildAnd(p->builder, elem, mask, ""); + + elem = LLVMBuildShl(p->builder, elem, LLVMConstInt(lbt, f.bit_offset, false), ""); + + res = LLVMBuildOr(p->builder, res, elem, ""); + } + LLVMBuildStore(p->builder, res, v.addr.value); + } else { + for_array(i, fields) { + auto const &f = fields[i]; + + if ((f.bit_offset & 7) == 0) { + u64 unpacked_bit_size = cast(u64)(8*type_size_of(f.field_type)); + u64 byte_size = (f.bit_size+7)/8; + + if (f.bit_offset + unpacked_bit_size <= total_bit_size) { + byte_size = unpacked_bit_size/8; + } + lbValue dst = lb_emit_ptr_offset(p, dst_byte_ptr, lb_const_int(p->module, t_int, f.bit_offset/8)); + lbValue src = lb_address_from_load_or_generate_local(p, values[i]); + lb_mem_copy_non_overlapping(p, dst, src, lb_const_int(p->module, t_uintptr, byte_size)); + } else { + lbAddr dst = lb_addr_bit_field(v.addr, f.field_type, f.bit_offset, f.bit_size); + lb_addr_store(p, dst, values[i]); + } + } + } + } else { + // individual storing + for_array(i, values) { + auto const &f = fields[i]; + lbAddr dst = lb_addr_bit_field(v.addr, f.field_type, f.bit_offset, f.bit_size); + lb_addr_store(p, dst, values[i]); + } } + return v; + } case Type_Struct: { // TODO(bill): "constant" '#raw_union's are not initialized constantly at the moment. @@ -4771,7 +4854,7 @@ gb_internal lbAddr lb_build_addr_internal(lbProcedure *p, Ast *expr) { u8 bit_size = bf_type->BitField.bit_sizes[index]; i64 bit_offset = bf_type->BitField.bit_offsets[index]; - return lb_addr_bit_field(ptr, f->type, index, bit_offset, bit_size); + return lb_addr_bit_field(ptr, f->type, bit_offset, bit_size); } { diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index b8fbd231e..bf23417c6 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -450,14 +450,13 @@ gb_internal lbAddr lb_addr_swizzle_large(lbValue addr, Type *array_type, Slicemodule, type); LLVMTypeKind kind = LLVMGetTypeKind(llvm_type); - + i64 sz = type_size_of(type); switch (kind) { case LLVMStructTypeKind: case LLVMArrayTypeKind: - { - // NOTE(bill): Enforce zeroing through memset to make sure padding is zeroed too - i32 sz = cast(i32)type_size_of(type); - lb_mem_zero_ptr_internal(p, ptr, lb_const_int(p->module, t_int, sz).value, alignment, false); - } + // NOTE(bill): Enforce zeroing through memset to make sure padding is zeroed too + lb_mem_zero_ptr_internal(p, ptr, lb_const_int(p->module, t_int, sz).value, alignment, false); break; default: LLVMBuildStore(p->builder, LLVMConstNull(lb_type(p->module, type)), ptr); -- cgit v1.2.3 From 25f1d0906d2b5a8276c3832783970a798c12cc6c Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Wed, 1 May 2024 22:12:37 +0200 Subject: compiler: improve target features support --- .gitignore | 4 +- base/intrinsics/intrinsics.odin | 10 +- core/simd/x86/sse3.odin | 4 +- core/simd/x86/sse41.odin | 4 +- core/sync/futex_wasm.odin | 44 ++- misc/featuregen/README.md | 28 ++ misc/featuregen/featuregen.cpp | 37 +++ misc/featuregen/featuregen.py | 116 ++++++++ src/build_settings.cpp | 617 ++++++++++++++++++++++++++++++++++------ src/check_builtin.cpp | 46 ++- src/check_decl.cpp | 49 +++- src/check_expr.cpp | 68 ++++- src/checker_builtin_procs.hpp | 4 + src/entity.cpp | 4 +- src/llvm_backend.cpp | 104 +++---- src/llvm_backend_proc.cpp | 27 +- src/llvm_backend_utility.cpp | 3 +- src/main.cpp | 79 ++++- src/types.cpp | 22 ++ 19 files changed, 1066 insertions(+), 204 deletions(-) create mode 100644 misc/featuregen/README.md create mode 100644 misc/featuregen/featuregen.cpp create mode 100644 misc/featuregen/featuregen.py (limited to 'src/llvm_backend_utility.cpp') diff --git a/.gitignore b/.gitignore index f6c3927a2..2b6b5281a 100644 --- a/.gitignore +++ b/.gitignore @@ -322,4 +322,6 @@ build.sh !core/debug/ # RAD debugger project file -*.raddbg \ No newline at end of file +*.raddbg + +misc/featuregen/featuregen diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index dca33bfd9..d887f8dcc 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -282,6 +282,12 @@ simd_reverse :: proc(a: #simd[N]T) -> #simd[N]T --- simd_rotate_left :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T --- simd_rotate_right :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T --- +// Checks if the current target supports the given target features. +// +// Takes a constant comma-seperated string (eg: "sha512,sse4.1"), or a procedure type which has either +// `@(require_target_feature)` or `@(enable_target_feature)` as its input and returns a boolean indicating +// if all listed features are supported. +has_target_feature :: proc($test: $T) -> bool where type_is_string(T) || type_is_proc(T) --- // WASM targets only wasm_memory_grow :: proc(index, delta: uintptr) -> int --- @@ -293,9 +299,9 @@ wasm_memory_size :: proc(index: uintptr) -> int --- // 0 - indicates that the thread blocked and then was woken up // 1 - the loaded value from `ptr` did not match `expected`, the thread did not block // 2 - the thread blocked, but the timeout -@(enable_target_feature="atomics") +@(require_target_feature="atomics") wasm_memory_atomic_wait32 :: proc(ptr: ^u32, expected: u32, timeout_ns: i64) -> u32 --- -@(enable_target_feature="atomics") +@(require_target_feature="atomics") wasm_memory_atomic_notify32 :: proc(ptr: ^u32, waiters: u32) -> (waiters_woken_up: u32) --- // x86 Targets (i386, amd64) diff --git a/core/simd/x86/sse3.odin b/core/simd/x86/sse3.odin index cf5f3b2fa..ca19c3954 100644 --- a/core/simd/x86/sse3.odin +++ b/core/simd/x86/sse3.odin @@ -36,7 +36,7 @@ _mm_lddqu_si128 :: #force_inline proc "c" (mem_addr: ^__m128i) -> __m128i { _mm_movedup_pd :: #force_inline proc "c" (a: __m128d) -> __m128d { return simd.shuffle(a, a, 0, 0) } -@(require_results, enable_target_feature="sse3") +@(require_results, enable_target_feature="sse2,sse3") _mm_loaddup_pd :: #force_inline proc "c" (mem_addr: [^]f64) -> __m128d { return _mm_load1_pd(mem_addr) } @@ -65,4 +65,4 @@ foreign _ { hsubps :: proc(a, b: __m128) -> __m128 --- @(link_name = "llvm.x86.sse3.ldu.dq") lddqu :: proc(mem_addr: rawptr) -> i8x16 --- -} \ No newline at end of file +} diff --git a/core/simd/x86/sse41.odin b/core/simd/x86/sse41.odin index 8c306ba4c..0b9c5986f 100644 --- a/core/simd/x86/sse41.odin +++ b/core/simd/x86/sse41.odin @@ -268,7 +268,7 @@ _mm_testnzc_si128 :: #force_inline proc "c" (a: __m128i, mask: __m128i) -> i32 { _mm_test_all_zeros :: #force_inline proc "c" (a: __m128i, mask: __m128i) -> i32 { return _mm_testz_si128(a, mask) } -@(require_results, enable_target_feature="sse4.1") +@(require_results, enable_target_feature="sse2,sse4.1") _mm_test_all_ones :: #force_inline proc "c" (a: __m128i) -> i32 { return _mm_testc_si128(a, _mm_cmpeq_epi32(a, a)) } @@ -349,4 +349,4 @@ foreign _ { ptestc :: proc(a, mask: i64x2) -> i32 --- @(link_name = "llvm.x86.sse41.ptestnzc") ptestnzc :: proc(a, mask: i64x2) -> i32 --- -} \ No newline at end of file +} diff --git a/core/sync/futex_wasm.odin b/core/sync/futex_wasm.odin index de1013364..de88e8198 100644 --- a/core/sync/futex_wasm.odin +++ b/core/sync/futex_wasm.odin @@ -5,31 +5,49 @@ package sync import "base:intrinsics" import "core:time" +// NOTE: because `core:sync` is in the dependency chain of a lot of the core packages (mostly through `core:mem`) +// without actually calling into it much, I opted for a runtime panic instead of a compile error here. + _futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool { - s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, -1) - return s != 0 + when !intrinsics.has_target_feature("atomics") { + _panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") + } else { + s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, -1) + return s != 0 + } } _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool { - s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, i64(duration)) - return s != 0 - + when !intrinsics.has_target_feature("atomics") { + _panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") + } else { + s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, i64(duration)) + return s != 0 + } } _futex_signal :: proc "contextless" (f: ^Futex) { - loop: for { - s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), 1) - if s >= 1 { - return + when !intrinsics.has_target_feature("atomics") { + _panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") + } else { + loop: for { + s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), 1) + if s >= 1 { + return + } } } } _futex_broadcast :: proc "contextless" (f: ^Futex) { - loop: for { - s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), ~u32(0)) - if s >= 0 { - return + when !intrinsics.has_target_feature("atomics") { + _panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") + } else { + loop: for { + s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), ~u32(0)) + if s >= 0 { + return + } } } } diff --git a/misc/featuregen/README.md b/misc/featuregen/README.md new file mode 100644 index 000000000..22a798cca --- /dev/null +++ b/misc/featuregen/README.md @@ -0,0 +1,28 @@ +# Featuregen + +This directory contains a python and CPP script that generates the needed information +for features regarding microarchitecture and target features of the compiler. + +It is not pretty! But LLVM has no way to query this information with their C API. + +It generates these globals (intended for `src/build_settings.cpp`: + +- `target_microarch_list`: an array of strings indexed by the architecture, each string is a comma-seperated list of microarchitectures available on that architecture +- `target_features_list`: an array of strings indexed by the architecture, each string is a comma-seperated list of target features available on that architecture +- `target_microarch_counts`: an array of ints indexed by the architecture, each int represents the amount of microarchitectures available on that target, intended for easier iteration of the next global +- `microarch_features_list`: an array of a tuple like struct where the first string is a microarchitecture and the second is a comma-seperated list of all features that are enabled by default for it + +In order to get the default features for a microarchitecture there is a small CPP program that takes +a target triple and microarchitecture and spits out the default features, this is then parsed by the python script. + +This should be ran each time we update LLVM to stay in sync. + +If there are minor differences (like the Odin user using LLVM 14 and this table being generated on LLVM 17) it +does not impact much at all, the only thing it will do is make LLVM print a message that the feature is ignored (if it was added between 14 and 17 in this case). + +## Usage + +1. Make sure the table of architectures at the top of the python script is up-to-date (the triple can be any valid triple for the architecture) +1. `./build.sh` +1. `python3 featuregen.py` +1. Copy the output into `src/build_settings.cpp` diff --git a/misc/featuregen/featuregen.cpp b/misc/featuregen/featuregen.cpp new file mode 100644 index 000000000..a1d00ab31 --- /dev/null +++ b/misc/featuregen/featuregen.cpp @@ -0,0 +1,37 @@ +#include +#include +#include +#include +#include +#include + +// Dumps the default set of supported features for the given microarch. +int main(int argc, char **argv) { + if (argc < 3) { + llvm::errs() << "Error: first arg should be triple, second should be microarch\n"; + return 1; + } + + llvm::InitializeAllTargets(); + llvm::InitializeAllTargetMCs(); + + std::string error; + const llvm::Target* target = llvm::TargetRegistry::lookupTarget(argv[1], error); + + if (!target) { + llvm::errs() << "Error: " << error << "\n"; + return 1; + } + + auto STI = target->createMCSubtargetInfo(argv[1], argv[2], ""); + + std::string plus = "+"; + llvm::ArrayRef features = STI->getAllProcessorFeatures(); + for (const auto& feature : features) { + if (STI->checkFeatures(plus + feature.Key)) { + llvm::outs() << feature.Key << "\n"; + } + } + + return 0; +} diff --git a/misc/featuregen/featuregen.py b/misc/featuregen/featuregen.py new file mode 100644 index 000000000..da4cc68f5 --- /dev/null +++ b/misc/featuregen/featuregen.py @@ -0,0 +1,116 @@ +import subprocess +import tempfile +import os +import sys + +archs = [ + ("amd64", "linux_amd64", "x86_64-pc-linux-gnu", [], []), + ("i386", "linux_i386", "i386-pc-linux-gnu", [], []), + ("arm32", "linux_arm32", "arm-linux-gnu", [], []), + ("arm64", "linux_arm64", "aarch64-linux-elf", [], []), + ("wasm32", "js_wasm32", "wasm32-js-js", [], []), + ("wasm64p32", "js_wasm64p32","wasm32-js-js", [], []), +]; + +SEEKING_CPUS = 0 +PARSING_CPUS = 1 +PARSING_FEATURES = 2 + +with tempfile.NamedTemporaryFile(suffix=".odin", delete=True) as temp_file: + temp_file.write(b"package main\n") + + for arch, target, triple, cpus, features in archs: + cmd = ["odin", "build", temp_file.name, "-file", "-build-mode:llvm", "-out:temp", "-target-features:\"help\"", f"-target:\"{target}\""] + process = subprocess.Popen(cmd, stderr=subprocess.PIPE, text=True) + + state = SEEKING_CPUS + for line in process.stderr: + + if state == SEEKING_CPUS: + if line == "Available CPUs for this target:\n": + state = PARSING_CPUS + + elif state == PARSING_CPUS: + if line == "Available features for this target:\n": + state = PARSING_FEATURES + continue + + parts = line.split(" -", maxsplit=1) + if len(parts) < 2: + continue + + cpu = parts[0].strip() + cpus.append(cpu) + + elif state == PARSING_FEATURES: + if line == "\n" and len(features) > 0: + break + + parts = line.split(" -", maxsplit=1) + if len(parts) < 2: + continue + + feature = parts[0].strip() + features.append(feature) + + process.wait() + if process.returncode != 0: + print(f"odin build returned with non-zero exit code {process.returncode}") + sys.exit(1) + + os.remove("temp.ll") + +def print_default_features(triple, microarch): + cmd = ["./featuregen", triple, microarch] + process = subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True) + first = True + for line in process.stdout: + print("" if first else ",", line.strip(), sep="", end="") + first = False + process.wait() + if process.returncode != 0: + print(f"featuregen returned with non-zero exit code {process.returncode}") + sys.exit(1) + +print("// Generated with the featuregen script in `misc/featuregen`") +print("gb_global String target_microarch_list[TargetArch_COUNT] = {") +print("\t// TargetArch_Invalid:") +print('\tstr_lit(""),') +for arch, target, triple, cpus, features in archs: + print(f"\t// TargetArch_{arch}:") + print(f'\tstr_lit("{','.join(cpus)}"),') +print("};") + +print("") + +print("// Generated with the featuregen script in `misc/featuregen`") +print("gb_global String target_features_list[TargetArch_COUNT] = {") +print("\t// TargetArch_Invalid:") +print('\tstr_lit(""),') +for arch, target, triple, cpus, features in archs: + print(f"\t// TargetArch_{arch}:") + print(f'\tstr_lit("{','.join(features)}"),') +print("};") + +print("") + +print("// Generated with the featuregen script in `misc/featuregen`") +print("gb_global int target_microarch_counts[TargetArch_COUNT] = {") +print("\t// TargetArch_Invalid:") +print("\t0,") +for arch, target, triple, cpus, feature in archs: + print(f"\t// TargetArch_{arch}:") + print(f"\t{len(cpus)},") +print("};") + +print("") + +print("// Generated with the featuregen script in `misc/featuregen`") +print("gb_global MicroarchFeatureList microarch_features_list[] = {") +for arch, target, triple, cpus, features in archs: + print(f"\t// TargetArch_{arch}:") + for cpu in cpus: + print(f'\t{{ str_lit("{cpu}"), str_lit("', end="") + print_default_features(triple, cpu) + print('") },') +print("};") diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 8509394ff..8ad03b1b9 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -71,6 +71,11 @@ enum Windows_Subsystem : u8 { Windows_Subsystem_COUNT, }; +struct MicroarchFeatureList { + String microarch; + String features; +}; + gb_global String target_os_names[TargetOs_COUNT] = { str_lit(""), str_lit("windows"), @@ -97,21 +102,467 @@ gb_global String target_arch_names[TargetArch_COUNT] = { str_lit("wasm64p32"), }; +// Generated with the featuregen script in `misc/featuregen` gb_global String target_microarch_list[TargetArch_COUNT] = { - // TargetArch_Invalid, - str_lit("Invalid!"), - // TargetArch_amd64, - str_lit("alderlake,amdfam10,athlon-fx,athlon64,athlon64-sse3,atom_sse4_2,atom_sse4_2_movbe,barcelona,bdver1,bdver2,bdver3,bdver4,broadwell,btver1,btver2,cannonlake,cascadelake,cooperlake,core-avx-i,core-avx2,core2,core_2_duo_sse4_1,core_2_duo_ssse3,core_2nd_gen_avx,core_3rd_gen_avx,core_4th_gen_avx,core_4th_gen_avx_tsx,core_5th_gen_avx,core_5th_gen_avx_tsx,core_aes_pclmulqdq,core_i7_sse4_2,corei7,corei7-avx,generic,goldmont,goldmont-plus,goldmont_plus,grandridge,graniterapids,graniterapids-d,graniterapids_d,haswell,icelake-client,icelake-server,icelake_client,icelake_server,ivybridge,k8,k8-sse3,knl,knm,meteorlake,mic_avx512,native,nehalem,nocona,opteron,opteron-sse3,penryn,raptorlake,rocketlake,sandybridge,sapphirerapids,sierraforest,silvermont,skx,skylake,skylake-avx512,skylake_avx512,slm,tigerlake,tremont,westmere,x86-64,x86-64-v2,x86-64-v3,x86-64-v4,znver1,znver2,znver3,znver4"), - // TargetArch_i386, - str_lit("athlon,athlon-4,athlon-mp,athlon-tbird,athlon-xp,atom,bonnell,c3,c3-2,generic,geode,i386,i486,i586,i686,k6,k6-2,k6-3,lakemont,native,pentium,pentium-m,pentium-mmx,pentium2,pentium3,pentium3m,pentium4,pentium4m,pentium_4,pentium_4_sse3,pentium_ii,pentium_iii,pentium_iii_no_xmm_regs,pentium_m,pentium_mmx,pentium_pro,pentiumpro,prescott,winchip-c6,winchip2,yonah"), - // TargetArch_arm32, - str_lit("arm1020e,arm1020t,arm1022e,arm10e,arm10tdmi,arm1136j-s,arm1136jf-s,arm1156t2-s,arm1156t2f-s,arm1176jz-s,arm1176jzf-s,arm710t,arm720t,arm7tdmi,arm7tdmi-s,arm8,arm810,arm9,arm920,arm920t,arm922t,arm926ej-s,arm940t,arm946e-s,arm966e-s,arm968e-s,arm9e,arm9tdmi,cortex-a12,cortex-a15,cortex-a17,cortex-a32,cortex-a35,cortex-a5,cortex-a53,cortex-a55,cortex-a57,cortex-a7,cortex-a710,cortex-a72,cortex-a73,cortex-a75,cortex-a76,cortex-a76ae,cortex-a77,cortex-a78,cortex-a78c,cortex-a8,cortex-a9,cortex-m0,cortex-m0plus,cortex-m1,cortex-m23,cortex-m3,cortex-m33,cortex-m35p,cortex-m4,cortex-m55,cortex-m7,cortex-m85,cortex-r4,cortex-r4f,cortex-r5,cortex-r52,cortex-r7,cortex-r8,cortex-x1,cortex-x1c,cyclone,ep9312,exynos-m3,exynos-m4,exynos-m5,generic,iwmmxt,krait,kryo,mpcore,mpcorenovfp,native,neoverse-n1,neoverse-n2,neoverse-v1,sc000,sc300,strongarm,strongarm110,strongarm1100,strongarm1110,swift,xscale"), - // TargetArch_arm64, - str_lit("a64fx,ampere1,ampere1a,apple-a10,apple-a11,apple-a12,apple-a13,apple-a14,apple-a15,apple-a16,apple-a7,apple-a8,apple-a9,apple-latest,apple-m1,apple-m2,apple-s4,apple-s5,carmel,cortex-a34,cortex-a35,cortex-a510,cortex-a53,cortex-a55,cortex-a57,cortex-a65,cortex-a65ae,cortex-a710,cortex-a715,cortex-a72,cortex-a73,cortex-a75,cortex-a76,cortex-a76ae,cortex-a77,cortex-a78,cortex-a78c,cortex-r82,cortex-x1,cortex-x1c,cortex-x2,cortex-x3,cyclone,exynos-m3,exynos-m4,exynos-m5,falkor,generic,kryo,native,neoverse-512tvb,neoverse-e1,neoverse-n1,neoverse-n2,neoverse-v1,neoverse-v2,saphira,thunderx,thunderx2t99,thunderx3t110,thunderxt81,thunderxt83,thunderxt88,tsv110"), - // TargetArch_wasm32, - str_lit("generic"), - // TargetArch_wasm64p32, - str_lit("generic"), + // TargetArch_Invalid: + str_lit(""), + // TargetArch_amd64: + str_lit("alderlake,amdfam10,athlon,athlon-4,athlon-fx,athlon-mp,athlon-tbird,athlon-xp,athlon64,athlon64-sse3,atom,atom_sse4_2,atom_sse4_2_movbe,barcelona,bdver1,bdver2,bdver3,bdver4,bonnell,broadwell,btver1,btver2,c3,c3-2,cannonlake,cascadelake,cooperlake,core-avx-i,core-avx2,core2,core_2_duo_sse4_1,core_2_duo_ssse3,core_2nd_gen_avx,core_3rd_gen_avx,core_4th_gen_avx,core_4th_gen_avx_tsx,core_5th_gen_avx,core_5th_gen_avx_tsx,core_aes_pclmulqdq,core_i7_sse4_2,corei7,corei7-avx,emeraldrapids,generic,geode,goldmont,goldmont-plus,goldmont_plus,grandridge,graniterapids,graniterapids-d,graniterapids_d,haswell,i386,i486,i586,i686,icelake-client,icelake-server,icelake_client,icelake_server,ivybridge,k6,k6-2,k6-3,k8,k8-sse3,knl,knm,lakemont,meteorlake,mic_avx512,nehalem,nocona,opteron,opteron-sse3,penryn,pentium,pentium-m,pentium-mmx,pentium2,pentium3,pentium3m,pentium4,pentium4m,pentium_4,pentium_4_sse3,pentium_ii,pentium_iii,pentium_iii_no_xmm_regs,pentium_m,pentium_mmx,pentium_pro,pentiumpro,prescott,raptorlake,rocketlake,sandybridge,sapphirerapids,sierraforest,silvermont,skx,skylake,skylake-avx512,skylake_avx512,slm,tigerlake,tremont,westmere,winchip-c6,winchip2,x86-64,x86-64-v2,x86-64-v3,x86-64-v4,yonah,znver1,znver2,znver3,znver4"), + // TargetArch_i386: + str_lit("alderlake,amdfam10,athlon,athlon-4,athlon-fx,athlon-mp,athlon-tbird,athlon-xp,athlon64,athlon64-sse3,atom,atom_sse4_2,atom_sse4_2_movbe,barcelona,bdver1,bdver2,bdver3,bdver4,bonnell,broadwell,btver1,btver2,c3,c3-2,cannonlake,cascadelake,cooperlake,core-avx-i,core-avx2,core2,core_2_duo_sse4_1,core_2_duo_ssse3,core_2nd_gen_avx,core_3rd_gen_avx,core_4th_gen_avx,core_4th_gen_avx_tsx,core_5th_gen_avx,core_5th_gen_avx_tsx,core_aes_pclmulqdq,core_i7_sse4_2,corei7,corei7-avx,emeraldrapids,generic,geode,goldmont,goldmont-plus,goldmont_plus,grandridge,graniterapids,graniterapids-d,graniterapids_d,haswell,i386,i486,i586,i686,icelake-client,icelake-server,icelake_client,icelake_server,ivybridge,k6,k6-2,k6-3,k8,k8-sse3,knl,knm,lakemont,meteorlake,mic_avx512,nehalem,nocona,opteron,opteron-sse3,penryn,pentium,pentium-m,pentium-mmx,pentium2,pentium3,pentium3m,pentium4,pentium4m,pentium_4,pentium_4_sse3,pentium_ii,pentium_iii,pentium_iii_no_xmm_regs,pentium_m,pentium_mmx,pentium_pro,pentiumpro,prescott,raptorlake,rocketlake,sandybridge,sapphirerapids,sierraforest,silvermont,skx,skylake,skylake-avx512,skylake_avx512,slm,tigerlake,tremont,westmere,winchip-c6,winchip2,x86-64,x86-64-v2,x86-64-v3,x86-64-v4,yonah,znver1,znver2,znver3,znver4"), + // TargetArch_arm32: + str_lit("arm1020e,arm1020t,arm1022e,arm10e,arm10tdmi,arm1136j-s,arm1136jf-s,arm1156t2-s,arm1156t2f-s,arm1176jz-s,arm1176jzf-s,arm710t,arm720t,arm7tdmi,arm7tdmi-s,arm8,arm810,arm9,arm920,arm920t,arm922t,arm926ej-s,arm940t,arm946e-s,arm966e-s,arm968e-s,arm9e,arm9tdmi,cortex-a12,cortex-a15,cortex-a17,cortex-a32,cortex-a35,cortex-a5,cortex-a53,cortex-a55,cortex-a57,cortex-a7,cortex-a710,cortex-a72,cortex-a73,cortex-a75,cortex-a76,cortex-a76ae,cortex-a77,cortex-a78,cortex-a78c,cortex-a8,cortex-a9,cortex-m0,cortex-m0plus,cortex-m1,cortex-m23,cortex-m3,cortex-m33,cortex-m35p,cortex-m4,cortex-m55,cortex-m7,cortex-m85,cortex-r4,cortex-r4f,cortex-r5,cortex-r52,cortex-r7,cortex-r8,cortex-x1,cortex-x1c,cyclone,ep9312,exynos-m3,exynos-m4,exynos-m5,generic,iwmmxt,krait,kryo,mpcore,mpcorenovfp,neoverse-n1,neoverse-n2,neoverse-v1,sc000,sc300,strongarm,strongarm110,strongarm1100,strongarm1110,swift,xscale"), + // TargetArch_arm64: + str_lit("a64fx,ampere1,ampere1a,apple-a10,apple-a11,apple-a12,apple-a13,apple-a14,apple-a15,apple-a16,apple-a7,apple-a8,apple-a9,apple-latest,apple-m1,apple-m2,apple-s4,apple-s5,carmel,cortex-a34,cortex-a35,cortex-a510,cortex-a53,cortex-a55,cortex-a57,cortex-a65,cortex-a65ae,cortex-a710,cortex-a715,cortex-a72,cortex-a73,cortex-a75,cortex-a76,cortex-a76ae,cortex-a77,cortex-a78,cortex-a78c,cortex-r82,cortex-x1,cortex-x1c,cortex-x2,cortex-x3,cyclone,exynos-m3,exynos-m4,exynos-m5,falkor,generic,kryo,neoverse-512tvb,neoverse-e1,neoverse-n1,neoverse-n2,neoverse-v1,neoverse-v2,saphira,thunderx,thunderx2t99,thunderx3t110,thunderxt81,thunderxt83,thunderxt88,tsv110"), + // TargetArch_wasm32: + str_lit("bleeding-edge,generic,mvp"), + // TargetArch_wasm64p32: + str_lit("bleeding-edge,generic,mvp"), +}; + +// Generated with the featuregen script in `misc/featuregen` +gb_global String target_features_list[TargetArch_COUNT] = { + // TargetArch_Invalid: + str_lit(""), + // TargetArch_amd64: + str_lit("16bit-mode,32bit-mode,3dnow,3dnowa,64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512er,avx512f,avx512fp16,avx512ifma,avx512pf,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vp2intersect,avx512vpopcntdq,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,branchfusion,cldemote,clflushopt,clwb,clzero,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,ermsb,f16c,false-deps-getmant,false-deps-lzcnt-tzcnt,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-popcnt,false-deps-range,fast-11bytenop,fast-15bytenop,fast-7bytenop,fast-bextr,fast-gather,fast-hops,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fast-vector-shift-masks,faster-shift-than-shuffle,fma,fma4,fsgsbase,fsrm,fxsr,gfni,harden-sls-ijmp,harden-sls-ret,hreset,idivl-to-divb,idivq-to-divl,invpcid,kl,lea-sp,lea-uses-ag,lvi-cfi,lvi-load-hardening,lwp,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,mwaitx,no-bypass-delay,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pad-short-functions,pclmul,pconfig,pku,popcnt,prefer-128-bit,prefer-256-bit,prefer-mask-registers,prefer-movmsk-over-vtest,prefer-no-gather,prefer-no-scatter,prefetchi,prefetchwt1,prfchw,ptwrite,raoint,rdpid,rdpru,rdrnd,rdseed,retpoline,retpoline-external-thunk,retpoline-indirect-branches,retpoline-indirect-calls,rtm,sahf,sbb-dep-breaking,serialize,seses,sgx,sha,sha512,shstk,slow-3ops-lea,slow-incdec,slow-lea,slow-pmaddwd,slow-pmulld,slow-shld,slow-two-mem-ops,slow-unaligned-mem-16,slow-unaligned-mem-32,sm3,sm4,soft-float,sse,sse-unaligned-mem,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tagged-globals,tbm,tsxldtrk,tuning-fast-imm-vector-shift,uintr,use-glm-div-sqrt-costs,use-slm-arith-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,widekl,x87,xop,xsave,xsavec,xsaveopt,xsaves"), + // TargetArch_i386: + str_lit("16bit-mode,32bit-mode,3dnow,3dnowa,64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512er,avx512f,avx512fp16,avx512ifma,avx512pf,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vp2intersect,avx512vpopcntdq,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,branchfusion,cldemote,clflushopt,clwb,clzero,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,ermsb,f16c,false-deps-getmant,false-deps-lzcnt-tzcnt,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-popcnt,false-deps-range,fast-11bytenop,fast-15bytenop,fast-7bytenop,fast-bextr,fast-gather,fast-hops,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fast-vector-shift-masks,faster-shift-than-shuffle,fma,fma4,fsgsbase,fsrm,fxsr,gfni,harden-sls-ijmp,harden-sls-ret,hreset,idivl-to-divb,idivq-to-divl,invpcid,kl,lea-sp,lea-uses-ag,lvi-cfi,lvi-load-hardening,lwp,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,mwaitx,no-bypass-delay,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pad-short-functions,pclmul,pconfig,pku,popcnt,prefer-128-bit,prefer-256-bit,prefer-mask-registers,prefer-movmsk-over-vtest,prefer-no-gather,prefer-no-scatter,prefetchi,prefetchwt1,prfchw,ptwrite,raoint,rdpid,rdpru,rdrnd,rdseed,retpoline,retpoline-external-thunk,retpoline-indirect-branches,retpoline-indirect-calls,rtm,sahf,sbb-dep-breaking,serialize,seses,sgx,sha,sha512,shstk,slow-3ops-lea,slow-incdec,slow-lea,slow-pmaddwd,slow-pmulld,slow-shld,slow-two-mem-ops,slow-unaligned-mem-16,slow-unaligned-mem-32,sm3,sm4,soft-float,sse,sse-unaligned-mem,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tagged-globals,tbm,tsxldtrk,tuning-fast-imm-vector-shift,uintr,use-glm-div-sqrt-costs,use-slm-arith-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,widekl,x87,xop,xsave,xsavec,xsaveopt,xsaves"), + // TargetArch_arm32: + str_lit("32bit,8msecext,a12,a15,a17,a32,a35,a5,a53,a55,a57,a7,a72,a73,a75,a76,a77,a78c,a8,a9,aapcs-frame-chain,aapcs-frame-chain-leaf,aclass,acquire-release,aes,armv4,armv4t,armv5t,armv5te,armv5tej,armv6,armv6-m,armv6j,armv6k,armv6kz,armv6s-m,armv6t2,armv7-a,armv7-m,armv7-r,armv7e-m,armv7k,armv7s,armv7ve,armv8-a,armv8-m.base,armv8-m.main,armv8-r,armv8.1-a,armv8.1-m.main,armv8.2-a,armv8.3-a,armv8.4-a,armv8.5-a,armv8.6-a,armv8.7-a,armv8.8-a,armv8.9-a,armv9-a,armv9.1-a,armv9.2-a,armv9.3-a,armv9.4-a,atomics-32,avoid-movs-shop,avoid-partial-cpsr,bf16,big-endian-instructions,cde,cdecp0,cdecp1,cdecp2,cdecp3,cdecp4,cdecp5,cdecp6,cdecp7,cheap-predicable-cpsr,clrbhb,cortex-a710,cortex-a78,cortex-x1,cortex-x1c,crc,crypto,d32,db,dfb,disable-postra-scheduler,dont-widen-vmovs,dotprod,dsp,execute-only,expand-fp-mlx,exynos,fix-cmse-cve-2021-35465,fix-cortex-a57-aes-1742098,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp16fml,fp64,fpao,fpregs,fpregs16,fpregs64,fullfp16,fuse-aes,fuse-literals,harden-sls-blr,harden-sls-nocomdat,harden-sls-retbr,hwdiv,hwdiv-arm,i8mm,iwmmxt,iwmmxt2,krait,kryo,lob,long-calls,loop-align,m3,m7,mclass,mp,muxed-units,mve,mve.fp,mve1beat,mve2beat,mve4beat,nacl-trap,neon,neon-fpmovs,neonfp,neoverse-v1,no-branch-predictor,no-bti-at-return-twice,no-movt,no-neg-immediates,noarm,nonpipelined-vfp,pacbti,perfmon,prefer-ishst,prefer-vmovsr,prof-unpr,r4,r5,r52,r7,ras,rclass,read-tp-tpidrprw,read-tp-tpidruro,read-tp-tpidrurw,reserve-r9,ret-addr-stack,sb,sha2,slow-fp-brcc,slow-load-D-subreg,slow-odd-reg,slow-vdup32,slow-vgetlni32,slowfpvfmx,slowfpvmlx,soft-float,splat-vfp-neon,strict-align,swift,thumb-mode,thumb2,trustzone,use-mipipeliner,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.1m.main,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8.7a,v8.8a,v8.9a,v8m,v8m.main,v9.1a,v9.2a,v9.3a,v9.4a,v9a,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vldn-align,vmlx-forwarding,vmlx-hazards,wide-stride-vfp,xscale,zcz"), + // TargetArch_arm64: + str_lit("CONTEXTIDREL2,a35,a510,a53,a55,a57,a64fx,a65,a710,a715,a72,a73,a75,a76,a77,a78,a78c,aes,aggressive-fma,all,alternate-sextload-cvt-f32-pattern,altnzcv,am,ampere1,ampere1a,amvs,apple-a10,apple-a11,apple-a12,apple-a13,apple-a14,apple-a15,apple-a16,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,ascend-store-address,b16b16,balance-fp-ops,bf16,brbe,bti,call-saved-x10,call-saved-x11,call-saved-x12,call-saved-x13,call-saved-x14,call-saved-x15,call-saved-x18,call-saved-x8,call-saved-x9,carmel,ccdp,ccidx,ccpp,chk,clrbhb,cmp-bcc-fusion,complxnum,cortex-r82,cortex-x1,cortex-x2,cortex-x3,crc,crypto,cssc,custom-cheap-as-move,d128,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,enable-select-opt,ete,exynos-cheap-as-move,exynosm3,exynosm4,f32mm,f64mm,falkor,fgt,fix-cortex-a53-835769,flagm,fmv,force-32bit-jump-tables,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-addsub-2reg-const1,fuse-adrp-add,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,gcs,harden-sls-blr,harden-sls-nocomdat,harden-sls-retbr,hbc,hcx,i8mm,ite,jsconv,kryo,lor,ls64,lse,lse128,lse2,lsl-fast,mec,mops,mpam,mte,neon,neoverse512tvb,neoversee1,neoversen1,neoversen2,neoversev1,neoversev2,nmi,no-bti-at-return-twice,no-neg-immediates,no-sve-fp-ld1r,no-zcz-fp,nv,outline-atomics,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,prfm-slc-target,rand,ras,rasv2,rcpc,rcpc-immo,rcpc3,rdm,reserve-x1,reserve-x10,reserve-x11,reserve-x12,reserve-x13,reserve-x14,reserve-x15,reserve-x18,reserve-x2,reserve-x20,reserve-x21,reserve-x22,reserve-x23,reserve-x24,reserve-x25,reserve-x26,reserve-x27,reserve-x28,reserve-x3,reserve-x30,reserve-x4,reserve-x5,reserve-x6,reserve-x7,reserve-x9,rme,saphira,sb,sel2,sha2,sha3,slow-misaligned-128store,slow-paired-128,slow-strqro-store,sm4,sme,sme-f16f16,sme-f64f64,sme-i16i64,sme2,sme2p1,spe,spe-eef,specres2,specrestrict,ssbs,strict-align,sve,sve2,sve2-aes,sve2-bitperm,sve2-sha3,sve2-sm4,sve2p1,tagged-globals,the,thunderx,thunderx2t99,thunderx3t110,thunderxt81,thunderxt83,thunderxt88,tlb-rmi,tme,tpidr-el1,tpidr-el2,tpidr-el3,tpidrro-el0,tracev8.4,trbe,tsv110,uaops,use-experimental-zeroing-pseudos,use-postra-scheduler,use-reciprocal-square-root,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8.7a,v8.8a,v8.9a,v8a,v8r,v9.1a,v9.2a,v9.3a,v9.4a,v9a,vh,wfxt,xs,zcm,zcz,zcz-fp-workaround,zcz-gp"), + // TargetArch_wasm32: + str_lit("atomics,bulk-memory,exception-handling,extended-const,multivalue,mutable-globals,nontrapping-fptoint,reference-types,relaxed-simd,sign-ext,simd128,tail-call"), + // TargetArch_wasm64p32: + str_lit("atomics,bulk-memory,exception-handling,extended-const,multivalue,mutable-globals,nontrapping-fptoint,reference-types,relaxed-simd,sign-ext,simd128,tail-call"), +}; + +// Generated with the featuregen script in `misc/featuregen` +gb_global int target_microarch_counts[TargetArch_COUNT] = { + // TargetArch_Invalid: + 0, + // TargetArch_amd64: + 120, + // TargetArch_i386: + 120, + // TargetArch_arm32: + 90, + // TargetArch_arm64: + 63, + // TargetArch_wasm32: + 3, + // TargetArch_wasm64p32: + 3, +}; + +// Generated with the featuregen script in `misc/featuregen` +gb_global MicroarchFeatureList microarch_features_list[] = { + // TargetArch_amd64: + { str_lit("alderlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("amdfam10"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,lzcnt,mmx,nopl,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4a,vzeroupper,x87") }, + { str_lit("athlon"), str_lit("3dnow,3dnowa,64bit-mode,cmov,cx8,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon-4"), str_lit("3dnow,3dnowa,64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon-fx"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon-mp"), str_lit("3dnow,3dnowa,64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon-tbird"), str_lit("3dnow,3dnowa,64bit-mode,cmov,cx8,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon-xp"), str_lit("3dnow,3dnowa,64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon64"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon64-sse3"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("atom"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,idivl-to-divb,idivq-to-divl,lea-sp,lea-uses-ag,mmx,movbe,no-bypass-delay,nopl,pad-short-functions,sahf,slow-two-mem-ops,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("atom_sse4_2"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87") }, + { str_lit("atom_sse4_2_movbe"), str_lit("64bit,64bit-mode,aes,clflushopt,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fsgsbase,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("barcelona"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,lzcnt,mmx,nopl,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4a,vzeroupper,x87") }, + { str_lit("bdver1"), str_lit("64bit,64bit-mode,aes,avx,branchfusion,cmov,crc32,cx16,cx8,fast-11bytenop,fast-scalar-shift-masks,fma4,fxsr,lwp,lzcnt,mmx,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vzeroupper,x87,xop,xsave") }, + { str_lit("bdver2"), str_lit("64bit,64bit-mode,aes,avx,bmi,branchfusion,cmov,crc32,cx16,cx8,f16c,fast-11bytenop,fast-bextr,fast-movbe,fast-scalar-shift-masks,fma,fma4,fxsr,lwp,lzcnt,mmx,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tbm,vzeroupper,x87,xop,xsave") }, + { str_lit("bdver3"), str_lit("64bit,64bit-mode,aes,avx,bmi,branchfusion,cmov,crc32,cx16,cx8,f16c,fast-11bytenop,fast-bextr,fast-movbe,fast-scalar-shift-masks,fma,fma4,fsgsbase,fxsr,lwp,lzcnt,mmx,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tbm,vzeroupper,x87,xop,xsave,xsaveopt") }, + { str_lit("bdver4"), str_lit("64bit,64bit-mode,aes,avx,avx2,bmi,bmi2,branchfusion,cmov,crc32,cx16,cx8,f16c,fast-11bytenop,fast-bextr,fast-movbe,fast-scalar-shift-masks,fma,fma4,fsgsbase,fxsr,lwp,lzcnt,mmx,movbe,mwaitx,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tbm,vzeroupper,x87,xop,xsave,xsaveopt") }, + { str_lit("bonnell"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,idivl-to-divb,idivq-to-divl,lea-sp,lea-uses-ag,mmx,movbe,no-bypass-delay,nopl,pad-short-functions,sahf,slow-two-mem-ops,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("broadwell"), str_lit("64bit,64bit-mode,adx,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("btver1"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fast-15bytenop,fast-scalar-shift-masks,fast-vector-shift-masks,fxsr,lzcnt,mmx,nopl,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4a,ssse3,vzeroupper,x87") }, + { str_lit("btver2"), str_lit("64bit,64bit-mode,aes,avx,bmi,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-hops,fast-lzcnt,fast-movbe,fast-scalar-shift-masks,fast-vector-shift-masks,fxsr,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,x87,xsave,xsaveopt") }, + { str_lit("c3"), str_lit("3dnow,64bit-mode,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("c3-2"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("cannonlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vl,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,sha,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("cascadelake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,avx512vnni,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("cooperlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bf16,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,avx512vnni,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("core-avx-i"), str_lit("64bit,64bit-mode,avx,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fsgsbase,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core-avx2"), str_lit("64bit,64bit-mode,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core2"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("core_2_duo_sse4_1"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,sse4.1,ssse3,vzeroupper,x87") }, + { str_lit("core_2_duo_ssse3"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("core_2nd_gen_avx"), str_lit("64bit,64bit-mode,avx,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_3rd_gen_avx"), str_lit("64bit,64bit-mode,avx,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fsgsbase,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_4th_gen_avx"), str_lit("64bit,64bit-mode,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_4th_gen_avx_tsx"), str_lit("64bit,64bit-mode,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_5th_gen_avx"), str_lit("64bit,64bit-mode,adx,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_5th_gen_avx_tsx"), str_lit("64bit,64bit-mode,adx,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_aes_pclmulqdq"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("core_i7_sse4_2"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("corei7"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("corei7-avx"), str_lit("64bit,64bit-mode,avx,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("emeraldrapids"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("generic"), str_lit("64bit,64bit-mode,cx8,fast-15bytenop,fast-scalar-fsqrt,idivq-to-divl,macrofusion,slow-3ops-lea,sse,sse2,vzeroupper,x87") }, + { str_lit("geode"), str_lit("3dnow,3dnowa,64bit-mode,cx8,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("goldmont"), str_lit("64bit,64bit-mode,aes,clflushopt,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("goldmont-plus"), str_lit("64bit,64bit-mode,aes,clflushopt,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("goldmont_plus"), str_lit("64bit,64bit-mode,aes,clflushopt,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("grandridge"), str_lit("64bit,64bit-mode,adx,aes,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,fast-movbe,fma,fsgsbase,fxsr,gfni,hreset,invpcid,kl,lzcnt,mmx,movbe,movdir64b,movdiri,no-bypass-delay,nopl,pclmul,pconfig,pku,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,uintr,use-glm-div-sqrt-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("graniterapids"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("graniterapids-d"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("graniterapids_d"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("haswell"), str_lit("64bit,64bit-mode,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("i386"), str_lit("64bit-mode,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("i486"), str_lit("64bit-mode,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("i586"), str_lit("64bit-mode,cx8,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("i686"), str_lit("64bit-mode,cmov,cx8,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("icelake-client"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("icelake-server"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("icelake_client"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("icelake_server"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("ivybridge"), str_lit("64bit,64bit-mode,avx,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fsgsbase,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("k6"), str_lit("64bit-mode,cx8,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("k6-2"), str_lit("3dnow,64bit-mode,cx8,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("k6-3"), str_lit("3dnow,64bit-mode,cx8,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("k8"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("k8-sse3"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("knl"), str_lit("64bit,64bit-mode,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, + { str_lit("knm"), str_lit("64bit,64bit-mode,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,avx512vpopcntdq,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, + { str_lit("lakemont"), str_lit("64bit-mode,cx8,slow-unaligned-mem-16,sse,sse2,vzeroupper") }, + { str_lit("meteorlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("mic_avx512"), str_lit("64bit,64bit-mode,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, + { str_lit("nehalem"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("nocona"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("opteron"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("opteron-sse3"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("penryn"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,sse4.1,ssse3,vzeroupper,x87") }, + { str_lit("pentium"), str_lit("64bit-mode,cx8,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium-m"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium-mmx"), str_lit("64bit-mode,cx8,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium2"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium3"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium3m"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium4"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium4m"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_4"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_4_sse3"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("pentium_ii"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_iii"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_iii_no_xmm_regs"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_m"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_mmx"), str_lit("64bit-mode,cx8,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_pro"), str_lit("64bit-mode,cmov,cx8,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentiumpro"), str_lit("64bit-mode,cmov,cx8,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("prescott"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("raptorlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("rocketlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("sandybridge"), str_lit("64bit,64bit-mode,avx,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("sapphirerapids"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("sierraforest"), str_lit("64bit,64bit-mode,adx,aes,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,fast-movbe,fma,fsgsbase,fxsr,gfni,hreset,invpcid,kl,lzcnt,mmx,movbe,movdir64b,movdiri,no-bypass-delay,nopl,pclmul,pconfig,pku,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,uintr,use-glm-div-sqrt-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("silvermont"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87") }, + { str_lit("skx"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("skylake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("skylake-avx512"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("skylake_avx512"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("slm"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87") }, + { str_lit("tigerlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vp2intersect,avx512vpopcntdq,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("tremont"), str_lit("64bit,64bit-mode,aes,clflushopt,clwb,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,gfni,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("westmere"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("winchip-c6"), str_lit("64bit-mode,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("winchip2"), str_lit("3dnow,64bit-mode,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("x86-64"), str_lit("64bit,64bit-mode,cmov,cx8,fxsr,idivq-to-divl,macrofusion,mmx,nopl,slow-3ops-lea,slow-incdec,sse,sse2,vzeroupper,x87") }, + { str_lit("x86-64-v2"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,nopl,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("x86-64-v3"), str_lit("64bit,64bit-mode,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fxsr,idivq-to-divl,lzcnt,macrofusion,mmx,movbe,nopl,popcnt,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave") }, + { str_lit("x86-64-v4"), str_lit("64bit,64bit-mode,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fxsr,idivq-to-divl,lzcnt,macrofusion,mmx,movbe,nopl,popcnt,prefer-256-bit,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave") }, + { str_lit("yonah"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("znver1"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,branchfusion,clflushopt,clzero,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,lzcnt,mmx,movbe,mwaitx,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("znver2"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,branchfusion,clflushopt,clwb,clzero,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,lzcnt,mmx,movbe,mwaitx,nopl,pclmul,popcnt,prfchw,rdpid,rdpru,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("znver3"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,branchfusion,clflushopt,clwb,clzero,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,invpcid,lzcnt,macrofusion,mmx,movbe,mwaitx,nopl,pclmul,pku,popcnt,prfchw,rdpid,rdpru,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("znver4"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,branchfusion,clflushopt,clwb,clzero,cmov,crc32,cx16,cx8,evex512,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,invpcid,lzcnt,macrofusion,mmx,movbe,mwaitx,nopl,pclmul,pku,popcnt,prfchw,rdpid,rdpru,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,shstk,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + // TargetArch_i386: + { str_lit("alderlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("amdfam10"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,lzcnt,mmx,nopl,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4a,vzeroupper,x87") }, + { str_lit("athlon"), str_lit("32bit-mode,3dnow,3dnowa,cmov,cx8,mmx,nopl,slow-shld,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("athlon-4"), str_lit("32bit-mode,3dnow,3dnowa,cmov,cx8,fxsr,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("athlon-fx"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon-mp"), str_lit("32bit-mode,3dnow,3dnowa,cmov,cx8,fxsr,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("athlon-tbird"), str_lit("32bit-mode,3dnow,3dnowa,cmov,cx8,mmx,nopl,slow-shld,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("athlon-xp"), str_lit("32bit-mode,3dnow,3dnowa,cmov,cx8,fxsr,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("athlon64"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon64-sse3"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("atom"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,idivl-to-divb,idivq-to-divl,lea-sp,lea-uses-ag,mmx,movbe,no-bypass-delay,nopl,pad-short-functions,sahf,slow-two-mem-ops,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("atom_sse4_2"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87") }, + { str_lit("atom_sse4_2_movbe"), str_lit("32bit-mode,64bit,aes,clflushopt,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fsgsbase,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("barcelona"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,lzcnt,mmx,nopl,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4a,vzeroupper,x87") }, + { str_lit("bdver1"), str_lit("32bit-mode,64bit,aes,avx,branchfusion,cmov,crc32,cx16,cx8,fast-11bytenop,fast-scalar-shift-masks,fma4,fxsr,lwp,lzcnt,mmx,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vzeroupper,x87,xop,xsave") }, + { str_lit("bdver2"), str_lit("32bit-mode,64bit,aes,avx,bmi,branchfusion,cmov,crc32,cx16,cx8,f16c,fast-11bytenop,fast-bextr,fast-movbe,fast-scalar-shift-masks,fma,fma4,fxsr,lwp,lzcnt,mmx,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tbm,vzeroupper,x87,xop,xsave") }, + { str_lit("bdver3"), str_lit("32bit-mode,64bit,aes,avx,bmi,branchfusion,cmov,crc32,cx16,cx8,f16c,fast-11bytenop,fast-bextr,fast-movbe,fast-scalar-shift-masks,fma,fma4,fsgsbase,fxsr,lwp,lzcnt,mmx,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tbm,vzeroupper,x87,xop,xsave,xsaveopt") }, + { str_lit("bdver4"), str_lit("32bit-mode,64bit,aes,avx,avx2,bmi,bmi2,branchfusion,cmov,crc32,cx16,cx8,f16c,fast-11bytenop,fast-bextr,fast-movbe,fast-scalar-shift-masks,fma,fma4,fsgsbase,fxsr,lwp,lzcnt,mmx,movbe,mwaitx,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tbm,vzeroupper,x87,xop,xsave,xsaveopt") }, + { str_lit("bonnell"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,idivl-to-divb,idivq-to-divl,lea-sp,lea-uses-ag,mmx,movbe,no-bypass-delay,nopl,pad-short-functions,sahf,slow-two-mem-ops,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("broadwell"), str_lit("32bit-mode,64bit,adx,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("btver1"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fast-15bytenop,fast-scalar-shift-masks,fast-vector-shift-masks,fxsr,lzcnt,mmx,nopl,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4a,ssse3,vzeroupper,x87") }, + { str_lit("btver2"), str_lit("32bit-mode,64bit,aes,avx,bmi,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-hops,fast-lzcnt,fast-movbe,fast-scalar-shift-masks,fast-vector-shift-masks,fxsr,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,x87,xsave,xsaveopt") }, + { str_lit("c3"), str_lit("32bit-mode,3dnow,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("c3-2"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("cannonlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vl,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,sha,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("cascadelake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,avx512vnni,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("cooperlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bf16,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,avx512vnni,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("core-avx-i"), str_lit("32bit-mode,64bit,avx,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fsgsbase,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core-avx2"), str_lit("32bit-mode,64bit,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core2"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("core_2_duo_sse4_1"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,sse4.1,ssse3,vzeroupper,x87") }, + { str_lit("core_2_duo_ssse3"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("core_2nd_gen_avx"), str_lit("32bit-mode,64bit,avx,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_3rd_gen_avx"), str_lit("32bit-mode,64bit,avx,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fsgsbase,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_4th_gen_avx"), str_lit("32bit-mode,64bit,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_4th_gen_avx_tsx"), str_lit("32bit-mode,64bit,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_5th_gen_avx"), str_lit("32bit-mode,64bit,adx,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_5th_gen_avx_tsx"), str_lit("32bit-mode,64bit,adx,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_aes_pclmulqdq"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("core_i7_sse4_2"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("corei7"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("corei7-avx"), str_lit("32bit-mode,64bit,avx,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("emeraldrapids"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,amx-bf16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("generic"), str_lit("32bit-mode,64bit,cx8,fast-15bytenop,fast-scalar-fsqrt,idivq-to-divl,macrofusion,slow-3ops-lea,vzeroupper,x87") }, + { str_lit("geode"), str_lit("32bit-mode,3dnow,3dnowa,cx8,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("goldmont"), str_lit("32bit-mode,64bit,aes,clflushopt,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("goldmont-plus"), str_lit("32bit-mode,64bit,aes,clflushopt,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("goldmont_plus"), str_lit("32bit-mode,64bit,aes,clflushopt,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("grandridge"), str_lit("32bit-mode,64bit,adx,aes,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,fast-movbe,fma,fsgsbase,fxsr,gfni,hreset,invpcid,kl,lzcnt,mmx,movbe,movdir64b,movdiri,no-bypass-delay,nopl,pclmul,pconfig,pku,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,uintr,use-glm-div-sqrt-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("graniterapids"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,amx-bf16,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("graniterapids-d"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("graniterapids_d"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("haswell"), str_lit("32bit-mode,64bit,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("i386"), str_lit("32bit-mode,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("i486"), str_lit("32bit-mode,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("i586"), str_lit("32bit-mode,cx8,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("i686"), str_lit("32bit-mode,cmov,cx8,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("icelake-client"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("icelake-server"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("icelake_client"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("icelake_server"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("ivybridge"), str_lit("32bit-mode,64bit,avx,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fsgsbase,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("k6"), str_lit("32bit-mode,cx8,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("k6-2"), str_lit("32bit-mode,3dnow,cx8,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("k6-3"), str_lit("32bit-mode,3dnow,cx8,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("k8"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("k8-sse3"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("knl"), str_lit("32bit-mode,64bit,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, + { str_lit("knm"), str_lit("32bit-mode,64bit,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,avx512vpopcntdq,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, + { str_lit("lakemont"), str_lit("32bit-mode,cx8,slow-unaligned-mem-16,vzeroupper") }, + { str_lit("meteorlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("mic_avx512"), str_lit("32bit-mode,64bit,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, + { str_lit("nehalem"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("nocona"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("opteron"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("opteron-sse3"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("penryn"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,sse4.1,ssse3,vzeroupper,x87") }, + { str_lit("pentium"), str_lit("32bit-mode,cx8,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("pentium-m"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium-mmx"), str_lit("32bit-mode,cx8,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("pentium2"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("pentium3"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("pentium3m"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("pentium4"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium4m"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_4"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_4_sse3"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("pentium_ii"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("pentium_iii"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("pentium_iii_no_xmm_regs"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("pentium_m"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_mmx"), str_lit("32bit-mode,cx8,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("pentium_pro"), str_lit("32bit-mode,cmov,cx8,nopl,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("pentiumpro"), str_lit("32bit-mode,cmov,cx8,nopl,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("prescott"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("raptorlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("rocketlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("sandybridge"), str_lit("32bit-mode,64bit,avx,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("sapphirerapids"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,amx-bf16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("sierraforest"), str_lit("32bit-mode,64bit,adx,aes,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,fast-movbe,fma,fsgsbase,fxsr,gfni,hreset,invpcid,kl,lzcnt,mmx,movbe,movdir64b,movdiri,no-bypass-delay,nopl,pclmul,pconfig,pku,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,uintr,use-glm-div-sqrt-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("silvermont"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87") }, + { str_lit("skx"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("skylake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("skylake-avx512"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("skylake_avx512"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("slm"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87") }, + { str_lit("tigerlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vp2intersect,avx512vpopcntdq,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("tremont"), str_lit("32bit-mode,64bit,aes,clflushopt,clwb,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,gfni,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("westmere"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("winchip-c6"), str_lit("32bit-mode,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("winchip2"), str_lit("32bit-mode,3dnow,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("x86-64"), str_lit("32bit-mode,64bit,cmov,cx8,fxsr,idivq-to-divl,macrofusion,mmx,nopl,slow-3ops-lea,slow-incdec,sse,sse2,vzeroupper,x87") }, + { str_lit("x86-64-v2"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,nopl,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("x86-64-v3"), str_lit("32bit-mode,64bit,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fxsr,idivq-to-divl,lzcnt,macrofusion,mmx,movbe,nopl,popcnt,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave") }, + { str_lit("x86-64-v4"), str_lit("32bit-mode,64bit,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fxsr,idivq-to-divl,lzcnt,macrofusion,mmx,movbe,nopl,popcnt,prefer-256-bit,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave") }, + { str_lit("yonah"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("znver1"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,branchfusion,clflushopt,clzero,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,lzcnt,mmx,movbe,mwaitx,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("znver2"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,branchfusion,clflushopt,clwb,clzero,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,lzcnt,mmx,movbe,mwaitx,nopl,pclmul,popcnt,prfchw,rdpid,rdpru,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("znver3"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,branchfusion,clflushopt,clwb,clzero,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,invpcid,lzcnt,macrofusion,mmx,movbe,mwaitx,nopl,pclmul,pku,popcnt,prfchw,rdpid,rdpru,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("znver4"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,branchfusion,clflushopt,clwb,clzero,cmov,crc32,cx16,cx8,evex512,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,invpcid,lzcnt,macrofusion,mmx,movbe,mwaitx,nopl,pclmul,pku,popcnt,prfchw,rdpid,rdpru,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,shstk,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + // TargetArch_arm32: + { str_lit("arm1020e"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm1020t"), str_lit("armv5t,v4t,v5t") }, + { str_lit("arm1022e"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm10e"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm10tdmi"), str_lit("armv5t,v4t,v5t") }, + { str_lit("arm1136j-s"), str_lit("armv6,dsp,v4t,v5t,v5te,v6") }, + { str_lit("arm1136jf-s"), str_lit("armv6,dsp,fp64,fpregs,fpregs64,slowfpvmlx,v4t,v5t,v5te,v6,vfp2,vfp2sp") }, + { str_lit("arm1156t2-s"), str_lit("armv6t2,dsp,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v8m") }, + { str_lit("arm1156t2f-s"), str_lit("armv6t2,dsp,fp64,fpregs,fpregs64,slowfpvmlx,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v8m,vfp2,vfp2sp") }, + { str_lit("arm1176jz-s"), str_lit("armv6kz,trustzone,v4t,v5t,v5te,v6,v6k") }, + { str_lit("arm1176jzf-s"), str_lit("armv6kz,fp64,fpregs,fpregs64,slowfpvmlx,trustzone,v4t,v5t,v5te,v6,v6k,vfp2,vfp2sp") }, + { str_lit("arm710t"), str_lit("armv4t,v4t") }, + { str_lit("arm720t"), str_lit("armv4t,v4t") }, + { str_lit("arm7tdmi"), str_lit("armv4t,v4t") }, + { str_lit("arm7tdmi-s"), str_lit("armv4t,v4t") }, + { str_lit("arm8"), str_lit("armv4") }, + { str_lit("arm810"), str_lit("armv4") }, + { str_lit("arm9"), str_lit("armv4t,v4t") }, + { str_lit("arm920"), str_lit("armv4t,v4t") }, + { str_lit("arm920t"), str_lit("armv4t,v4t") }, + { str_lit("arm922t"), str_lit("armv4t,v4t") }, + { str_lit("arm926ej-s"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm940t"), str_lit("armv4t,v4t") }, + { str_lit("arm946e-s"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm966e-s"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm968e-s"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm9e"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm9tdmi"), str_lit("armv4t,v4t") }, + { str_lit("cortex-a12"), str_lit("a12,aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ret-addr-stack,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vmlx-forwarding") }, + { str_lit("cortex-a15"), str_lit("a15,aclass,armv7-a,avoid-partial-cpsr,d32,db,dont-widen-vmovs,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,muxed-units,neon,perfmon,ret-addr-stack,splat-vfp-neon,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vldn-align") }, + { str_lit("cortex-a17"), str_lit("a17,aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ret-addr-stack,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vmlx-forwarding") }, + { str_lit("cortex-a32"), str_lit("aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a35"), str_lit("a35,aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a5"), str_lit("a5,aclass,armv7-a,d32,db,dsp,fp16,fp64,fpregs,fpregs64,mp,neon,perfmon,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,vmlx-forwarding") }, + { str_lit("cortex-a53"), str_lit("a53,aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpao,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a55"), str_lit("a55,aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a57"), str_lit("a57,aclass,acquire-release,aes,armv8-a,avoid-partial-cpsr,cheap-predicable-cpsr,crc,crypto,d32,db,dsp,fix-cortex-a57-aes-1742098,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpao,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a7"), str_lit("a7,aclass,armv7-a,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vmlx-forwarding,vmlx-hazards") }, + { str_lit("cortex-a710"), str_lit("aclass,acquire-release,armv9-a,bf16,cortex-a710,crc,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp16fml,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,i8mm,mp,neon,perfmon,ras,sb,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8m,v9a,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a72"), str_lit("a72,aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fix-cortex-a57-aes-1742098,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a73"), str_lit("a73,aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a75"), str_lit("a75,aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a76"), str_lit("a76,aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a76ae"), str_lit("a76,aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a77"), str_lit("a77,aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a78"), str_lit("aclass,acquire-release,aes,armv8.2-a,cortex-a78,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a78c"), str_lit("a78c,aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a8"), str_lit("a8,aclass,armv7-a,d32,db,dsp,fp64,fpregs,fpregs64,neon,nonpipelined-vfp,perfmon,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vmlx-forwarding,vmlx-hazards") }, + { str_lit("cortex-a9"), str_lit("a9,aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,expand-fp-mlx,fp16,fp64,fpregs,fpregs64,mp,muxed-units,neon,neon-fpmovs,perfmon,prefer-vmovsr,ret-addr-stack,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vldn-align,vmlx-forwarding,vmlx-hazards") }, + { str_lit("cortex-m0"), str_lit("armv6-m,db,mclass,no-branch-predictor,noarm,strict-align,thumb-mode,v4t,v5t,v5te,v6,v6m") }, + { str_lit("cortex-m0plus"), str_lit("armv6-m,db,mclass,no-branch-predictor,noarm,strict-align,thumb-mode,v4t,v5t,v5te,v6,v6m") }, + { str_lit("cortex-m1"), str_lit("armv6-m,db,mclass,no-branch-predictor,noarm,strict-align,thumb-mode,v4t,v5t,v5te,v6,v6m") }, + { str_lit("cortex-m23"), str_lit("8msecext,acquire-release,armv8-m.base,db,hwdiv,mclass,no-branch-predictor,no-movt,noarm,strict-align,thumb-mode,v4t,v5t,v5te,v6,v6m,v7clrex,v8m") }, + { str_lit("cortex-m3"), str_lit("armv7-m,db,hwdiv,loop-align,m3,mclass,no-branch-predictor,noarm,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m") }, + { str_lit("cortex-m33"), str_lit("8msecext,acquire-release,armv8-m.main,db,dsp,fix-cmse-cve-2021-35465,fp-armv8d16sp,fp16,fpregs,hwdiv,loop-align,mclass,no-branch-predictor,noarm,slowfpvfmx,slowfpvmlx,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,v8m.main,vfp2sp,vfp3d16sp,vfp4d16sp") }, + { str_lit("cortex-m35p"), str_lit("8msecext,acquire-release,armv8-m.main,db,dsp,fix-cmse-cve-2021-35465,fp-armv8d16sp,fp16,fpregs,hwdiv,loop-align,mclass,no-branch-predictor,noarm,slowfpvfmx,slowfpvmlx,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,v8m.main,vfp2sp,vfp3d16sp,vfp4d16sp") }, + { str_lit("cortex-m4"), str_lit("armv7e-m,db,dsp,fp16,fpregs,hwdiv,loop-align,mclass,no-branch-predictor,noarm,slowfpvfmx,slowfpvmlx,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2sp,vfp3d16sp,vfp4d16sp") }, + { str_lit("cortex-m55"), str_lit("8msecext,acquire-release,armv8.1-m.main,db,dsp,fix-cmse-cve-2021-35465,fp-armv8d16,fp-armv8d16sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,lob,loop-align,mclass,mve,mve.fp,no-branch-predictor,noarm,ras,slowfpvmlx,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8.1m.main,v8m,v8m.main,vfp2,vfp2sp,vfp3d16,vfp3d16sp,vfp4d16,vfp4d16sp") }, + { str_lit("cortex-m7"), str_lit("armv7e-m,db,dsp,fp-armv8d16,fp-armv8d16sp,fp16,fp64,fpregs,fpregs64,hwdiv,m7,mclass,noarm,thumb-mode,thumb2,use-mipipeliner,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3d16,vfp3d16sp,vfp4d16,vfp4d16sp") }, + { str_lit("cortex-m85"), str_lit("8msecext,acquire-release,armv8.1-m.main,db,dsp,fp-armv8d16,fp-armv8d16sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,lob,mclass,mve,mve.fp,noarm,pacbti,ras,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8.1m.main,v8m,v8m.main,vfp2,vfp2sp,vfp3d16,vfp3d16sp,vfp4d16,vfp4d16sp") }, + { str_lit("cortex-r4"), str_lit("armv7-r,avoid-partial-cpsr,db,dsp,hwdiv,perfmon,r4,rclass,ret-addr-stack,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m") }, + { str_lit("cortex-r4f"), str_lit("armv7-r,avoid-partial-cpsr,db,dsp,fp64,fpregs,fpregs64,hwdiv,perfmon,r4,rclass,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3d16,vfp3d16sp") }, + { str_lit("cortex-r5"), str_lit("armv7-r,avoid-partial-cpsr,db,dsp,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,perfmon,r5,rclass,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3d16,vfp3d16sp") }, + { str_lit("cortex-r52"), str_lit("acquire-release,armv8-r,crc,d32,db,dfb,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpao,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,r52,rclass,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-r7"), str_lit("armv7-r,avoid-partial-cpsr,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,perfmon,r7,rclass,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3d16,vfp3d16sp") }, + { str_lit("cortex-r8"), str_lit("armv7-r,avoid-partial-cpsr,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,perfmon,rclass,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3d16,vfp3d16sp") }, + { str_lit("cortex-x1"), str_lit("aclass,acquire-release,aes,armv8.2-a,cortex-x1,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-x1c"), str_lit("aclass,acquire-release,aes,armv8.2-a,cortex-x1c,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cyclone"), str_lit("aclass,acquire-release,aes,armv8-a,avoid-movs-shop,avoid-partial-cpsr,crc,crypto,d32,db,disable-postra-scheduler,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,neonfp,perfmon,ret-addr-stack,sha2,slowfpvfmx,slowfpvmlx,swift,thumb2,trustzone,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,zcz") }, + { str_lit("ep9312"), str_lit("armv4t,v4t") }, + { str_lit("exynos-m3"), str_lit("aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dont-widen-vmovs,dsp,expand-fp-mlx,exynos,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,fuse-aes,fuse-literals,hwdiv,hwdiv-arm,mp,neon,perfmon,prof-unpr,ret-addr-stack,sha2,slow-fp-brcc,slow-vdup32,slow-vgetlni32,slowfpvfmx,slowfpvmlx,splat-vfp-neon,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,wide-stride-vfp,zcz") }, + { str_lit("exynos-m4"), str_lit("aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dont-widen-vmovs,dotprod,dsp,expand-fp-mlx,exynos,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,fuse-aes,fuse-literals,hwdiv,hwdiv-arm,mp,neon,perfmon,prof-unpr,ras,ret-addr-stack,sha2,slow-fp-brcc,slow-vdup32,slow-vgetlni32,slowfpvfmx,slowfpvmlx,splat-vfp-neon,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,wide-stride-vfp,zcz") }, + { str_lit("exynos-m5"), str_lit("aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dont-widen-vmovs,dotprod,dsp,expand-fp-mlx,exynos,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,fuse-aes,fuse-literals,hwdiv,hwdiv-arm,mp,neon,perfmon,prof-unpr,ras,ret-addr-stack,sha2,slow-fp-brcc,slow-vdup32,slow-vgetlni32,slowfpvfmx,slowfpvmlx,splat-vfp-neon,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,wide-stride-vfp,zcz") }, + { str_lit("generic"), str_lit("") }, + { str_lit("iwmmxt"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("krait"), str_lit("aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,krait,muxed-units,neon,perfmon,ret-addr-stack,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,vldn-align,vmlx-forwarding") }, + { str_lit("kryo"), str_lit("aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,kryo,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("mpcore"), str_lit("armv6k,fp64,fpregs,fpregs64,slowfpvmlx,v4t,v5t,v5te,v6,v6k,vfp2,vfp2sp") }, + { str_lit("mpcorenovfp"), str_lit("armv6k,v4t,v5t,v5te,v6,v6k") }, + { str_lit("neoverse-n1"), str_lit("aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("neoverse-n2"), str_lit("aclass,acquire-release,armv9-a,bf16,crc,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,i8mm,mp,neon,perfmon,ras,sb,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8m,v9a,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("neoverse-v1"), str_lit("aclass,acquire-release,aes,armv8.4-a,bf16,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,i8mm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8.3a,v8.4a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("sc000"), str_lit("armv6-m,db,mclass,no-branch-predictor,noarm,strict-align,thumb-mode,v4t,v5t,v5te,v6,v6m") }, + { str_lit("sc300"), str_lit("armv7-m,db,hwdiv,m3,mclass,no-branch-predictor,noarm,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m") }, + { str_lit("strongarm"), str_lit("armv4") }, + { str_lit("strongarm110"), str_lit("armv4") }, + { str_lit("strongarm1100"), str_lit("armv4") }, + { str_lit("strongarm1110"), str_lit("armv4") }, + { str_lit("swift"), str_lit("aclass,armv7-a,avoid-movs-shop,avoid-partial-cpsr,d32,db,disable-postra-scheduler,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,neonfp,perfmon,prefer-ishst,prof-unpr,ret-addr-stack,slow-load-D-subreg,slow-odd-reg,slow-vdup32,slow-vgetlni32,slowfpvfmx,slowfpvmlx,swift,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,vmlx-hazards,wide-stride-vfp") }, + { str_lit("xscale"), str_lit("armv5te,v4t,v5t,v5te") }, + // TargetArch_arm64: + { str_lit("a64fx"), str_lit("CONTEXTIDREL2,a64fx,aggressive-fma,arith-bcc-fusion,ccpp,complxnum,crc,el2vmsa,el3,fp-armv8,fullfp16,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rdm,sha2,store-pair-suppress,sve,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("ampere1"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,aggressive-fma,altnzcv,alu-lsl-fast,am,ampere1,amvs,arith-bcc-fusion,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,crc,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fptoint,fuse-address,fuse-aes,fuse-literals,i8mm,jsconv,ldp-aligned-only,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,rand,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,stp-aligned-only,tlb-rmi,tracev8.4,uaops,use-postra-scheduler,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh") }, + { str_lit("ampere1a"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,aggressive-fma,altnzcv,alu-lsl-fast,am,ampere1a,amvs,arith-bcc-fusion,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,crc,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fptoint,fuse-address,fuse-aes,fuse-literals,i8mm,jsconv,ldp-aligned-only,lor,lse,lse2,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predres,rand,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,sm4,specrestrict,ssbs,store-pair-suppress,stp-aligned-only,tlb-rmi,tracev8.4,uaops,use-postra-scheduler,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh") }, + { str_lit("apple-a10"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,apple-a10,arith-bcc-fusion,arith-cbz-fusion,crc,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fuse-aes,fuse-crypto-eor,lor,neon,pan,perfmon,rdm,sha2,store-pair-suppress,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-a11"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,apple-a11,arith-bcc-fusion,arith-cbz-fusion,ccpp,crc,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fullfp16,fuse-aes,fuse-crypto-eor,lor,lse,neon,pan,pan-rwv,perfmon,ras,rdm,sha2,store-pair-suppress,uaops,v8.1a,v8.2a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-a12"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,apple-a12,arith-bcc-fusion,arith-cbz-fusion,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fullfp16,fuse-aes,fuse-crypto-eor,jsconv,lor,lse,neon,pan,pan-rwv,pauth,perfmon,ras,rcpc,rdm,sha2,store-pair-suppress,uaops,v8.1a,v8.2a,v8.3a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-a13"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,am,apple-a13,arith-bcc-fusion,arith-cbz-fusion,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,el2vmsa,el3,flagm,fp-armv8,fp16fml,fullfp16,fuse-aes,fuse-crypto-eor,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,ras,rcpc,rcpc-immo,rdm,sel2,sha2,sha3,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-a14"), str_lit("CONTEXTIDREL2,aes,aggressive-fma,alternate-sextload-cvt-f32-pattern,altnzcv,am,apple-a14,arith-bcc-fusion,arith-cbz-fusion,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,el2vmsa,el3,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-a15"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,altnzcv,am,amvs,apple-a15,arith-bcc-fusion,arith-cbz-fusion,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,i8mm,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-a16"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,altnzcv,am,amvs,apple-a16,arith-bcc-fusion,arith-cbz-fusion,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,hcx,i8mm,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-a7"), str_lit("aes,alternate-sextload-cvt-f32-pattern,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fuse-aes,fuse-crypto-eor,neon,perfmon,sha2,store-pair-suppress,v8a,zcm,zcz,zcz-fp-workaround,zcz-gp") }, + { str_lit("apple-a8"), str_lit("aes,alternate-sextload-cvt-f32-pattern,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fuse-aes,fuse-crypto-eor,neon,perfmon,sha2,store-pair-suppress,v8a,zcm,zcz,zcz-fp-workaround,zcz-gp") }, + { str_lit("apple-a9"), str_lit("aes,alternate-sextload-cvt-f32-pattern,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fuse-aes,fuse-crypto-eor,neon,perfmon,sha2,store-pair-suppress,v8a,zcm,zcz,zcz-fp-workaround,zcz-gp") }, + { str_lit("apple-latest"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,altnzcv,am,amvs,apple-a16,arith-bcc-fusion,arith-cbz-fusion,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,hcx,i8mm,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-m1"), str_lit("CONTEXTIDREL2,aes,aggressive-fma,alternate-sextload-cvt-f32-pattern,altnzcv,am,apple-a14,arith-bcc-fusion,arith-cbz-fusion,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,el2vmsa,el3,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-m2"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,altnzcv,am,amvs,apple-a15,arith-bcc-fusion,arith-cbz-fusion,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,i8mm,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-s4"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,apple-a12,arith-bcc-fusion,arith-cbz-fusion,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fullfp16,fuse-aes,fuse-crypto-eor,jsconv,lor,lse,neon,pan,pan-rwv,pauth,perfmon,ras,rcpc,rdm,sha2,store-pair-suppress,uaops,v8.1a,v8.2a,v8.3a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-s5"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,apple-a12,arith-bcc-fusion,arith-cbz-fusion,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fullfp16,fuse-aes,fuse-crypto-eor,jsconv,lor,lse,neon,pan,pan-rwv,pauth,perfmon,ras,rcpc,rdm,sha2,store-pair-suppress,uaops,v8.1a,v8.2a,v8.3a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("carmel"), str_lit("CONTEXTIDREL2,aes,carmel,ccpp,crc,crypto,el2vmsa,el3,fp-armv8,fullfp16,lor,lse,neon,pan,pan-rwv,ras,rdm,sha2,uaops,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a34"), str_lit("a35,aes,crc,crypto,el2vmsa,el3,fp-armv8,neon,perfmon,sha2,v8a") }, + { str_lit("cortex-a35"), str_lit("a35,aes,crc,crypto,el2vmsa,el3,fp-armv8,neon,perfmon,sha2,v8a") }, + { str_lit("cortex-a510"), str_lit("CONTEXTIDREL2,a510,altnzcv,am,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,dit,dotprod,el2vmsa,el3,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("cortex-a53"), str_lit("a53,aes,balance-fp-ops,crc,crypto,el2vmsa,el3,fp-armv8,fuse-adrp-add,fuse-aes,neon,perfmon,sha2,use-postra-scheduler,v8a") }, + { str_lit("cortex-a55"), str_lit("CONTEXTIDREL2,a55,aes,ccpp,crc,crypto,dotprod,el2vmsa,el3,fp-armv8,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,ras,rcpc,rdm,sha2,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a57"), str_lit("a57,aes,balance-fp-ops,crc,crypto,el2vmsa,el3,enable-select-opt,fp-armv8,fuse-adrp-add,fuse-aes,fuse-literals,neon,perfmon,predictable-select-expensive,sha2,use-postra-scheduler,v8a") }, + { str_lit("cortex-a65"), str_lit("CONTEXTIDREL2,a65,aes,ccpp,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-literals,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,ssbs,uaops,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a65ae"), str_lit("CONTEXTIDREL2,a65,aes,ccpp,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-literals,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,ssbs,uaops,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a710"), str_lit("CONTEXTIDREL2,a710,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("cortex-a715"), str_lit("CONTEXTIDREL2,a715,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,spe,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("cortex-a72"), str_lit("a72,aes,crc,crypto,el2vmsa,el3,enable-select-opt,fp-armv8,fuse-adrp-add,fuse-aes,fuse-literals,neon,perfmon,predictable-select-expensive,sha2,v8a") }, + { str_lit("cortex-a73"), str_lit("a73,aes,crc,crypto,el2vmsa,el3,enable-select-opt,fp-armv8,fuse-adrp-add,fuse-aes,neon,perfmon,predictable-select-expensive,sha2,v8a") }, + { str_lit("cortex-a75"), str_lit("CONTEXTIDREL2,a75,aes,ccpp,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,uaops,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a76"), str_lit("CONTEXTIDREL2,a76,addr-lsl-fast,aes,alu-lsl-fast,ccpp,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,ssbs,uaops,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a76ae"), str_lit("CONTEXTIDREL2,a76,addr-lsl-fast,aes,alu-lsl-fast,ccpp,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,ssbs,uaops,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a77"), str_lit("CONTEXTIDREL2,a77,addr-lsl-fast,aes,alu-lsl-fast,ccpp,cmp-bcc-fusion,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,ssbs,uaops,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a78"), str_lit("CONTEXTIDREL2,a78,addr-lsl-fast,aes,alu-lsl-fast,ccpp,cmp-bcc-fusion,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,spe,ssbs,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a78c"), str_lit("CONTEXTIDREL2,a78c,addr-lsl-fast,aes,alu-lsl-fast,ccpp,cmp-bcc-fusion,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,flagm,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,spe,ssbs,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-r82"), str_lit("CONTEXTIDREL2,ccidx,ccpp,complxnum,cortex-r82,crc,dit,dotprod,flagm,fp-armv8,fp16fml,fullfp16,jsconv,lse,neon,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,specrestrict,ssbs,tlb-rmi,tracev8.4,uaops,use-postra-scheduler,v8r") }, + { str_lit("cortex-x1"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,ccpp,cmp-bcc-fusion,cortex-x1,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,spe,ssbs,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-x1c"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,ccpp,cmp-bcc-fusion,cortex-x1,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,flagm,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,lse2,neon,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,ras,rcpc,rcpc-immo,rdm,sha2,spe,ssbs,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-x2"), str_lit("CONTEXTIDREL2,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,cortex-x2,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("cortex-x3"), str_lit("CONTEXTIDREL2,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,complxnum,cortex-x3,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,spe,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("cyclone"), str_lit("aes,alternate-sextload-cvt-f32-pattern,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fuse-aes,fuse-crypto-eor,neon,perfmon,sha2,store-pair-suppress,v8a,zcm,zcz,zcz-fp-workaround,zcz-gp") }, + { str_lit("exynos-m3"), str_lit("addr-lsl-fast,aes,alu-lsl-fast,crc,crypto,el2vmsa,el3,exynos-cheap-as-move,exynosm3,force-32bit-jump-tables,fp-armv8,fuse-address,fuse-adrp-add,fuse-aes,fuse-csel,fuse-literals,neon,perfmon,predictable-select-expensive,sha2,store-pair-suppress,use-postra-scheduler,v8a") }, + { str_lit("exynos-m4"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,arith-bcc-fusion,arith-cbz-fusion,ccpp,crc,crypto,dotprod,el2vmsa,el3,exynos-cheap-as-move,exynosm4,force-32bit-jump-tables,fp-armv8,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-arith-logic,fuse-csel,fuse-literals,lor,lse,neon,pan,pan-rwv,perfmon,ras,rdm,sha2,store-pair-suppress,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh,zcz,zcz-gp") }, + { str_lit("exynos-m5"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,arith-bcc-fusion,arith-cbz-fusion,ccpp,crc,crypto,dotprod,el2vmsa,el3,exynos-cheap-as-move,exynosm4,force-32bit-jump-tables,fp-armv8,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-arith-logic,fuse-csel,fuse-literals,lor,lse,neon,pan,pan-rwv,perfmon,ras,rdm,sha2,store-pair-suppress,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh,zcz,zcz-gp") }, + { str_lit("falkor"), str_lit("addr-lsl-fast,aes,alu-lsl-fast,crc,crypto,el2vmsa,el3,falkor,fp-armv8,neon,perfmon,predictable-select-expensive,rdm,sha2,slow-strqro-store,store-pair-suppress,use-postra-scheduler,v8a,zcz,zcz-gp") }, + { str_lit("generic"), str_lit("enable-select-opt,ete,fp-armv8,fuse-adrp-add,fuse-aes,neon,trbe,use-postra-scheduler") }, + { str_lit("kryo"), str_lit("addr-lsl-fast,aes,alu-lsl-fast,crc,crypto,el2vmsa,el3,fp-armv8,kryo,neon,perfmon,predictable-select-expensive,sha2,store-pair-suppress,use-postra-scheduler,v8a,zcz,zcz-gp") }, + { str_lit("neoverse-512tvb"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,am,bf16,ccdp,ccidx,ccpp,complxnum,crc,crypto,dit,dotprod,el2vmsa,el3,enable-select-opt,flagm,fp-armv8,fp16fml,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mpam,neon,neoverse512tvb,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,rand,ras,rcpc,rcpc-immo,rdm,sel2,sha2,spe,ssbs,sve,tlb-rmi,tracev8.4,uaops,use-postra-scheduler,v8.1a,v8.2a,v8.3a,v8.4a,v8a,vh") }, + { str_lit("neoverse-e1"), str_lit("CONTEXTIDREL2,aes,ccpp,crc,crypto,dotprod,el2vmsa,el3,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,neoversee1,pan,pan-rwv,perfmon,ras,rcpc,rdm,sha2,ssbs,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("neoverse-n1"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,ccpp,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,neoversen1,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,spe,ssbs,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("neoverse-n2"), str_lit("CONTEXTIDREL2,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,neoversen2,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("neoverse-v1"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,am,bf16,ccdp,ccidx,ccpp,complxnum,crc,crypto,dit,dotprod,el2vmsa,el3,enable-select-opt,flagm,fp-armv8,fp16fml,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mpam,neon,neoversev1,no-sve-fp-ld1r,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,rand,ras,rcpc,rcpc-immo,rdm,sel2,sha2,spe,ssbs,sve,tlb-rmi,tracev8.4,uaops,use-postra-scheduler,v8.1a,v8.2a,v8.3a,v8.4a,v8a,vh") }, + { str_lit("neoverse-v2"), str_lit("CONTEXTIDREL2,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,neoversev2,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,rand,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,spe,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("saphira"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,am,ccidx,ccpp,complxnum,crc,crypto,dit,dotprod,el2vmsa,el3,flagm,fp-armv8,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,ras,rcpc,rcpc-immo,rdm,saphira,sel2,sha2,spe,store-pair-suppress,tlb-rmi,tracev8.4,uaops,use-postra-scheduler,v8.1a,v8.2a,v8.3a,v8.4a,v8a,vh,zcz,zcz-gp") }, + { str_lit("thunderx"), str_lit("aes,crc,crypto,el2vmsa,el3,fp-armv8,neon,perfmon,predictable-select-expensive,sha2,store-pair-suppress,thunderx,use-postra-scheduler,v8a") }, + { str_lit("thunderx2t99"), str_lit("CONTEXTIDREL2,aes,aggressive-fma,arith-bcc-fusion,crc,crypto,el2vmsa,el3,fp-armv8,lor,lse,neon,pan,predictable-select-expensive,rdm,sha2,store-pair-suppress,thunderx2t99,use-postra-scheduler,v8.1a,v8a,vh") }, + { str_lit("thunderx3t110"), str_lit("CONTEXTIDREL2,aes,aggressive-fma,arith-bcc-fusion,balance-fp-ops,ccidx,ccpp,complxnum,crc,crypto,el2vmsa,el3,fp-armv8,jsconv,lor,lse,neon,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,store-pair-suppress,strict-align,thunderx3t110,uaops,use-postra-scheduler,v8.1a,v8.2a,v8.3a,v8a,vh") }, + { str_lit("thunderxt81"), str_lit("aes,crc,crypto,el2vmsa,el3,fp-armv8,neon,perfmon,predictable-select-expensive,sha2,store-pair-suppress,thunderxt81,use-postra-scheduler,v8a") }, + { str_lit("thunderxt83"), str_lit("aes,crc,crypto,el2vmsa,el3,fp-armv8,neon,perfmon,predictable-select-expensive,sha2,store-pair-suppress,thunderxt83,use-postra-scheduler,v8a") }, + { str_lit("thunderxt88"), str_lit("aes,crc,crypto,el2vmsa,el3,fp-armv8,neon,perfmon,predictable-select-expensive,sha2,store-pair-suppress,thunderxt88,use-postra-scheduler,v8a") }, + { str_lit("tsv110"), str_lit("CONTEXTIDREL2,aes,ccpp,complxnum,crc,crypto,dotprod,el2vmsa,el3,fp-armv8,fp16fml,fullfp16,fuse-aes,jsconv,lor,lse,neon,pan,pan-rwv,perfmon,ras,rdm,sha2,spe,store-pair-suppress,tsv110,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + // TargetArch_wasm32: + { str_lit("bleeding-edge"), str_lit("atomics,bulk-memory,mutable-globals,nontrapping-fptoint,sign-ext,simd128,tail-call") }, + { str_lit("generic"), str_lit("mutable-globals,sign-ext") }, + { str_lit("mvp"), str_lit("") }, + // TargetArch_wasm64p32: + { str_lit("bleeding-edge"), str_lit("atomics,bulk-memory,mutable-globals,nontrapping-fptoint,sign-ext,simd128,tail-call") }, + { str_lit("generic"), str_lit("mutable-globals,sign-ext") }, + { str_lit("mvp"), str_lit("") }, }; gb_global String target_endian_names[TargetEndian_COUNT] = { @@ -443,9 +894,9 @@ struct BuildContext { PtrMap defined_values; - BlockingMutex target_features_mutex; StringSet target_features_set; String target_features_string; + bool strict_target_features; String minimum_os_version_string; bool minimum_os_version_string_given; @@ -1596,48 +2047,53 @@ gb_internal void init_build_context(TargetMetrics *cross_target, Subtarget subta #include "microsoft_craziness.h" #endif +// NOTE: the target feature and microarch lists are all sorted, so if it turns out to be slow (I don't think it will) +// a binary search is possible. -gb_internal Array split_by_comma(String const &list) { - isize n = 1; - for (isize i = 0; i < list.len; i++) { - if (list.text[i] == ',') { - n++; +gb_internal bool check_single_target_feature_is_valid(String const &feature_list, String const &feature) { + String_Iterator it = {feature_list, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (str == feature) { + return true; } } - auto res = array_make(heap_allocator(), n); - String s = list; - for (isize i = 0; i < n; i++) { - isize m = string_index_byte(s, ','); - if (m < 0) { - res[i] = s; - break; + return false; +} + +gb_internal bool check_target_feature_is_valid(String const &feature, TargetArchKind arch, String *invalid) { + String feature_list = target_features_list[arch]; + String_Iterator it = {feature, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (!check_single_target_feature_is_valid(feature_list, str)) { + if (invalid) *invalid = str; + return false; } - res[i] = substring(s, 0, m); - s = substring(s, m+1, s.len); } - return res; -} -gb_internal bool check_target_feature_is_valid(TokenPos pos, String const &feature) { - // TODO(bill): check_target_feature_is_valid return true; } -gb_internal bool check_target_feature_is_enabled(TokenPos pos, String const &target_feature_list) { - BuildContext *bc = &build_context; - mutex_lock(&bc->target_features_mutex); - defer (mutex_unlock(&bc->target_features_mutex)); - - auto items = split_by_comma(target_feature_list); - array_free(&items); - for (String const &item : items) { - if (!check_target_feature_is_valid(pos, item)) { - error(pos, "Target feature '%.*s' is not valid", LIT(item)); - return false; +gb_internal bool check_target_feature_is_valid_globally(String const &feature, String *invalid) { + String_Iterator it = {feature, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + + bool valid = false; + for (int arch = TargetArch_Invalid; arch < TargetArch_COUNT; arch += 1) { + if (check_target_feature_is_valid(str, cast(TargetArchKind)arch, invalid)) { + valid = true; + break; + } } - if (!string_set_exists(&bc->target_features_set, item)) { - error(pos, "Target feature '%.*s' is not enabled", LIT(item)); + + if (!valid) { + if (invalid) *invalid = str; return false; } } @@ -1645,54 +2101,35 @@ gb_internal bool check_target_feature_is_enabled(TokenPos pos, String const &tar return true; } -gb_internal void enable_target_feature(TokenPos pos, String const &target_feature_list) { - BuildContext *bc = &build_context; - mutex_lock(&bc->target_features_mutex); - defer (mutex_unlock(&bc->target_features_mutex)); - - auto items = split_by_comma(target_feature_list); - for (String const &item : items) { - if (!check_target_feature_is_valid(pos, item)) { - error(pos, "Target feature '%.*s' is not valid", LIT(item)); - continue; - } - - string_set_add(&bc->target_features_set, item); - } - array_free(&items); +gb_internal bool check_target_feature_is_valid_for_target_arch(String const &feature, String *invalid) { + return check_target_feature_is_valid(feature, build_context.metrics.arch, invalid); } - -gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bool with_quotes, bool with_plus) { - isize len = 0; - isize i = 0; - for (String const &feature : build_context.target_features_set) { - if (i != 0) { - len += 1; +gb_internal bool check_target_feature_is_enabled(String const &feature, String *not_enabled) { + String_Iterator it = {feature, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (!string_set_exists(&build_context.target_features_set, str)) { + if (not_enabled) *not_enabled = str; + return false; } - len += feature.len; - if (with_quotes) len += 2; - if (with_plus) len += 1; - i += 1; } - char *features = gb_alloc_array(allocator, char, len+1); - len = 0; - i = 0; - for (String const &feature : build_context.target_features_set) { - if (i != 0) { - features[len++] = ','; - } - if (with_quotes) features[len++] = '"'; - if (with_plus) features[len++] = '+'; - gb_memmove(features + len, feature.text, feature.len); - len += feature.len; - if (with_quotes) features[len++] = '"'; - i += 1; - } - features[len++] = 0; + return true; +} - return features; +gb_internal bool check_target_feature_is_superset_of(String const &superset, String const &of, String *missing) { + String_Iterator it = {of, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (!check_single_target_feature_is_valid(superset, str)) { + if (missing) *missing = str; + return false; + } + } + return true; } // NOTE(Jeroen): Set/create the output and other paths and report an error as appropriate. @@ -1983,10 +2420,6 @@ gb_internal bool init_build_paths(String init_filename) { } } - if (bc->target_features_string.len != 0) { - enable_target_feature({}, bc->target_features_string); - } - return true; } diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index c3c217ec7..825fc6448 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -1719,6 +1719,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As case BuiltinProc_objc_register_selector: case BuiltinProc_objc_register_class: case BuiltinProc_atomic_type_is_lock_free: + case BuiltinProc_has_target_feature: // NOTE(bill): The first arg may be a Type, this will be checked case by case break; @@ -3663,6 +3664,41 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As break; } + case BuiltinProc_has_target_feature: { + String features = str_lit(""); + + check_expr_or_type(c, operand, ce->args[0]); + + if (is_type_string(operand->type) && operand->mode == Addressing_Constant) { + GB_ASSERT(operand->value.kind == ExactValue_String); + features = operand->value.value_string; + } else { + Type *pt = base_type(operand->type); + if (pt->kind == Type_Proc) { + if (pt->Proc.require_target_feature.len != 0) { + GB_ASSERT(pt->Proc.enable_target_feature.len == 0); + features = pt->Proc.require_target_feature; + } else if (pt->Proc.enable_target_feature.len != 0) { + features = pt->Proc.enable_target_feature; + } else { + error(ce->args[0], "Expected the procedure type given to '%.*s' to have @(require_target_feature=\"...\") or @(enable_target_feature=\"...\")", LIT(builtin_name)); + } + } else { + error(ce->args[0], "Expected a constant string or procedure type for '%.*s'", LIT(builtin_name)); + } + } + + String invalid; + if (!check_target_feature_is_valid_globally(features, &invalid)) { + error(ce->args[0], "Target feature '%.*s' is not a valid target feature", LIT(invalid)); + } + + operand->value = exact_value_bool(check_target_feature_is_enabled(features, nullptr)); + operand->mode = Addressing_Constant; + operand->type = t_untyped_bool; + break; + } + case BuiltinProc_soa_struct: { Operand x = {}; Operand y = {}; @@ -6014,7 +6050,10 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As return false; } - enable_target_feature({}, str_lit("atomics")); + if (!check_target_feature_is_enabled(str_lit("atomics"), nullptr)) { + error(call, "'%.*s' requires target feature 'atomics' to be enabled, enable it with -target-features:\"atomics\" or choose a different -microarch", LIT(builtin_name)); + return false; + } Operand ptr = {}; Operand expected = {}; @@ -6068,7 +6107,10 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As return false; } - enable_target_feature({}, str_lit("atomics")); + if (!check_target_feature_is_enabled(str_lit("atomics"), nullptr)) { + error(call, "'%.*s' requires target feature 'atomics' to be enabled, enable it with -target-features:\"atomics\" or choose a different -microarch", LIT(builtin_name)); + return false; + } Operand ptr = {}; Operand waiters = {}; diff --git a/src/check_decl.cpp b/src/check_decl.cpp index 952a877a4..5b9486873 100644 --- a/src/check_decl.cpp +++ b/src/check_decl.cpp @@ -886,17 +886,37 @@ gb_internal void check_proc_decl(CheckerContext *ctx, Entity *e, DeclInfo *d) { check_objc_methods(ctx, e, ac); - if (ac.require_target_feature.len != 0 && ac.enable_target_feature.len != 0) { - error(e->token, "Attributes @(require_target_feature=...) and @(enable_target_feature=...) cannot be used together"); - } else if (ac.require_target_feature.len != 0) { - if (check_target_feature_is_enabled(e->token.pos, ac.require_target_feature)) { - e->Procedure.target_feature = ac.require_target_feature; - } else { - e->Procedure.target_feature_disabled = true; + { + if (ac.require_target_feature.len != 0 && ac.enable_target_feature.len != 0) { + error(e->token, "A procedure cannot have both @(require_target_feature=\"...\") and @(enable_target_feature=\"...\")"); + } + + if (build_context.strict_target_features && ac.enable_target_feature.len != 0) { + ac.require_target_feature = ac.enable_target_feature; + ac.enable_target_feature.len = 0; + } + + if (ac.require_target_feature.len != 0) { + pt->require_target_feature = ac.require_target_feature; + String invalid; + if (!check_target_feature_is_valid_globally(ac.require_target_feature, &invalid)) { + error(e->token, "Required target feature '%.*s' is not a valid target feature", LIT(invalid)); + } else if (!check_target_feature_is_enabled(ac.require_target_feature, nullptr)) { + e->flags |= EntityFlag_Disabled; + } + } else if (ac.enable_target_feature.len != 0) { + + // NOTE: disallow wasm, features on that arch are always global to the module. + if (is_arch_wasm()) { + error(e->token, "@(enable_target_feature=\"...\") is not allowed on wasm, features for wasm must be declared globally"); + } + + pt->enable_target_feature = ac.enable_target_feature; + String invalid; + if (!check_target_feature_is_valid_globally(ac.enable_target_feature, &invalid)) { + error(e->token, "Procedure enabled target feature '%.*s' is not a valid target feature", LIT(invalid)); + } } - } else if (ac.enable_target_feature.len != 0) { - enable_target_feature(e->token.pos, ac.enable_target_feature); - e->Procedure.target_feature = ac.enable_target_feature; } switch (e->Procedure.optimization_mode) { @@ -1370,6 +1390,10 @@ gb_internal void check_proc_group_decl(CheckerContext *ctx, Entity *pg_entity, D continue; } + if (p->flags & EntityFlag_Disabled) { + continue; + } + String name = p->token.string; for (isize k = j+1; k < pge->entities.count; k++) { @@ -1387,6 +1411,10 @@ gb_internal void check_proc_group_decl(CheckerContext *ctx, Entity *pg_entity, D ERROR_BLOCK(); + if (q->flags & EntityFlag_Disabled) { + continue; + } + ProcTypeOverloadKind kind = are_proc_types_overload_safe(p->type, q->type); bool both_have_where_clauses = false; if (p->decl_info->proc_lit != nullptr && q->decl_info->proc_lit != nullptr) { @@ -1423,6 +1451,7 @@ gb_internal void check_proc_group_decl(CheckerContext *ctx, Entity *pg_entity, D break; case ProcOverload_ParamCount: case ProcOverload_ParamTypes: + case ProcOverload_TargetFeatures: // This is okay :) break; diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 06d0a8b12..490c9aae7 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -6526,12 +6526,17 @@ gb_internal CallArgumentData check_call_arguments_proc_group(CheckerContext *c, array_add(&proc_entities, proc); } + int max_matched_features = 0; gbString expr_name = expr_to_string(operand->expr); defer (gb_string_free(expr_name)); for_array(i, procs) { Entity *p = procs[i]; + if (p->flags & EntityFlag_Disabled) { + continue; + } + Type *pt = base_type(p->type); if (pt != nullptr && is_type_proc(pt)) { CallArgumentData data = {}; @@ -6562,11 +6567,24 @@ gb_internal CallArgumentData check_call_arguments_proc_group(CheckerContext *c, item.score += assign_score_function(1); } + max_matched_features = gb_max(max_matched_features, matched_target_features(&pt->Proc)); + item.index = index; array_add(&valids, item); } } + if (max_matched_features > 0) { + for_array(i, valids) { + Entity *p = procs[valids[i].index]; + Type *t = base_type(p->type); + GB_ASSERT(t->kind == Type_Proc); + + int matched = matched_target_features(&t->Proc); + valids[i].score += assign_score_function(max_matched_features-matched); + } + } + if (valids.count > 1) { array_sort(valids, valid_index_and_score_cmp); i64 best_score = valids[0].score; @@ -6708,7 +6726,11 @@ gb_internal CallArgumentData check_call_arguments_proc_group(CheckerContext *c, ERROR_BLOCK(); error(operand->expr, "Ambiguous procedure group call '%s' that match with the given arguments", expr_name); - print_argument_types(); + if (positional_operands.count == 0 && named_operands.count == 0) { + error_line("\tNo given arguments\n"); + } else { + print_argument_types(); + } for (auto const &valid : valids) { Entity *proc = proc_entities[valid.index]; @@ -7553,8 +7575,11 @@ gb_internal ExprKind check_call_expr(CheckerContext *c, Operand *operand, Ast *c } } + bool is_call_inlined = false; + switch (inlining) { case ProcInlining_inline: + is_call_inlined = true; if (proc != nullptr) { Entity *e = entity_from_expr(proc); if (e != nullptr && e->kind == Entity_Procedure) { @@ -7570,6 +7595,47 @@ gb_internal ExprKind check_call_expr(CheckerContext *c, Operand *operand, Ast *c break; case ProcInlining_no_inline: break; + case ProcInlining_none: + if (proc != nullptr) { + Entity *e = entity_from_expr(proc); + if (e != nullptr && e->kind == Entity_Procedure) { + DeclInfo *decl = e->decl_info; + if (decl->proc_lit) { + ast_node(pl, ProcLit, decl->proc_lit); + if (pl->inlining == ProcInlining_inline) { + is_call_inlined = true; + } + } + } + } + } + + { + String invalid; + if (pt->kind == Type_Proc && pt->Proc.require_target_feature.len != 0) { + if (!check_target_feature_is_valid_for_target_arch(pt->Proc.require_target_feature, &invalid)) { + error(call, "Called procedure requires target feature '%.*s' which is invalid for the build target", LIT(invalid)); + } else if (!check_target_feature_is_enabled(pt->Proc.require_target_feature, &invalid)) { + error(call, "Calling this procedure requires target feature '%.*s' to be enabled", LIT(invalid)); + } + } + + if (pt->kind == Type_Proc && pt->Proc.enable_target_feature.len != 0) { + if (!check_target_feature_is_valid_for_target_arch(pt->Proc.enable_target_feature, &invalid)) { + error(call, "Called procedure enables target feature '%.*s' which is invalid for the build target", LIT(invalid)); + } + + // NOTE: Due to restrictions in LLVM you can not inline calls with a superset of features. + if (is_call_inlined) { + GB_ASSERT(c->curr_proc_decl); + GB_ASSERT(c->curr_proc_decl->entity); + GB_ASSERT(c->curr_proc_decl->entity->type->kind == Type_Proc); + String scope_features = c->curr_proc_decl->entity->type->Proc.enable_target_feature; + if (!check_target_feature_is_superset_of(scope_features, pt->Proc.enable_target_feature, &invalid)) { + error(call, "Inlined procedure enables target feature '%.*s', this requires the calling procedure to at least enable the same feature", LIT(invalid)); + } + } + } } operand->expr = call; diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index c15ec7137..8419c6568 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -44,6 +44,8 @@ enum BuiltinProcId { // "Intrinsics" BuiltinProc_is_package_imported, + BuiltinProc_has_target_feature, + BuiltinProc_transpose, BuiltinProc_outer_product, BuiltinProc_hadamard_product, @@ -354,6 +356,8 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { // "Intrinsics" {STR_LIT("is_package_imported"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("has_target_feature"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("transpose"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("outer_product"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("hadamard_product"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, diff --git a/src/entity.cpp b/src/entity.cpp index a12e1d0a6..d76d5f441 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -252,10 +252,8 @@ struct Entity { bool is_foreign : 1; bool is_export : 1; bool generated_from_polymorphic : 1; - bool target_feature_disabled : 1; bool entry_point_only : 1; bool has_instrumentation : 1; - String target_feature; } Procedure; struct { Array entities; @@ -502,4 +500,4 @@ gb_internal bool is_entity_local_variable(Entity *e) { return ((e->scope->flags &~ ScopeFlag_ContextDefined) == 0) || (e->scope->flags & ScopeFlag_Proc) != 0; -} \ No newline at end of file +} diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index 4b94cf020..fad130b99 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -41,6 +41,37 @@ String get_default_microarchitecture() { return default_march; } +String get_final_microarchitecture() { + BuildContext *bc = &build_context; + + String microarch = bc->microarch; + if (microarch.len == 0) { + microarch = get_default_microarchitecture(); + } else if (microarch == str_lit("native")) { + microarch = make_string_c(LLVMGetHostCPUName()); + } + return microarch; +} + +gb_internal String get_default_features() { + BuildContext *bc = &build_context; + + int off = 0; + for (int i = 0; i < bc->metrics.arch; i += 1) { + off += target_microarch_counts[i]; + } + + String microarch = get_final_microarchitecture(); + for (int i = off; i < off+target_microarch_counts[bc->metrics.arch]; i += 1) { + if (microarch_features_list[i].microarch == microarch) { + return microarch_features_list[i].features; + } + } + + GB_PANIC("unknown microarch"); + return {}; +} + gb_internal void lb_add_foreign_library_path(lbModule *m, Entity *e) { if (e == nullptr) { return; @@ -2468,69 +2499,24 @@ gb_internal bool lb_generate_code(lbGenerator *gen) { code_mode = LLVMCodeModelKernel; } - String host_cpu_name = copy_string(permanent_allocator(), make_string_c(LLVMGetHostCPUName())); - String llvm_cpu = get_default_microarchitecture(); - char const *llvm_features = ""; - if (build_context.microarch.len != 0) { - if (build_context.microarch == "native") { - llvm_cpu = host_cpu_name; - } else { - llvm_cpu = copy_string(permanent_allocator(), build_context.microarch); - } - if (llvm_cpu == host_cpu_name) { - llvm_features = LLVMGetHostCPUFeatures(); + String llvm_cpu = get_final_microarchitecture(); + + gbString llvm_features = gb_string_make(temporary_allocator(), ""); + String_Iterator it = {build_context.target_features_string, 0}; + bool first = true; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (!first) { + llvm_features = gb_string_appendc(llvm_features, ","); } - } + first = false; - // NOTE(Jeroen): Uncomment to get the list of supported microarchitectures. - /* - if (build_context.microarch == "?") { - string_set_add(&build_context.target_features_set, str_lit("+cpuhelp")); + llvm_features = gb_string_appendc(llvm_features, "+"); + llvm_features = gb_string_append_length(llvm_features, str.text, str.len); } - */ - if (build_context.target_features_set.entries.count != 0) { - // Prefix all of the features with a `+`, because we are - // enabling additional features. - char const *additional_features = target_features_set_to_cstring(permanent_allocator(), false, true); - - String f_string = make_string_c(llvm_features); - String a_string = make_string_c(additional_features); - isize f_len = f_string.len; - - if (f_len == 0) { - // The common case is that llvm_features is empty, so - // the target_features_set additions can be used as is. - llvm_features = additional_features; - } else { - // The user probably specified `-microarch:native`, so - // llvm_features is populated by LLVM's idea of what - // the host CPU supports. - // - // As far as I can tell, (which is barely better than - // wild guessing), a bitset is formed by parsing the - // string left to right. - // - // So, llvm_features + ',' + additonal_features, will - // makes the target_features_set override llvm_features. - - char *tmp = gb_alloc_array(permanent_allocator(), char, f_len + 1 + a_string.len + 1); - isize len = 0; - - // tmp = f_string - gb_memmove(tmp, f_string.text, f_string.len); - len += f_string.len; - // tmp += ',' - tmp[len++] = ','; - // tmp += a_string - gb_memmove(tmp + len, a_string.text, a_string.len); - len += a_string.len; - // tmp += NUL - tmp[len++] = 0; - - llvm_features = tmp; - } - } + debugf("CPU: %.*s, Features: %s\n", LIT(llvm_cpu), llvm_features); // GB_ASSERT_MSG(LLVMTargetHasAsmBackend(target)); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index f73698d34..898c9ac31 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -177,17 +177,24 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i break; } - if (!entity->Procedure.target_feature_disabled && - entity->Procedure.target_feature.len != 0) { - auto features = split_by_comma(entity->Procedure.target_feature); - for_array(i, features) { - String feature = features[i]; - LLVMAttributeRef ref = LLVMCreateStringAttribute( - m->ctx, - cast(char const *)feature.text, cast(unsigned)feature.len, - "", 0); - LLVMAddAttributeAtIndex(p->value, LLVMAttributeIndex_FunctionIndex, ref); + if (pt->Proc.enable_target_feature.len != 0) { + gbString feature_str = gb_string_make(temporary_allocator(), ""); + + String_Iterator it = {pt->Proc.enable_target_feature, 0}; + bool first = true; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (!first) { + feature_str = gb_string_appendc(feature_str, ","); + } + first = false; + + feature_str = gb_string_appendc(feature_str, "+"); + feature_str = gb_string_append_length(feature_str, str.text, str.len); } + + lb_add_attribute_to_proc_with_string(m, p->value, make_string_c("target-features"), make_string_c(feature_str)); } if (entity->flags & EntityFlag_Cold) { diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index c01ab0692..db99ebc99 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1708,7 +1708,8 @@ gb_internal lbValue lb_emit_mul_add(lbProcedure *p, lbValue a, lbValue b, lbValu if (is_possible) { switch (build_context.metrics.arch) { case TargetArch_amd64: - if (type_size_of(t) == 2) { + // NOTE: using the intrinsic when not supported causes slow codegen (See #2928). + if (type_size_of(t) == 2 || !check_target_feature_is_enabled(str_lit("fma"), nullptr)) { is_possible = false; } break; diff --git a/src/main.cpp b/src/main.cpp index ee7de7f81..93685acb9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -272,6 +272,7 @@ enum BuildFlagKind { BuildFlag_ExtraAssemblerFlags, BuildFlag_Microarch, BuildFlag_TargetFeatures, + BuildFlag_StrictTargetFeatures, BuildFlag_MinimumOSVersion, BuildFlag_NoThreadLocal, @@ -467,6 +468,7 @@ gb_internal bool parse_build_flags(Array args) { add_flag(&build_flags, BuildFlag_ExtraAssemblerFlags, str_lit("extra-assembler-flags"), BuildFlagParam_String, Command__does_build); add_flag(&build_flags, BuildFlag_Microarch, str_lit("microarch"), BuildFlagParam_String, Command__does_build); add_flag(&build_flags, BuildFlag_TargetFeatures, str_lit("target-features"), BuildFlagParam_String, Command__does_build); + add_flag(&build_flags, BuildFlag_StrictTargetFeatures, str_lit("strict-target-features"), BuildFlagParam_None, Command__does_build); add_flag(&build_flags, BuildFlag_MinimumOSVersion, str_lit("minimum-os-version"), BuildFlagParam_String, Command__does_build); add_flag(&build_flags, BuildFlag_RelocMode, str_lit("reloc-mode"), BuildFlagParam_String, Command__does_build); @@ -1083,6 +1085,9 @@ gb_internal bool parse_build_flags(Array args) { string_to_lower(&build_context.target_features_string); break; } + case BuildFlag_StrictTargetFeatures: + build_context.strict_target_features = true; + break; case BuildFlag_MinimumOSVersion: { GB_ASSERT(value.kind == ExactValue_String); build_context.minimum_os_version_string = value.value_string; @@ -1981,7 +1986,20 @@ gb_internal void print_show_help(String const arg0, String const &command) { print_usage_line(2, "Examples:"); print_usage_line(3, "-microarch:sandybridge"); print_usage_line(3, "-microarch:native"); - print_usage_line(3, "-microarch:? for a list"); + print_usage_line(3, "-microarch:\"?\" for a list"); + print_usage_line(0, ""); + + print_usage_line(1, "-target-features:"); + print_usage_line(2, "Specifies CPU features to enable on top of the enabled features implied by -microarch."); + print_usage_line(2, "Examples:"); + print_usage_line(3, "-target-features:atomics"); + print_usage_line(3, "-target-features:\"sse2,aes\""); + print_usage_line(3, "-target-features:\"?\" for a list"); + print_usage_line(0, ""); + + print_usage_line(1, "-strict-target-features"); + print_usage_line(2, "Makes @(enable_target_features=\"...\") behave the same way as @(require_target_features=\"...\")."); + print_usage_line(2, "This enforces that all generated code uses features supported by the combination of -target, -microarch, and -target-features."); print_usage_line(0, ""); print_usage_line(1, "-reloc-mode:"); @@ -2663,7 +2681,7 @@ int main(int arg_count, char const **arg_ptr) { // Check chosen microarchitecture. If not found or ?, print list. bool print_microarch_list = true; - if (build_context.microarch.len == 0) { + if (build_context.microarch.len == 0 || build_context.microarch == str_lit("native")) { // Autodetect, no need to print list. print_microarch_list = false; } else { @@ -2680,6 +2698,11 @@ int main(int arg_count, char const **arg_ptr) { } } + // Set and check build paths... + if (!init_build_paths(init_filename)) { + return 1; + } + String default_march = get_default_microarchitecture(); if (print_microarch_list) { if (build_context.microarch != "?") { @@ -2703,13 +2726,57 @@ int main(int arg_count, char const **arg_ptr) { return 0; } - // Set and check build paths... - if (!init_build_paths(init_filename)) { - return 1; + String march = get_final_microarchitecture(); + String default_features = get_default_features(); + { + String_Iterator it = {default_features, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + string_set_add(&build_context.target_features_set, str); + } + } + + if (build_context.target_features_string.len != 0) { + String_Iterator target_it = {build_context.target_features_string, 0}; + for (;;) { + String item = string_split_iterator(&target_it, ','); + if (item == "") break; + + String invalid; + if (!check_target_feature_is_valid_for_target_arch(item, &invalid) && item != str_lit("help")) { + if (item != str_lit("?")) { + gb_printf_err("Unkown target feature '%.*s'.\n", LIT(invalid)); + } + gb_printf("Possible -target-features for target %.*s are:\n", LIT(target_arch_names[build_context.metrics.arch])); + gb_printf("\n"); + + String feature_list = target_features_list[build_context.metrics.arch]; + String_Iterator it = {feature_list, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (check_single_target_feature_is_valid(default_features, str)) { + if (has_ansi_terminal_colours()) { + gb_printf("\t%.*s\x1b[38;5;244m (implied by target microarch %.*s)\x1b[0m\n", LIT(str), LIT(march)); + } else { + gb_printf("\t%.*s (implied by current microarch %.*s)\n", LIT(str), LIT(march)); + } + } else { + gb_printf("\t%.*s\n", LIT(str)); + } + } + + return 1; + } + + string_set_add(&build_context.target_features_set, item); + } } if (build_context.show_debug_messages) { - debugf("Selected microarch: %.*s\n", LIT(default_march)); + debugf("Selected microarch: %.*s\n", LIT(march)); + debugf("Default microarch features: %.*s\n", LIT(default_features)); for_array(i, build_context.build_paths) { String build_path = path_to_string(heap_allocator(), build_context.build_paths[i]); debugf("build_paths[%ld]: %.*s\n", i, LIT(build_path)); diff --git a/src/types.cpp b/src/types.cpp index 18cb12ea1..3ec05059f 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -184,6 +184,8 @@ struct TypeProc { isize specialization_count; ProcCallingConvention calling_convention; i32 variadic_index; + String require_target_feature; + String enable_target_feature; // TODO(bill): Make this a flag set rather than bools bool variadic; bool require_results; @@ -2991,7 +2993,22 @@ gb_internal Type *union_tag_type(Type *u) { return t_uint; } +gb_internal int matched_target_features(TypeProc *t) { + if (t->require_target_feature.len == 0) { + return 0; + } + int matches = 0; + String_Iterator it = {t->require_target_feature, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (check_target_feature_is_valid_for_target_arch(str, nullptr)) { + matches += 1; + } + } + return matches; +} enum ProcTypeOverloadKind { ProcOverload_Identical, // The types are identical @@ -3003,6 +3020,7 @@ enum ProcTypeOverloadKind { ProcOverload_ResultCount, ProcOverload_ResultTypes, ProcOverload_Polymorphic, + ProcOverload_TargetFeatures, ProcOverload_NotProcedure, @@ -3060,6 +3078,10 @@ gb_internal ProcTypeOverloadKind are_proc_types_overload_safe(Type *x, Type *y) } } + if (matched_target_features(&px) != matched_target_features(&py)) { + return ProcOverload_TargetFeatures; + } + if (px.params != nullptr && py.params != nullptr) { Entity *ex = px.params->Tuple.variables[0]; Entity *ey = py.params->Tuple.variables[0]; -- cgit v1.2.3 From de5ce90fa70c8ae4b859b50704dd0ffacbeedb1e Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 7 May 2024 15:28:09 +0100 Subject: Add metadata to packed structs field accesses to state it is packed --- src/llvm_backend.hpp | 6 +++++- src/llvm_backend_general.cpp | 6 ++++++ src/llvm_backend_utility.cpp | 33 ++++++++++++++++++++++++++++++++- 3 files changed, 43 insertions(+), 2 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 7dc5f6b63..975aa17c5 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -581,6 +581,10 @@ gb_internal LLVMTypeRef llvm_array_type(LLVMTypeRef ElementType, uint64_t Elemen #endif } + +gb_internal void lb_set_metadata_custom_u64(lbModule *m, LLVMValueRef v_ref, String name, u64 value); +gb_internal u64 lb_get_metadata_custom_u64(lbModule *m, LLVMValueRef v_ref, String name); + #define LB_STARTUP_RUNTIME_PROC_NAME "__$startup_runtime" #define LB_CLEANUP_RUNTIME_PROC_NAME "__$cleanup_runtime" #define LB_TYPE_INFO_DATA_NAME "__$type_info_data" @@ -719,4 +723,4 @@ gb_global char const *llvm_linkage_strings[] = { "linker private weak linkage" }; -#define ODIN_METADATA_REQUIRE "odin-metadata-require", 21 +#define ODIN_METADATA_IS_PACKED str_lit("odin-is-packed") \ No newline at end of file diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 15cbb7c71..22ffbd89c 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -1107,6 +1107,12 @@ gb_internal lbValue lb_emit_load(lbProcedure *p, lbValue value) { GB_ASSERT(is_type_pointer(value.type)); Type *t = type_deref(value.type); LLVMValueRef v = LLVMBuildLoad2(p->builder, lb_type(p->module, t), value.value, ""); + + u64 is_packed = lb_get_metadata_custom_u64(p->module, v, ODIN_METADATA_IS_PACKED); + if (is_packed != 0) { + LLVMSetAlignment(v, 1); + } + return lbValue{v, t}; } diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index db99ebc99..8b046c784 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -57,6 +57,29 @@ gb_internal lbValue lb_correct_endianness(lbProcedure *p, lbValue value) { return value; } + +gb_internal void lb_set_metadata_custom_u64(lbModule *m, LLVMValueRef v_ref, String name, u64 value) { + unsigned md_id = LLVMGetMDKindIDInContext(m->ctx, cast(char const *)name.text, cast(unsigned)name.len); + LLVMMetadataRef md = LLVMValueAsMetadata(LLVMConstInt(lb_type(m, t_u64), value, false)); + LLVMValueRef node = LLVMMetadataAsValue(m->ctx, LLVMMDNodeInContext2(m->ctx, &md, 1)); + LLVMSetMetadata(v_ref, md_id, node); +} +gb_internal u64 lb_get_metadata_custom_u64(lbModule *m, LLVMValueRef v_ref, String name) { + unsigned md_id = LLVMGetMDKindIDInContext(m->ctx, cast(char const *)name.text, cast(unsigned)name.len); + LLVMValueRef v_md = LLVMGetMetadata(v_ref, md_id); + if (v_md == nullptr) { + return 0; + } + unsigned node_count = LLVMGetMDNodeNumOperands(v_md); + if (node_count == 0) { + return 0; + } + GB_ASSERT(node_count == 1); + LLVMValueRef value = nullptr; + LLVMGetMDNodeOperands(v_md, &value); + return LLVMConstIntGetZExtValue(value); +} + gb_internal LLVMValueRef lb_mem_zero_ptr_internal(lbProcedure *p, LLVMValueRef ptr, usize len, unsigned alignment, bool is_volatile) { return lb_mem_zero_ptr_internal(p, ptr, LLVMConstInt(lb_type(p->module, t_uint), len, false), alignment, is_volatile); } @@ -1148,7 +1171,15 @@ gb_internal lbValue lb_emit_struct_ep(lbProcedure *p, lbValue s, i32 index) { GB_ASSERT_MSG(result_type != nullptr, "%s %d", type_to_string(t), index); - return lb_emit_struct_ep_internal(p, s, index, result_type); + lbValue gep = lb_emit_struct_ep_internal(p, s, index, result_type); + + Type *bt = base_type(t); + if (bt->kind == Type_Struct && bt->Struct.is_packed) { + lb_set_metadata_custom_u64(p->module, gep.value, ODIN_METADATA_IS_PACKED, 1); + GB_ASSERT(lb_get_metadata_custom_u64(p->module, gep.value, ODIN_METADATA_IS_PACKED) == 1); + } + + return gep; } gb_internal lbValue lb_emit_tuple_ev(lbProcedure *p, lbValue value, i32 index) { -- cgit v1.2.3 From c9b1c99a4057b7e3f6caee3fb37cf85ca29a7fc9 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 16 May 2024 16:27:09 +0100 Subject: Fix `soa_zip` and `soa_unzip` --- src/check_builtin.cpp | 4 ++-- src/llvm_backend_utility.cpp | 14 +++++++++++--- src/types.cpp | 10 ++++++++++ 3 files changed, 23 insertions(+), 5 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index c7d27cf38..4636d810a 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -3428,8 +3428,8 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As auto types = slice_make(permanent_allocator(), t->Struct.fields.count-1); for_array(i, types) { Entity *f = t->Struct.fields[i]; - GB_ASSERT(f->type->kind == Type_Pointer); - types[i] = alloc_type_slice(f->type->Pointer.elem); + GB_ASSERT(f->type->kind == Type_MultiPointer); + types[i] = alloc_type_slice(f->type->MultiPointer.elem); } operand->type = alloc_type_tuple_from_field_types(types.data, types.count, false, false); diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 8b046c784..5ebe0ddd9 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -328,6 +328,7 @@ gb_internal lbValue lb_soa_zip(lbProcedure *p, AstCallExpr *ce, TypeAndValue con lbAddr res = lb_add_local_generated(p, tv.type, true); for_array(i, slices) { lbValue src = lb_slice_elem(p, slices[i]); + src = lb_emit_conv(p, src, alloc_type_pointer_to_multi_pointer(src.type)); lbValue dst = lb_emit_struct_ep(p, res.addr, cast(i32)i); lb_emit_store(p, dst, src); } @@ -1559,19 +1560,26 @@ gb_internal lbValue lb_emit_matrix_ev(lbProcedure *p, lbValue s, isize row, isiz return lb_emit_load(p, ptr); } - gb_internal void lb_fill_slice(lbProcedure *p, lbAddr const &slice, lbValue base_elem, lbValue len) { Type *t = lb_addr_type(slice); GB_ASSERT(is_type_slice(t)); lbValue ptr = lb_addr_get_ptr(p, slice); - lb_emit_store(p, lb_emit_struct_ep(p, ptr, 0), base_elem); + lbValue data = lb_emit_struct_ep(p, ptr, 0); + if (are_types_identical(type_deref(base_elem.type, true), type_deref(type_deref(data.type), true))) { + base_elem = lb_emit_conv(p, base_elem, type_deref(data.type)); + } + lb_emit_store(p, data, base_elem); lb_emit_store(p, lb_emit_struct_ep(p, ptr, 1), len); } gb_internal void lb_fill_string(lbProcedure *p, lbAddr const &string, lbValue base_elem, lbValue len) { Type *t = lb_addr_type(string); GB_ASSERT(is_type_string(t)); lbValue ptr = lb_addr_get_ptr(p, string); - lb_emit_store(p, lb_emit_struct_ep(p, ptr, 0), base_elem); + lbValue data = lb_emit_struct_ep(p, ptr, 0); + if (are_types_identical(type_deref(base_elem.type, true), type_deref(type_deref(data.type), true))) { + base_elem = lb_emit_conv(p, base_elem, type_deref(data.type)); + } + lb_emit_store(p, data, base_elem); lb_emit_store(p, lb_emit_struct_ep(p, ptr, 1), len); } diff --git a/src/types.cpp b/src/types.cpp index 47ed86f7a..390ee842a 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -988,6 +988,16 @@ gb_internal Type *alloc_type_soa_pointer(Type *elem) { return t; } +gb_internal Type *alloc_type_pointer_to_multi_pointer(Type *ptr) { + Type *original_type = ptr; + ptr = base_type(ptr); + if (ptr->kind == Type_Pointer) { + return alloc_type_multi_pointer(ptr->Pointer.elem); + } else if (ptr->kind != Type_MultiPointer) { + GB_PANIC("Invalid type: %s", type_to_string(original_type)); + } + return original_type; +} gb_internal Type *alloc_type_array(Type *elem, i64 count, Type *generic_count = nullptr) { if (generic_count != nullptr) { -- cgit v1.2.3 From f6ef3950572274da65e11c131af28f4f539a2303 Mon Sep 17 00:00:00 2001 From: Kamil T Date: Mon, 20 May 2024 13:57:08 +0100 Subject: Fix Odin to LLVM memory order mapping for .Relaxed and .Consume --- src/llvm_backend_utility.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 5ebe0ddd9..f7674a8bc 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -2223,8 +2223,8 @@ gb_internal LLVMAtomicOrdering llvm_atomic_ordering_from_odin(ExactValue const & GB_ASSERT(value.kind == ExactValue_Integer); i64 v = exact_value_to_i64(value); switch (v) { - case OdinAtomicMemoryOrder_relaxed: return LLVMAtomicOrderingUnordered; - case OdinAtomicMemoryOrder_consume: return LLVMAtomicOrderingMonotonic; + case OdinAtomicMemoryOrder_relaxed: return LLVMAtomicOrderingMonotonic; + case OdinAtomicMemoryOrder_consume: return LLVMAtomicOrderingAcquire; case OdinAtomicMemoryOrder_acquire: return LLVMAtomicOrderingAcquire; case OdinAtomicMemoryOrder_release: return LLVMAtomicOrderingRelease; case OdinAtomicMemoryOrder_acq_rel: return LLVMAtomicOrderingAcquireRelease; -- cgit v1.2.3 From ae63fd923039b6d3ab9a8abe0407827bf5fcd86d Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 30 May 2024 23:08:42 +0100 Subject: Fix #3649 --- src/llvm_backend_utility.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index f7674a8bc..94153e233 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1365,6 +1365,8 @@ gb_internal lbValue lb_emit_deep_field_gep(lbProcedure *p, lbValue e, Selection } else { e = lb_emit_ptr_offset(p, lb_emit_load(p, arr), index); } + e.type = alloc_type_multi_pointer_to_pointer(e.type); + } else if (is_type_quaternion(type)) { e = lb_emit_struct_ep(p, e, index); } else if (is_type_raw_union(type)) { -- cgit v1.2.3 From b79d7e69177ccbc25b75f1aa7a90a62f64c6cee1 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Fri, 28 Jun 2024 01:16:26 +0200 Subject: disallow non-global foreign import of variables on wasm --- src/build_settings.cpp | 4 +++- src/check_decl.cpp | 8 ++++---- src/llvm_backend.cpp | 2 -- src/llvm_backend_proc.cpp | 2 +- src/llvm_backend_utility.cpp | 2 +- src/tilde.cpp | 1 - 6 files changed, 9 insertions(+), 10 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 0495cf3b5..1988e9cac 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -2053,10 +2053,12 @@ gb_internal void init_build_context(TargetMetrics *cross_target, Subtarget subta gbString link_flags = gb_string_make(heap_allocator(), " "); // link_flags = gb_string_appendc(link_flags, "--export-all "); // link_flags = gb_string_appendc(link_flags, "--export-table "); - link_flags = gb_string_appendc(link_flags, "--allow-undefined "); // if (bc->metrics.arch == TargetArch_wasm64) { // link_flags = gb_string_appendc(link_flags, "-mwasm64 "); // } + if (bc->metrics.os != TargetOs_orca) { + link_flags = gb_string_appendc(link_flags, "--allow-undefined "); + } if (bc->no_entry_point || bc->metrics.os == TargetOs_orca) { link_flags = gb_string_appendc(link_flags, "--no-entry "); } diff --git a/src/check_decl.cpp b/src/check_decl.cpp index b5307838c..883cfcba9 100644 --- a/src/check_decl.cpp +++ b/src/check_decl.cpp @@ -709,7 +709,7 @@ gb_internal Entity *init_entity_foreign_library(CheckerContext *ctx, Entity *e) } if (ident == nullptr) { - error(e->token, "foreign entiies must declare which library they are from"); + error(e->token, "foreign entities must declare which library they are from"); } else if (ident->kind != Ast_Ident) { error(ident, "foreign library names must be an identifier"); } else { @@ -1320,9 +1320,9 @@ gb_internal void check_global_variable_decl(CheckerContext *ctx, Entity *&e, Ast error(e->token, "A foreign variable declaration cannot have a default value"); } init_entity_foreign_library(ctx, e); - // if (is_arch_wasm()) { - // error(e->token, "A foreign variable declaration are not allowed for the '%.*s' architecture", LIT(target_arch_names[build_context.metrics.arch])); - // } + if (is_arch_wasm() && e->Variable.foreign_library != nullptr) { + error(e->token, "A foreign variable declaration can not be scoped to a module and must be declared in a 'foreign {' (without a library) block"); + } } if (ac.link_name.len > 0) { e->Variable.link_name = ac.link_name; diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index d119633ec..92c8138c5 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -3217,8 +3217,6 @@ gb_internal bool lb_generate_code(lbGenerator *gen) { LLVMSetDLLStorageClass(g.value, LLVMDLLImportStorageClass); LLVMSetExternallyInitialized(g.value, true); lb_add_foreign_library_path(m, e->Variable.foreign_library); - - // lb_set_wasm_import_attributes(g.value, e, name); } else { LLVMSetInitializer(g.value, LLVMConstNull(lb_type(m, e->type))); } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 958b4fd38..68ba4f74e 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -233,7 +233,7 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i if (p->is_foreign) { - lb_set_wasm_import_attributes(p->value, entity, p->name); + lb_set_wasm_procedure_import_attributes(p->value, entity, p->name); } diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 94153e233..98ed0c57e 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -2018,7 +2018,7 @@ gb_internal LLVMValueRef llvm_get_inline_asm(LLVMTypeRef func_type, String const } -gb_internal void lb_set_wasm_import_attributes(LLVMValueRef value, Entity *entity, String import_name) { +gb_internal void lb_set_wasm_procedure_import_attributes(LLVMValueRef value, Entity *entity, String import_name) { if (!is_arch_wasm()) { return; } diff --git a/src/tilde.cpp b/src/tilde.cpp index 4fc7d1c9b..f6fed0f9a 100644 --- a/src/tilde.cpp +++ b/src/tilde.cpp @@ -363,7 +363,6 @@ gb_internal bool cg_global_variables_create(cgModule *m, Array if (is_foreign) { linkage = TB_LINKAGE_PUBLIC; // lb_add_foreign_library_path(m, e->Variable.foreign_library); - // lb_set_wasm_import_attributes(g.value, e, name); } else if (is_export) { linkage = TB_LINKAGE_PUBLIC; } -- cgit v1.2.3 From 6f1cc8071c3ff49c5431cc8ad078d12883f91545 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Tue, 2 Jul 2024 15:28:08 +0200 Subject: wasm: add foreign import and linking of wasm object files --- src/build_settings.cpp | 9 --------- src/check_decl.cpp | 7 +++++-- src/checker.cpp | 3 +-- src/linker.cpp | 14 ++++++++++++++ src/llvm_backend_utility.cpp | 6 +++++- src/parser.cpp | 3 +-- 6 files changed, 26 insertions(+), 16 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/build_settings.cpp b/src/build_settings.cpp index c3b4f2506..9c93a5b69 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -860,15 +860,6 @@ gb_internal bool is_arch_x86(void) { return false; } -gb_internal bool allow_check_foreign_filepath(void) { - switch (build_context.metrics.arch) { - case TargetArch_wasm32: - case TargetArch_wasm64p32: - return false; - } - return true; -} - // TODO(bill): OS dependent versions for the BuildContext // join_path // is_dir diff --git a/src/check_decl.cpp b/src/check_decl.cpp index 883cfcba9..3c4a4b3de 100644 --- a/src/check_decl.cpp +++ b/src/check_decl.cpp @@ -1178,9 +1178,12 @@ gb_internal void check_proc_decl(CheckerContext *ctx, Entity *e, DeclInfo *d) { if (foreign_library->LibraryName.paths.count >= 1) { module_name = foreign_library->LibraryName.paths[0]; } - name = concatenate3_strings(permanent_allocator(), module_name, WASM_MODULE_NAME_SEPARATOR, name); + + if (!string_ends_with(module_name, str_lit(".o"))) { + name = concatenate3_strings(permanent_allocator(), module_name, WASM_MODULE_NAME_SEPARATOR, name); + } } - + e->Procedure.is_foreign = true; e->Procedure.link_name = name; diff --git a/src/checker.cpp b/src/checker.cpp index 734659510..c3d2ae5eb 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -5000,9 +5000,8 @@ gb_internal void check_foreign_import_fullpaths(Checker *c) { String file_str = op.value.value_string; file_str = string_trim_whitespace(file_str); - String fullpath = file_str; - if (allow_check_foreign_filepath()) { + if (!is_arch_wasm() || string_ends_with(file_str, str_lit(".o"))) { String foreign_path = {}; bool ok = determine_path_from_string(nullptr, decl, base_dir, file_str, &foreign_path, /*use error not syntax_error*/true); if (ok) { diff --git a/src/linker.cpp b/src/linker.cpp index 371736743..34c0af7e5 100644 --- a/src/linker.cpp +++ b/src/linker.cpp @@ -85,6 +85,20 @@ gb_internal i32 linker_stage(LinkerData *gen) { if (extra_linker_flags.len != 0) { lib_str = gb_string_append_fmt(lib_str, " %.*s", LIT(extra_linker_flags)); } + + for_array(i, e->LibraryName.paths) { + String lib = e->LibraryName.paths[i]; + + if (lib.len == 0) { + continue; + } + + if (!string_ends_with(lib, str_lit(".o"))) { + continue; + } + + inputs = gb_string_append_fmt(inputs, " \"%.*s\"", LIT(lib)); + } } if (build_context.metrics.os == TargetOs_orca) { diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 98ed0c57e..1165476be 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -2029,7 +2029,11 @@ gb_internal void lb_set_wasm_procedure_import_attributes(LLVMValueRef value, Ent GB_ASSERT(foreign_library->LibraryName.paths.count == 1); module_name = foreign_library->LibraryName.paths[0]; - + + if (string_ends_with(module_name, str_lit(".o"))) { + return; + } + if (string_starts_with(import_name, module_name)) { import_name = substring(import_name, module_name.len+WASM_MODULE_NAME_SEPARATOR.len, import_name.len); } diff --git a/src/parser.cpp b/src/parser.cpp index 583f4a57d..997d32fa5 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -5813,7 +5813,6 @@ gb_internal bool determine_path_from_string(BlockingMutex *file_mutex, Ast *node return false; } - if (collection_name.len > 0) { // NOTE(bill): `base:runtime` == `core:runtime` if (collection_name == "core") { @@ -5968,7 +5967,7 @@ gb_internal void parse_setup_file_decls(Parser *p, AstFile *f, String const &bas Token fp_token = fp->BasicLit.token; String file_str = string_trim_whitespace(string_value_from_token(f, fp_token)); String fullpath = file_str; - if (allow_check_foreign_filepath()) { + if (!is_arch_wasm() || string_ends_with(fullpath, str_lit(".o"))) { String foreign_path = {}; bool ok = determine_path_from_string(&p->file_decl_mutex, node, base_dir, file_str, &foreign_path); if (!ok) { -- cgit v1.2.3 From 8e52a525804244a7b45858ec4e8971ee55d34056 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 18 Aug 2024 18:37:40 +0100 Subject: Cache the paddding filler type --- src/llvm_backend.hpp | 9 +++++++++ src/llvm_backend_general.cpp | 3 +++ src/llvm_backend_utility.cpp | 27 ++++++++++++++++++++++++--- 3 files changed, 36 insertions(+), 3 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 02daecf6b..29d2ccfe6 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -137,6 +137,12 @@ enum lbFunctionPassManagerKind { lbFunctionPassManager_COUNT }; +struct lbPadType { + i64 padding; + i64 padding_align; + LLVMTypeRef type; +}; + struct lbModule { LLVMModuleRef mod; LLVMContextRef ctx; @@ -199,6 +205,9 @@ struct lbModule { PtrMap exact_value_compound_literal_addr_map; // Key: Ast_CompoundLit LLVMPassManagerRef function_pass_managers[lbFunctionPassManager_COUNT]; + + BlockingMutex pad_types_mutex; + Array pad_types; }; struct lbEntityCorrection { diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 5e9234cec..b5338297e 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -91,6 +91,9 @@ gb_internal void lb_init_module(lbModule *m, Checker *c) { map_init(&m->map_cell_info_map, 0); map_init(&m->exact_value_compound_literal_addr_map, 1024); + array_init(&m->pad_types, heap_allocator()); + + m->const_dummy_builder = LLVMCreateBuilderInContext(m->ctx); } diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 1165476be..68c1e9d1e 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1003,6 +1003,21 @@ gb_internal i32 lb_convert_struct_index(lbModule *m, Type *t, i32 index) { } gb_internal LLVMTypeRef lb_type_padding_filler(lbModule *m, i64 padding, i64 padding_align) { + MUTEX_GUARD(&m->pad_types_mutex); + if (padding % padding_align == 0) { + for (auto pd : m->pad_types) { + if (pd.padding == padding && pd.padding_align == padding_align) { + return pd.type; + } + } + } else { + for (auto pd : m->pad_types) { + if (pd.padding == padding && pd.padding_align == 1) { + return pd.type; + } + } + } + // NOTE(bill): limit to `[N x u64]` to prevent ABI issues padding_align = gb_clamp(padding_align, 1, 8); if (padding % padding_align == 0) { @@ -1016,13 +1031,19 @@ gb_internal LLVMTypeRef lb_type_padding_filler(lbModule *m, i64 padding, i64 pad } GB_ASSERT_MSG(elem != nullptr, "Invalid lb_type_padding_filler padding and padding_align: %lld", padding_align); + + LLVMTypeRef type = nullptr; if (len != 1) { - return llvm_array_type(elem, len); + type = llvm_array_type(elem, len); } else { - return elem; + type = elem; } + array_add(&m->pad_types, lbPadType{padding, padding_align, type}); + return type; } else { - return llvm_array_type(lb_type(m, t_u8), padding); + LLVMTypeRef type = llvm_array_type(lb_type(m, t_u8), padding); + array_add(&m->pad_types, lbPadType{padding, 1, type}); + return type; } } -- cgit v1.2.3 From c424c940303b497f694ed2246adb1a7b97ff6d9b Mon Sep 17 00:00:00 2001 From: Feoramund <161657516+Feoramund@users.noreply.github.com> Date: Mon, 26 Aug 2024 04:48:31 -0400 Subject: Fix inline transmutation of `[16]i8` to `i128` Forbids LLVM from generating SSE aligned loads on unaligned data. --- src/llvm_backend_utility.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 68c1e9d1e..f63c42ab9 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -263,7 +263,7 @@ gb_internal lbValue lb_emit_transmute(lbProcedure *p, lbValue value, Type *t) { if (is_type_simd_vector(src) && is_type_simd_vector(dst)) { res.value = LLVMBuildBitCast(p->builder, value.value, lb_type(p->module, t), ""); return res; - } else if (is_type_array_like(src) && is_type_simd_vector(dst)) { + } else if (is_type_array_like(src) && (is_type_simd_vector(dst) || is_type_integer_128bit(dst))) { unsigned align = cast(unsigned)gb_max(type_align_of(src), type_align_of(dst)); lbValue ptr = lb_address_from_load_or_generate_local(p, value); if (lb_try_update_alignment(ptr, align)) { -- cgit v1.2.3 From da1e09c95d885e8385c7d88b856f130cee41d9c1 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Fri, 25 Oct 2024 14:26:02 +0200 Subject: check packed load and set alignment on all loads, not just lb_emit_load --- src/llvm_backend_expr.cpp | 13 ++++++------ src/llvm_backend_general.cpp | 49 +++++++++++++++++++++++++++++++------------- src/llvm_backend_proc.cpp | 5 ++--- src/llvm_backend_stmt.cpp | 2 +- src/llvm_backend_utility.cpp | 2 +- 5 files changed, 45 insertions(+), 26 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 8967a4e03..de7cadaf3 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -130,7 +130,7 @@ gb_internal lbValue lb_emit_unary_arith(lbProcedure *p, TokenKind op, lbValue x, LLVMTypeRef vector_type = nullptr; if (op != Token_Not && lb_try_vector_cast(p->module, val, &vector_type)) { LLVMValueRef vp = LLVMBuildPointerCast(p->builder, val.value, LLVMPointerType(vector_type, 0), ""); - LLVMValueRef v = LLVMBuildLoad2(p->builder, vector_type, vp, ""); + LLVMValueRef v = OdinLLVMBuildLoad(p, vector_type, vp); LLVMValueRef opv = nullptr; switch (op) { @@ -324,8 +324,8 @@ gb_internal bool lb_try_direct_vector_arith(lbProcedure *p, TokenKind op, lbValu LLVMValueRef lhs_vp = LLVMBuildPointerCast(p->builder, lhs_ptr.value, LLVMPointerType(vector_type, 0), ""); LLVMValueRef rhs_vp = LLVMBuildPointerCast(p->builder, rhs_ptr.value, LLVMPointerType(vector_type, 0), ""); - LLVMValueRef x = LLVMBuildLoad2(p->builder, vector_type, lhs_vp, ""); - LLVMValueRef y = LLVMBuildLoad2(p->builder, vector_type, rhs_vp, ""); + LLVMValueRef x = OdinLLVMBuildLoad(p, vector_type, lhs_vp); + LLVMValueRef y = OdinLLVMBuildLoad(p, vector_type, rhs_vp); LLVMValueRef z = nullptr; if (is_type_float(integral_type)) { @@ -551,15 +551,14 @@ gb_internal LLVMValueRef lb_matrix_to_vector(lbProcedure *p, lbValue matrix) { Type *mt = base_type(matrix.type); GB_ASSERT(mt->kind == Type_Matrix); LLVMTypeRef elem_type = lb_type(p->module, mt->Matrix.elem); - + unsigned total_count = cast(unsigned)matrix_type_total_internal_elems(mt); LLVMTypeRef total_matrix_type = LLVMVectorType(elem_type, total_count); - + #if 1 LLVMValueRef ptr = lb_address_from_load_or_generate_local(p, matrix).value; LLVMValueRef matrix_vector_ptr = LLVMBuildPointerCast(p->builder, ptr, LLVMPointerType(total_matrix_type, 0), ""); - LLVMValueRef matrix_vector = LLVMBuildLoad2(p->builder, total_matrix_type, matrix_vector_ptr, ""); - LLVMSetAlignment(matrix_vector, cast(unsigned)type_align_of(mt)); + LLVMValueRef matrix_vector = OdinLLVMBuildLoadAligned(p, total_matrix_type, matrix_vector_ptr, type_align_of(mt)); return matrix_vector; #else LLVMValueRef matrix_vector = LLVMBuildBitCast(p->builder, matrix.value, total_matrix_type, ""); diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 43c9f738a..686724f6a 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -774,6 +774,36 @@ gb_internal bool lb_try_vector_cast(lbModule *m, lbValue ptr, LLVMTypeRef *vecto return false; } +gb_internal LLVMValueRef OdinLLVMBuildLoad(lbProcedure *p, LLVMTypeRef type, LLVMValueRef value) { + LLVMValueRef result = LLVMBuildLoad2(p->builder, type, value, ""); + + // If it is not an instruction it isn't a GEP, so we don't need to track alignment in the metadata, + // which is not possible anyway (only LLVM instructions can have metadata). + if (LLVMIsAInstruction(value)) { + u64 is_packed = lb_get_metadata_custom_u64(p->module, value, ODIN_METADATA_IS_PACKED); + if (is_packed != 0) { + LLVMSetAlignment(result, 1); + } + } + + return result; +} + +gb_internal LLVMValueRef OdinLLVMBuildLoadAligned(lbProcedure *p, LLVMTypeRef type, LLVMValueRef value, i64 alignment) { + LLVMValueRef result = LLVMBuildLoad2(p->builder, type, value, ""); + + LLVMSetAlignment(result, cast(unsigned)alignment); + + if (LLVMIsAInstruction(value)) { + u64 is_packed = lb_get_metadata_custom_u64(p->module, value, ODIN_METADATA_IS_PACKED); + if (is_packed != 0) { + LLVMSetAlignment(result, 1); + } + } + + return result; +} + gb_internal void lb_addr_store(lbProcedure *p, lbAddr addr, lbValue value) { if (addr.addr.value == nullptr) { return; @@ -1119,7 +1149,7 @@ gb_internal lbValue lb_emit_load(lbProcedure *p, lbValue value) { Type *vt = base_type(value.type); GB_ASSERT(vt->kind == Type_MultiPointer); Type *t = vt->MultiPointer.elem; - LLVMValueRef v = LLVMBuildLoad2(p->builder, lb_type(p->module, t), value.value, ""); + LLVMValueRef v = OdinLLVMBuildLoad(p, lb_type(p->module, t), value.value); return lbValue{v, t}; } else if (is_type_soa_pointer(value.type)) { lbValue ptr = lb_emit_struct_ev(p, value, 0); @@ -1130,16 +1160,7 @@ gb_internal lbValue lb_emit_load(lbProcedure *p, lbValue value) { GB_ASSERT_MSG(is_type_pointer(value.type), "%s", type_to_string(value.type)); Type *t = type_deref(value.type); - LLVMValueRef v = LLVMBuildLoad2(p->builder, lb_type(p->module, t), value.value, ""); - - // If it is not an instruction it isn't a GEP, so we don't need to track alignment in the metadata, - // which is not possible anyway (only LLVM instructions can have metadata). - if (LLVMIsAInstruction(value.value)) { - u64 is_packed = lb_get_metadata_custom_u64(p->module, value.value, ODIN_METADATA_IS_PACKED); - if (is_packed != 0) { - LLVMSetAlignment(v, 1); - } - } + LLVMValueRef v = OdinLLVMBuildLoad(p, lb_type(p->module, t), value.value); return lbValue{v, t}; } @@ -1413,7 +1434,7 @@ gb_internal lbValue lb_addr_load(lbProcedure *p, lbAddr const &addr) { LLVMTypeRef vector_type = nullptr; if (lb_try_vector_cast(p->module, addr.addr, &vector_type)) { LLVMValueRef vp = LLVMBuildPointerCast(p->builder, addr.addr.value, LLVMPointerType(vector_type, 0), ""); - LLVMValueRef v = LLVMBuildLoad2(p->builder, vector_type, vp, ""); + LLVMValueRef v = OdinLLVMBuildLoad(p, vector_type, vp); LLVMValueRef scalars[4] = {}; for (u8 i = 0; i < addr.swizzle.count; i++) { scalars[i] = LLVMConstInt(lb_type(p->module, t_u32), addr.swizzle.indices[i], false); @@ -2710,7 +2731,7 @@ general_end:; if (LLVMIsALoadInst(val) && (src_size >= dst_size && src_align >= dst_align)) { LLVMValueRef val_ptr = LLVMGetOperand(val, 0); val_ptr = LLVMBuildPointerCast(p->builder, val_ptr, LLVMPointerType(dst_type, 0), ""); - LLVMValueRef loaded_val = LLVMBuildLoad2(p->builder, dst_type, val_ptr, ""); + LLVMValueRef loaded_val = OdinLLVMBuildLoad(p, dst_type, val_ptr); // LLVMSetAlignment(loaded_val, gb_min(src_align, dst_align)); @@ -2726,7 +2747,7 @@ general_end:; LLVMValueRef nptr = LLVMBuildPointerCast(p->builder, ptr, LLVMPointerType(src_type, 0), ""); LLVMBuildStore(p->builder, val, nptr); - return LLVMBuildLoad2(p->builder, dst_type, ptr, ""); + return OdinLLVMBuildLoad(p, dst_type, ptr); } } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index d84599eb0..5aee5b639 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -2568,7 +2568,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu case BuiltinProc_atomic_load_explicit: { lbValue dst = lb_build_expr(p, ce->args[0]); - LLVMValueRef instr = LLVMBuildLoad2(p->builder, lb_type(p->module, type_deref(dst.type)), dst.value, ""); + LLVMValueRef instr = OdinLLVMBuildLoad(p, lb_type(p->module, type_deref(dst.type)), dst.value); switch (id) { case BuiltinProc_non_temporal_load: { @@ -2621,8 +2621,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu if (is_type_simd_vector(t)) { lbValue res = {}; res.type = t; - res.value = LLVMBuildLoad2(p->builder, lb_type(p->module, t), src.value, ""); - LLVMSetAlignment(res.value, 1); + res.value = OdinLLVMBuildLoadAligned(p, lb_type(p->module, t), src.value, 1); return res; } else { lbAddr dst = lb_add_local_generated(p, t, false); diff --git a/src/llvm_backend_stmt.cpp b/src/llvm_backend_stmt.cpp index df3d4bc03..288e7206a 100644 --- a/src/llvm_backend_stmt.cpp +++ b/src/llvm_backend_stmt.cpp @@ -2001,7 +2001,7 @@ gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res) { LLVMValueRef ptr = p->temp_callee_return_struct_memory; LLVMValueRef nptr = LLVMBuildPointerCast(p->builder, ptr, LLVMPointerType(src_type, 0), ""); LLVMBuildStore(p->builder, ret_val, nptr); - ret_val = LLVMBuildLoad2(p->builder, ret_type, ptr, ""); + ret_val = OdinLLVMBuildLoad(p, ret_type, ptr); } else { ret_val = OdinLLVMBuildTransmute(p, ret_val, ret_type); } diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index f63c42ab9..6367d0118 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -269,7 +269,7 @@ gb_internal lbValue lb_emit_transmute(lbProcedure *p, lbValue value, Type *t) { if (lb_try_update_alignment(ptr, align)) { LLVMTypeRef result_type = lb_type(p->module, t); res.value = LLVMBuildPointerCast(p->builder, ptr.value, LLVMPointerType(result_type, 0), ""); - res.value = LLVMBuildLoad2(p->builder, result_type, res.value, ""); + res.value = OdinLLVMBuildLoad(p, result_type, res.value); return res; } lbAddr addr = lb_add_local_generated(p, t, false); -- cgit v1.2.3 From 89a5decc33e532eca4ae3739ae89db508a700a8d Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 13 Nov 2024 18:32:50 +0000 Subject: Keep ASAN happy on type assertions --- src/llvm_backend_utility.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 6367d0118..e6049da84 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -124,8 +124,16 @@ gb_internal void lb_mem_zero_ptr(lbProcedure *p, LLVMValueRef ptr, Type *type, u switch (kind) { case LLVMStructTypeKind: case LLVMArrayTypeKind: - // NOTE(bill): Enforce zeroing through memset to make sure padding is zeroed too - lb_mem_zero_ptr_internal(p, ptr, lb_const_int(p->module, t_int, sz).value, alignment, false); + if (is_type_tuple(type)) { + // NOTE(bill): even though this should be safe, to keep ASAN happy, do not zero the implicit padding at the end + GB_ASSERT(type->kind == Type_Tuple); + i64 n = type->Tuple.variables.count-1; + i64 end_offset = type->Tuple.offsets[n] + type_size_of(type->Tuple.variables[n]->type); + lb_mem_zero_ptr_internal(p, ptr, lb_const_int(p->module, t_int, end_offset).value, alignment, false); + } else { + // NOTE(bill): Enforce zeroing through memset to make sure padding is zeroed too + lb_mem_zero_ptr_internal(p, ptr, lb_const_int(p->module, t_int, sz).value, alignment, false); + } break; default: LLVMBuildStore(p->builder, LLVMConstNull(lb_type(p->module, type)), ptr); -- cgit v1.2.3 From e38a08013e1afa1bbd7b8f90ee16cfbdfacfb13f Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 14 Nov 2024 16:17:24 +0000 Subject: Remove `#relative` types from the compiler --- base/runtime/core.odin | 12 ---- base/runtime/print.odin | 12 ---- core/fmt/fmt.odin | 12 ---- core/reflect/reflect.odin | 81 +++++++++---------------- core/reflect/types.odin | 32 ---------- examples/demo/demo.odin | 17 ------ src/check_expr.cpp | 64 -------------------- src/check_type.cpp | 37 +----------- src/checker.cpp | 24 -------- src/docs_format.cpp | 3 +- src/docs_writer.cpp | 18 ------ src/llvm_backend.hpp | 4 -- src/llvm_backend_debug.cpp | 11 ---- src/llvm_backend_expr.cpp | 37 +----------- src/llvm_backend_general.cpp | 139 ------------------------------------------- src/llvm_backend_type.cpp | 26 -------- src/llvm_backend_utility.cpp | 6 -- src/types.cpp | 98 ------------------------------ 18 files changed, 31 insertions(+), 602 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/base/runtime/core.odin b/base/runtime/core.odin index a5a3a4d8c..e47f3ecbc 100644 --- a/base/runtime/core.odin +++ b/base/runtime/core.odin @@ -171,14 +171,6 @@ Type_Info_Simd_Vector :: struct { elem_size: int, count: int, } -Type_Info_Relative_Pointer :: struct { - pointer: ^Type_Info, // ^T - base_integer: ^Type_Info, -} -Type_Info_Relative_Multi_Pointer :: struct { - pointer: ^Type_Info, // [^]T - base_integer: ^Type_Info, -} Type_Info_Matrix :: struct { elem: ^Type_Info, elem_size: int, @@ -241,8 +233,6 @@ Type_Info :: struct { Type_Info_Map, Type_Info_Bit_Set, Type_Info_Simd_Vector, - Type_Info_Relative_Pointer, - Type_Info_Relative_Multi_Pointer, Type_Info_Matrix, Type_Info_Soa_Pointer, Type_Info_Bit_Field, @@ -275,8 +265,6 @@ Typeid_Kind :: enum u8 { Map, Bit_Set, Simd_Vector, - Relative_Pointer, - Relative_Multi_Pointer, Matrix, Soa_Pointer, Bit_Field, diff --git a/base/runtime/print.odin b/base/runtime/print.odin index 45f6f01ef..c28fd593d 100644 --- a/base/runtime/print.odin +++ b/base/runtime/print.odin @@ -486,18 +486,6 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) { print_u64(u64(info.count)) print_byte(']') print_type(info.elem) - - case Type_Info_Relative_Pointer: - print_string("#relative(") - print_type(info.base_integer) - print_string(") ") - print_type(info.pointer) - - case Type_Info_Relative_Multi_Pointer: - print_string("#relative(") - print_type(info.base_integer) - print_string(") ") - print_type(info.pointer) case Type_Info_Matrix: print_string("matrix[") diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index dd5c2c6a2..49e9f2e6d 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -3002,18 +3002,6 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) { case runtime.Type_Info_Bit_Set: fmt_bit_set(fi, v, verb = verb) - case runtime.Type_Info_Relative_Pointer: - ptr := reflect.relative_pointer_to_absolute_raw(v.data, info.base_integer.id) - absolute_ptr := any{ptr, info.pointer.id} - - fmt_value(fi, absolute_ptr, verb) - - case runtime.Type_Info_Relative_Multi_Pointer: - ptr := reflect.relative_pointer_to_absolute_raw(v.data, info.base_integer.id) - absolute_ptr := any{ptr, info.pointer.id} - - fmt_value(fi, absolute_ptr, verb) - case runtime.Type_Info_Matrix: fmt_matrix(fi, v, verb, info) diff --git a/core/reflect/reflect.odin b/core/reflect/reflect.odin index d547b67ed..7f79acb77 100644 --- a/core/reflect/reflect.odin +++ b/core/reflect/reflect.odin @@ -31,8 +31,6 @@ Type_Info_Enum :: runtime.Type_Info_Enum Type_Info_Map :: runtime.Type_Info_Map Type_Info_Bit_Set :: runtime.Type_Info_Bit_Set Type_Info_Simd_Vector :: runtime.Type_Info_Simd_Vector -Type_Info_Relative_Pointer :: runtime.Type_Info_Relative_Pointer -Type_Info_Relative_Multi_Pointer :: runtime.Type_Info_Relative_Multi_Pointer Type_Info_Matrix :: runtime.Type_Info_Matrix Type_Info_Soa_Pointer :: runtime.Type_Info_Soa_Pointer Type_Info_Bit_Field :: runtime.Type_Info_Bit_Field @@ -67,8 +65,6 @@ Type_Kind :: enum { Map, Bit_Set, Simd_Vector, - Relative_Pointer, - Relative_Multi_Pointer, Matrix, Soa_Pointer, Bit_Field, @@ -80,35 +76,33 @@ type_kind :: proc(T: typeid) -> Type_Kind { ti := type_info_of(T) if ti != nil { switch _ in ti.variant { - case Type_Info_Named: return .Named - case Type_Info_Integer: return .Integer - case Type_Info_Rune: return .Rune - case Type_Info_Float: return .Float - case Type_Info_Complex: return .Complex - case Type_Info_Quaternion: return .Quaternion - case Type_Info_String: return .String - case Type_Info_Boolean: return .Boolean - case Type_Info_Any: return .Any - case Type_Info_Type_Id: return .Type_Id - case Type_Info_Pointer: return .Pointer - case Type_Info_Multi_Pointer: return .Multi_Pointer - case Type_Info_Procedure: return .Procedure - case Type_Info_Array: return .Array - case Type_Info_Enumerated_Array: return .Enumerated_Array - case Type_Info_Dynamic_Array: return .Dynamic_Array - case Type_Info_Slice: return .Slice - case Type_Info_Parameters: return .Tuple - case Type_Info_Struct: return .Struct - case Type_Info_Union: return .Union - case Type_Info_Enum: return .Enum - case Type_Info_Map: return .Map - case Type_Info_Bit_Set: return .Bit_Set - case Type_Info_Simd_Vector: return .Simd_Vector - case Type_Info_Relative_Pointer: return .Relative_Pointer - case Type_Info_Relative_Multi_Pointer: return .Relative_Multi_Pointer - case Type_Info_Matrix: return .Matrix - case Type_Info_Soa_Pointer: return .Soa_Pointer - case Type_Info_Bit_Field: return .Bit_Field + case Type_Info_Named: return .Named + case Type_Info_Integer: return .Integer + case Type_Info_Rune: return .Rune + case Type_Info_Float: return .Float + case Type_Info_Complex: return .Complex + case Type_Info_Quaternion: return .Quaternion + case Type_Info_String: return .String + case Type_Info_Boolean: return .Boolean + case Type_Info_Any: return .Any + case Type_Info_Type_Id: return .Type_Id + case Type_Info_Pointer: return .Pointer + case Type_Info_Multi_Pointer: return .Multi_Pointer + case Type_Info_Procedure: return .Procedure + case Type_Info_Array: return .Array + case Type_Info_Enumerated_Array: return .Enumerated_Array + case Type_Info_Dynamic_Array: return .Dynamic_Array + case Type_Info_Slice: return .Slice + case Type_Info_Parameters: return .Tuple + case Type_Info_Struct: return .Struct + case Type_Info_Union: return .Union + case Type_Info_Enum: return .Enum + case Type_Info_Map: return .Map + case Type_Info_Bit_Set: return .Bit_Set + case Type_Info_Simd_Vector: return .Simd_Vector + case Type_Info_Matrix: return .Matrix + case Type_Info_Soa_Pointer: return .Soa_Pointer + case Type_Info_Bit_Field: return .Bit_Field } } @@ -1488,21 +1482,6 @@ as_string :: proc(a: any) -> (value: string, valid: bool) { return } -@(require_results) -relative_pointer_to_absolute :: proc(a: any) -> rawptr { - if a == nil { return nil } - a := a - ti := runtime.type_info_core(type_info_of(a.id)) - a.id = ti.id - - #partial switch info in ti.variant { - case Type_Info_Relative_Pointer: - return relative_pointer_to_absolute_raw(a.data, info.base_integer.id) - } - return nil -} - - @(require_results) relative_pointer_to_absolute_raw :: proc(data: rawptr, base_integer_id: typeid) -> rawptr { _handle :: proc(ptr: ^$T) -> rawptr where intrinsics.type_is_integer(T) { @@ -1564,10 +1543,6 @@ as_pointer :: proc(a: any) -> (value: rawptr, valid: bool) { case cstring: value = rawptr(v) case: valid = false } - - case Type_Info_Relative_Pointer: - valid = true - value = relative_pointer_to_absolute_raw(a.data, info.base_integer.id) } return @@ -1677,8 +1652,6 @@ equal :: proc(a, b: any, including_indirect_array_recursion := false, recursion_ Type_Info_Bit_Set, Type_Info_Enum, Type_Info_Simd_Vector, - Type_Info_Relative_Pointer, - Type_Info_Relative_Multi_Pointer, Type_Info_Soa_Pointer, Type_Info_Matrix: return runtime.memory_compare(a.data, b.data, t.size) == 0 diff --git a/core/reflect/types.odin b/core/reflect/types.odin index 4f0674dc8..cb31a27e2 100644 --- a/core/reflect/types.odin +++ b/core/reflect/types.odin @@ -158,14 +158,6 @@ are_types_identical :: proc(a, b: ^Type_Info) -> bool { case Type_Info_Simd_Vector: y := b.variant.(Type_Info_Simd_Vector) or_return return x.count == y.count && x.elem == y.elem - - case Type_Info_Relative_Pointer: - y := b.variant.(Type_Info_Relative_Pointer) or_return - return x.base_integer == y.base_integer && x.pointer == y.pointer - - case Type_Info_Relative_Multi_Pointer: - y := b.variant.(Type_Info_Relative_Multi_Pointer) or_return - return x.base_integer == y.base_integer && x.pointer == y.pointer case Type_Info_Matrix: y := b.variant.(Type_Info_Matrix) or_return @@ -392,18 +384,6 @@ is_simd_vector :: proc(info: ^Type_Info) -> bool { _, ok := type_info_base(info).variant.(Type_Info_Simd_Vector) return ok } -@(require_results) -is_relative_pointer :: proc(info: ^Type_Info) -> bool { - if info == nil { return false } - _, ok := type_info_base(info).variant.(Type_Info_Relative_Pointer) - return ok -} -@(require_results) -is_relative_multi_pointer :: proc(info: ^Type_Info) -> bool { - if info == nil { return false } - _, ok := type_info_base(info).variant.(Type_Info_Relative_Multi_Pointer) - return ok -} @(require_results) @@ -736,18 +716,6 @@ write_type_writer :: #force_no_inline proc(w: io.Writer, ti: ^Type_Info, n_writt io.write_i64(w, i64(info.count), 10, &n) or_return io.write_byte(w, ']', &n) or_return write_type(w, info.elem, &n) or_return - - case Type_Info_Relative_Pointer: - io.write_string(w, "#relative(", &n) or_return - write_type(w, info.base_integer, &n) or_return - io.write_string(w, ") ", &n) or_return - write_type(w, info.pointer, &n) or_return - - case Type_Info_Relative_Multi_Pointer: - io.write_string(w, "#relative(", &n) or_return - write_type(w, info.base_integer, &n) or_return - io.write_string(w, ") ", &n) or_return - write_type(w, info.pointer, &n) or_return case Type_Info_Matrix: if info.layout == .Row_Major { diff --git a/examples/demo/demo.odin b/examples/demo/demo.odin index b234ba4f3..36d1359ca 100644 --- a/examples/demo/demo.odin +++ b/examples/demo/demo.odin @@ -2052,22 +2052,6 @@ explicit_context_definition :: proc "c" () { dummy_procedure() } -relative_data_types :: proc() { - fmt.println("\n#relative data types") - - x: int = 123 - ptr: #relative(i16) ^int - ptr = &x - fmt.println(ptr^) - - arr := [3]int{1, 2, 3} - multi_ptr: #relative(i16) [^]int - multi_ptr = &arr[0] - fmt.println(multi_ptr) - fmt.println(multi_ptr[:3]) - fmt.println(multi_ptr[1]) -} - or_else_operator :: proc() { fmt.println("\n#'or_else'") { @@ -2634,7 +2618,6 @@ main :: proc() { constant_literal_expressions() union_maybe() explicit_context_definition() - relative_data_types() or_else_operator() or_return_operator() or_break_and_or_continue_operators() diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 05f491690..cb4647f33 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -897,20 +897,6 @@ gb_internal i64 check_distance_between_types(CheckerContext *c, Operand *operand } } - if (is_type_relative_pointer(dst)) { - i64 score = check_distance_between_types(c, operand, dst->RelativePointer.pointer_type, allow_array_programming); - if (score >= 0) { - return score+2; - } - } - - if (is_type_relative_multi_pointer(dst)) { - i64 score = check_distance_between_types(c, operand, dst->RelativeMultiPointer.pointer_type, allow_array_programming); - if (score >= 0) { - return score+2; - } - } - if (is_type_proc(dst)) { if (are_types_identical(src, dst)) { return 3; @@ -1052,12 +1038,6 @@ gb_internal AstPackage *get_package_of_type(Type *type) { case Type_DynamicArray: type = type->DynamicArray.elem; continue; - case Type_RelativePointer: - type = type->RelativePointer.pointer_type; - continue; - case Type_RelativeMultiPointer: - type = type->RelativeMultiPointer.pointer_type; - continue; } return nullptr; } @@ -8230,17 +8210,6 @@ gb_internal bool check_set_index_data(Operand *o, Type *t, bool indirection, i64 } return true; - case Type_RelativeMultiPointer: - { - Type *pointer_type = base_type(t->RelativeMultiPointer.pointer_type); - GB_ASSERT(pointer_type->kind == Type_MultiPointer); - o->type = pointer_type->MultiPointer.elem; - if (o->mode != Addressing_Constant) { - o->mode = Addressing_Variable; - } - } - return true; - case Type_DynamicArray: o->type = t->DynamicArray.elem; if (o->mode != Addressing_Constant) { @@ -10623,8 +10592,6 @@ gb_internal ExprKind check_index_expr(CheckerContext *c, Operand *o, Ast *node, // Okay } else if (is_type_string(t)) { // Okay - } else if (is_type_relative_multi_pointer(t)) { - // Okay } else if (is_type_matrix(t)) { // Okay } else { @@ -10779,11 +10746,6 @@ gb_internal ExprKind check_slice_expr(CheckerContext *c, Operand *o, Ast *node, } break; - case Type_RelativeMultiPointer: - valid = true; - o->type = type_deref(o->type); - break; - case Type_EnumeratedArray: { gbString str = expr_to_string(o->expr); @@ -10860,16 +10822,6 @@ gb_internal ExprKind check_slice_expr(CheckerContext *c, Operand *o, Ast *node, x[i:n] -> []T */ o->type = alloc_type_slice(t->MultiPointer.elem); - } else if (t->kind == Type_RelativeMultiPointer && se->high != nullptr) { - /* - x[:] -> [^]T - x[i:] -> [^]T - x[:n] -> []T - x[i:n] -> []T - */ - Type *pointer_type = base_type(t->RelativeMultiPointer.pointer_type); - GB_ASSERT(pointer_type->kind == Type_MultiPointer); - o->type = alloc_type_slice(pointer_type->MultiPointer.elem); } @@ -11230,22 +11182,6 @@ gb_internal ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast } else if (t->kind == Type_SoaPointer) { o->mode = Addressing_SoaVariable; o->type = type_deref(t); - } else if (t->kind == Type_RelativePointer) { - if (o->mode != Addressing_Variable) { - gbString str = expr_to_string(o->expr); - gbString typ = type_to_string(o->type); - error(o->expr, "Cannot dereference relative pointer '%s' of type '%s' as it does not have a variable addressing mode", str, typ); - gb_string_free(typ); - gb_string_free(str); - } - - // NOTE(bill): This is required because when dereferencing, the original type has been lost - add_type_info_type(c, o->type); - - Type *ptr_type = base_type(t->RelativePointer.pointer_type); - GB_ASSERT(ptr_type->kind == Type_Pointer); - o->mode = Addressing_Variable; - o->type = ptr_type->Pointer.elem; } else { gbString str = expr_to_string(o->expr); gbString typ = type_to_string(o->type); diff --git a/src/check_type.cpp b/src/check_type.cpp index bbeff9ca7..84e7fb249 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -3517,41 +3517,8 @@ gb_internal bool check_type_internal(CheckerContext *ctx, Ast *e, Type **type, T case_end; case_ast_node(rt, RelativeType, e); - GB_ASSERT(rt->tag->kind == Ast_CallExpr); - ast_node(ce, CallExpr, rt->tag); - - Type *base_integer = nullptr; - - if (ce->args.count != 1) { - error(rt->type, "#relative expected 1 type argument, got %td", ce->args.count); - } else { - base_integer = check_type(ctx, ce->args[0]); - if (!is_type_integer(base_integer)) { - error(rt->type, "#relative base types must be an integer"); - base_integer = nullptr; - } else if (type_size_of(base_integer) > 64) { - error(rt->type, "#relative base integer types be less than or equal to 64-bits"); - base_integer = nullptr; - } - } - - Type *relative_type = nullptr; - Type *base_type = check_type(ctx, rt->type); - if (!is_type_pointer(base_type) && !is_type_multi_pointer(base_type)) { - error(rt->type, "#relative types can only be a pointer or multi-pointer"); - relative_type = base_type; - } else if (base_integer == nullptr) { - relative_type = base_type; - } else { - if (is_type_pointer(base_type)) { - relative_type = alloc_type_relative_pointer(base_type, base_integer); - } else if (is_type_multi_pointer(base_type)) { - relative_type = alloc_type_relative_multi_pointer(base_type, base_integer); - } - } - GB_ASSERT(relative_type != nullptr); - - *type = relative_type; + error(e, "#relative types have been removed from the compiler. Prefer \"core:relative\"."); + *type = t_invalid; set_base_type(named_type, *type); return true; case_end; diff --git a/src/checker.cpp b/src/checker.cpp index 76f996648..b7cf343f8 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -2186,16 +2186,6 @@ gb_internal void add_type_info_type_internal(CheckerContext *c, Type *t) { add_type_info_type_internal(c, bt->SimdVector.elem); break; - case Type_RelativePointer: - add_type_info_type_internal(c, bt->RelativePointer.pointer_type); - add_type_info_type_internal(c, bt->RelativePointer.base_integer); - break; - - case Type_RelativeMultiPointer: - add_type_info_type_internal(c, bt->RelativeMultiPointer.pointer_type); - add_type_info_type_internal(c, bt->RelativeMultiPointer.base_integer); - break; - case Type_Matrix: add_type_info_type_internal(c, bt->Matrix.elem); break; @@ -2441,16 +2431,6 @@ gb_internal void add_min_dep_type_info(Checker *c, Type *t) { add_min_dep_type_info(c, bt->SimdVector.elem); break; - case Type_RelativePointer: - add_min_dep_type_info(c, bt->RelativePointer.pointer_type); - add_min_dep_type_info(c, bt->RelativePointer.base_integer); - break; - - case Type_RelativeMultiPointer: - add_min_dep_type_info(c, bt->RelativeMultiPointer.pointer_type); - add_min_dep_type_info(c, bt->RelativeMultiPointer.base_integer); - break; - case Type_Matrix: add_min_dep_type_info(c, bt->Matrix.elem); break; @@ -3075,8 +3055,6 @@ gb_internal void init_core_type_info(Checker *c) { t_type_info_map = find_core_type(c, str_lit("Type_Info_Map")); t_type_info_bit_set = find_core_type(c, str_lit("Type_Info_Bit_Set")); t_type_info_simd_vector = find_core_type(c, str_lit("Type_Info_Simd_Vector")); - t_type_info_relative_pointer = find_core_type(c, str_lit("Type_Info_Relative_Pointer")); - t_type_info_relative_multi_pointer = find_core_type(c, str_lit("Type_Info_Relative_Multi_Pointer")); t_type_info_matrix = find_core_type(c, str_lit("Type_Info_Matrix")); t_type_info_soa_pointer = find_core_type(c, str_lit("Type_Info_Soa_Pointer")); t_type_info_bit_field = find_core_type(c, str_lit("Type_Info_Bit_Field")); @@ -3105,8 +3083,6 @@ gb_internal void init_core_type_info(Checker *c) { t_type_info_map_ptr = alloc_type_pointer(t_type_info_map); t_type_info_bit_set_ptr = alloc_type_pointer(t_type_info_bit_set); t_type_info_simd_vector_ptr = alloc_type_pointer(t_type_info_simd_vector); - t_type_info_relative_pointer_ptr = alloc_type_pointer(t_type_info_relative_pointer); - t_type_info_relative_multi_pointer_ptr = alloc_type_pointer(t_type_info_relative_multi_pointer); t_type_info_matrix_ptr = alloc_type_pointer(t_type_info_matrix); t_type_info_soa_pointer_ptr = alloc_type_pointer(t_type_info_soa_pointer); t_type_info_bit_field_ptr = alloc_type_pointer(t_type_info_bit_field); diff --git a/src/docs_format.cpp b/src/docs_format.cpp index ca6ecb5c2..6378971d0 100644 --- a/src/docs_format.cpp +++ b/src/docs_format.cpp @@ -79,8 +79,7 @@ enum OdinDocTypeKind : u32 { OdinDocType_SOAStructFixed = 17, OdinDocType_SOAStructSlice = 18, OdinDocType_SOAStructDynamic = 19, - OdinDocType_RelativePointer = 20, - OdinDocType_RelativeMultiPointer = 21, + OdinDocType_MultiPointer = 22, OdinDocType_Matrix = 23, OdinDocType_SoaPointer = 24, diff --git a/src/docs_writer.cpp b/src/docs_writer.cpp index 835dfdff1..341b3fa6b 100644 --- a/src/docs_writer.cpp +++ b/src/docs_writer.cpp @@ -776,24 +776,6 @@ gb_internal OdinDocTypeIndex odin_doc_type(OdinDocWriter *w, Type *type) { doc_type.types = odin_doc_type_as_slice(w, type->SimdVector.elem); // TODO(bill): break; - case Type_RelativePointer: - doc_type.kind = OdinDocType_RelativePointer; - { - OdinDocTypeIndex types[2] = {}; - types[0] = odin_doc_type(w, type->RelativePointer.pointer_type); - types[1] = odin_doc_type(w, type->RelativePointer.base_integer); - doc_type.types = odin_write_slice(w, types, gb_count_of(types)); - } - break; - case Type_RelativeMultiPointer: - doc_type.kind = OdinDocType_RelativeMultiPointer; - { - OdinDocTypeIndex types[2] = {}; - types[0] = odin_doc_type(w, type->RelativeMultiPointer.pointer_type); - types[1] = odin_doc_type(w, type->RelativeMultiPointer.base_integer); - doc_type.types = odin_write_slice(w, types, gb_count_of(types)); - } - break; case Type_Matrix: doc_type.kind = OdinDocType_Matrix; diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 68f95cb03..464efa325 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -75,7 +75,6 @@ enum lbAddrKind { lbAddr_Context, lbAddr_SoaVariable, - lbAddr_RelativePointer, lbAddr_Swizzle, lbAddr_SwizzleLarge, @@ -103,9 +102,6 @@ struct lbAddr { lbValue index; Ast *node; } index_set; - struct { - bool deref; - } relative; struct { Type *type; u8 count; // 2, 3, or 4 components diff --git a/src/llvm_backend_debug.cpp b/src/llvm_backend_debug.cpp index 5cc79dcc8..464f7065c 100644 --- a/src/llvm_backend_debug.cpp +++ b/src/llvm_backend_debug.cpp @@ -920,17 +920,6 @@ gb_internal LLVMMetadataRef lb_debug_type_internal(lbModule *m, Type *type) { elem, subscripts, gb_count_of(subscripts)); } - case Type_RelativePointer: { - LLVMMetadataRef base_integer = lb_debug_type(m, type->RelativePointer.base_integer); - gbString name = type_to_string(type, temporary_allocator()); - return LLVMDIBuilderCreateTypedef(m->debug_builder, base_integer, name, gb_string_length(name), nullptr, 0, nullptr, cast(u32)(8*type_align_of(type))); - } - case Type_RelativeMultiPointer: { - LLVMMetadataRef base_integer = lb_debug_type(m, type->RelativeMultiPointer.base_integer); - gbString name = type_to_string(type, temporary_allocator()); - return LLVMDIBuilderCreateTypedef(m->debug_builder, base_integer, name, gb_string_length(name), nullptr, 0, nullptr, cast(u32)(8*type_align_of(type))); - } - case Type_Matrix: { LLVMMetadataRef subscripts[1] = {}; subscripts[0] = LLVMDIBuilderGetOrCreateSubrange(m->debug_builder, diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 8ad44035d..80c469ae6 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -4200,30 +4200,6 @@ gb_internal lbAddr lb_build_addr_index_expr(lbProcedure *p, Ast *expr) { return lb_addr(v); } - case Type_RelativeMultiPointer: { - lbAddr rel_ptr_addr = {}; - if (deref) { - lbValue rel_ptr_ptr = lb_build_expr(p, ie->expr); - rel_ptr_addr = lb_addr(rel_ptr_ptr); - } else { - rel_ptr_addr = lb_build_addr(p, ie->expr); - } - lbValue rel_ptr = lb_relative_pointer_to_pointer(p, rel_ptr_addr); - - lbValue index = lb_build_expr(p, ie->index); - index = lb_emit_conv(p, index, t_int); - lbValue v = {}; - - Type *pointer_type = base_type(t->RelativeMultiPointer.pointer_type); - GB_ASSERT(pointer_type->kind == Type_MultiPointer); - Type *elem = pointer_type->MultiPointer.elem; - - LLVMValueRef indices[1] = {index.value}; - v.value = LLVMBuildGEP2(p->builder, lb_type(p->module, elem), rel_ptr.value, indices, 1, ""); - v.type = alloc_type_pointer(elem); - return lb_addr(v); - } - case Type_DynamicArray: { lbValue dynamic_array = {}; dynamic_array = lb_build_expr(p, ie->expr); @@ -4333,13 +4309,6 @@ gb_internal lbAddr lb_build_addr_slice_expr(lbProcedure *p, Ast *expr) { return slice; } - case Type_RelativePointer: - GB_PANIC("TODO(bill): Type_RelativePointer should be handled above already on the lb_addr_load"); - break; - case Type_RelativeMultiPointer: - GB_PANIC("TODO(bill): Type_RelativeMultiPointer should be handled above already on the lb_addr_load"); - break; - case Type_DynamicArray: { Type *elem_type = type->DynamicArray.elem; Type *slice_type = alloc_type_slice(elem_type); @@ -5343,11 +5312,7 @@ gb_internal lbAddr lb_build_addr_internal(lbProcedure *p, Ast *expr) { case_ast_node(de, DerefExpr, expr); Type *t = type_of_expr(de->expr); - if (is_type_relative_pointer(t)) { - lbAddr addr = lb_build_addr(p, de->expr); - addr.relative.deref = true; - return addr; - } else if (is_type_soa_pointer(t)) { + if (is_type_soa_pointer(t)) { lbValue value = lb_build_expr(p, de->expr); lbValue ptr = lb_emit_struct_ev(p, value, 0); lbValue idx = lb_emit_struct_ev(p, value, 1); diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 686724f6a..9dc603993 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -409,14 +409,6 @@ gb_internal lbModule *lb_module_of_entity(lbGenerator *gen, Entity *e) { gb_internal lbAddr lb_addr(lbValue addr) { lbAddr v = {lbAddr_Default, addr}; - if (addr.type != nullptr && is_type_relative_pointer(type_deref(addr.type))) { - GB_ASSERT(is_type_pointer(addr.type)); - v.kind = lbAddr_RelativePointer; - } else if (addr.type != nullptr && is_type_relative_multi_pointer(type_deref(addr.type))) { - GB_ASSERT(is_type_pointer(addr.type) || - is_type_multi_pointer(addr.type)); - v.kind = lbAddr_RelativePointer; - } return v; } @@ -501,42 +493,6 @@ gb_internal Type *lb_addr_type(lbAddr const &addr) { return type_deref(addr.addr.type); } - -gb_internal lbValue lb_relative_pointer_to_pointer(lbProcedure *p, lbAddr const &addr) { - GB_ASSERT(addr.kind == lbAddr_RelativePointer); - - Type *t = base_type(lb_addr_type(addr)); - GB_ASSERT(is_type_relative_pointer(t) || is_type_relative_multi_pointer(t)); - - Type *pointer_type = nullptr; - Type *base_integer = nullptr; - if (t->kind == Type_RelativePointer) { - pointer_type = t->RelativePointer.pointer_type; - base_integer = t->RelativePointer.base_integer; - } else if (t->kind == Type_RelativeMultiPointer) { - pointer_type = t->RelativeMultiPointer.pointer_type; - base_integer = t->RelativeMultiPointer.base_integer; - } - - lbValue ptr = lb_emit_conv(p, addr.addr, t_uintptr); - lbValue offset = lb_emit_conv(p, ptr, alloc_type_pointer(base_integer)); - offset = lb_emit_load(p, offset); - - if (!is_type_unsigned(base_integer)) { - offset = lb_emit_conv(p, offset, t_i64); - } - offset = lb_emit_conv(p, offset, t_uintptr); - lbValue absolute_ptr = lb_emit_arith(p, Token_Add, ptr, offset, t_uintptr); - absolute_ptr = lb_emit_conv(p, absolute_ptr, pointer_type); - - lbValue cond = lb_emit_comp(p, Token_CmpEq, offset, lb_const_nil(p->module, base_integer)); - - // NOTE(bill): nil check - lbValue nil_ptr = lb_const_nil(p->module, pointer_type); - lbValue final_ptr = lb_emit_select(p, cond, nil_ptr, absolute_ptr); - return final_ptr; -} - gb_internal lbValue lb_make_soa_pointer(lbProcedure *p, Type *type, lbValue const &addr, lbValue const &index) { lbAddr v = lb_add_local_generated(p, type, false); lbValue ptr = lb_emit_struct_ep(p, v.addr, 0); @@ -557,9 +513,6 @@ gb_internal lbValue lb_addr_get_ptr(lbProcedure *p, lbAddr const &addr) { case lbAddr_Map: return lb_internal_dynamic_map_get_ptr(p, addr.addr, addr.map.key); - case lbAddr_RelativePointer: - return lb_relative_pointer_to_pointer(p, addr); - case lbAddr_SoaVariable: { Type *soa_ptr_type = alloc_type_soa_pointer(lb_addr_type(addr)); @@ -584,9 +537,6 @@ gb_internal lbValue lb_addr_get_ptr(lbProcedure *p, lbAddr const &addr) { gb_internal lbValue lb_build_addr_ptr(lbProcedure *p, Ast *expr) { lbAddr addr = lb_build_addr(p, expr); - if (addr.kind == lbAddr_RelativePointer) { - return addr.addr; - } return lb_addr_get_ptr(p, addr); } @@ -819,10 +769,6 @@ gb_internal void lb_addr_store(lbProcedure *p, lbAddr addr, lbValue value) { value.value = LLVMConstNull(lb_type(p->module, t)); } - if (addr.kind == lbAddr_RelativePointer && addr.relative.deref) { - addr = lb_addr(lb_address_from_load(p, lb_addr_load(p, addr))); - } - if (addr.kind == lbAddr_BitField) { lbValue dst = addr.addr; if (is_type_endian_big(addr.bitfield.type)) { @@ -860,44 +806,6 @@ gb_internal void lb_addr_store(lbProcedure *p, lbAddr addr, lbValue value) { lb_emit_runtime_call(p, "__write_bits", args); } return; - } else if (addr.kind == lbAddr_RelativePointer) { - Type *rel_ptr = base_type(lb_addr_type(addr)); - GB_ASSERT(rel_ptr->kind == Type_RelativePointer || - rel_ptr->kind == Type_RelativeMultiPointer); - Type *pointer_type = nullptr; - Type *base_integer = nullptr; - - if (rel_ptr->kind == Type_RelativePointer) { - pointer_type = rel_ptr->RelativePointer.pointer_type; - base_integer = rel_ptr->RelativePointer.base_integer; - } else if (rel_ptr->kind == Type_RelativeMultiPointer) { - pointer_type = rel_ptr->RelativeMultiPointer.pointer_type; - base_integer = rel_ptr->RelativeMultiPointer.base_integer; - } - - value = lb_emit_conv(p, value, pointer_type); - - GB_ASSERT(is_type_pointer(addr.addr.type)); - lbValue ptr = lb_emit_conv(p, addr.addr, t_uintptr); - lbValue val_ptr = lb_emit_conv(p, value, t_uintptr); - lbValue offset = {}; - offset.value = LLVMBuildSub(p->builder, val_ptr.value, ptr.value, ""); - offset.type = t_uintptr; - - if (!is_type_unsigned(base_integer)) { - offset = lb_emit_conv(p, offset, t_i64); - } - offset = lb_emit_conv(p, offset, base_integer); - - lbValue offset_ptr = lb_emit_conv(p, addr.addr, alloc_type_pointer(base_integer)); - offset = lb_emit_select(p, - lb_emit_comp(p, Token_CmpEq, val_ptr, lb_const_nil(p->module, t_uintptr)), - lb_const_nil(p->module, base_integer), - offset - ); - LLVMBuildStore(p->builder, offset.value, offset_ptr.value); - return; - } else if (addr.kind == lbAddr_Map) { lb_internal_dynamic_map_set(p, addr.addr, addr.map.type, addr.map.key, value, p->curr_stmt); return; @@ -1246,46 +1154,6 @@ gb_internal lbValue lb_addr_load(lbProcedure *p, lbAddr const &addr) { } return r; - } else if (addr.kind == lbAddr_RelativePointer) { - Type *rel_ptr = base_type(lb_addr_type(addr)); - Type *base_integer = nullptr; - Type *pointer_type = nullptr; - GB_ASSERT(rel_ptr->kind == Type_RelativePointer || - rel_ptr->kind == Type_RelativeMultiPointer); - - if (rel_ptr->kind == Type_RelativePointer) { - base_integer = rel_ptr->RelativePointer.base_integer; - pointer_type = rel_ptr->RelativePointer.pointer_type; - } else if (rel_ptr->kind == Type_RelativeMultiPointer) { - base_integer = rel_ptr->RelativeMultiPointer.base_integer; - pointer_type = rel_ptr->RelativeMultiPointer.pointer_type; - } - - lbValue ptr = lb_emit_conv(p, addr.addr, t_uintptr); - lbValue offset = lb_emit_conv(p, ptr, alloc_type_pointer(base_integer)); - offset = lb_emit_load(p, offset); - - - if (!is_type_unsigned(base_integer)) { - offset = lb_emit_conv(p, offset, t_i64); - } - offset = lb_emit_conv(p, offset, t_uintptr); - lbValue absolute_ptr = lb_emit_arith(p, Token_Add, ptr, offset, t_uintptr); - absolute_ptr = lb_emit_conv(p, absolute_ptr, pointer_type); - - lbValue cond = lb_emit_comp(p, Token_CmpEq, offset, lb_const_nil(p->module, base_integer)); - - // NOTE(bill): nil check - lbValue nil_ptr = lb_const_nil(p->module, pointer_type); - lbValue final_ptr = {}; - final_ptr.type = absolute_ptr.type; - final_ptr.value = LLVMBuildSelect(p->builder, cond.value, nil_ptr.value, absolute_ptr.value, ""); - - if (rel_ptr->kind == Type_RelativeMultiPointer) { - return final_ptr; - } - return lb_emit_load(p, final_ptr); - } else if (addr.kind == lbAddr_Map) { Type *map_type = base_type(type_deref(addr.addr.type)); GB_ASSERT(map_type->kind == Type_Map); @@ -2378,13 +2246,6 @@ gb_internal LLVMTypeRef lb_type_internal(lbModule *m, Type *type) { case Type_SimdVector: return LLVMVectorType(lb_type(m, type->SimdVector.elem), cast(unsigned)type->SimdVector.count); - - case Type_RelativePointer: - return lb_type_internal(m, type->RelativePointer.base_integer); - case Type_RelativeMultiPointer: - return lb_type_internal(m, type->RelativeMultiPointer.base_integer); - - case Type_Matrix: { diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index f3fcc8de4..6c12b37be 100644 --- a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -61,8 +61,6 @@ gb_internal u64 lb_typeid_kind(lbModule *m, Type *type, u64 id=0) { case Type_Proc: kind = Typeid_Procedure; break; case Type_BitSet: kind = Typeid_Bit_Set; break; case Type_SimdVector: kind = Typeid_Simd_Vector; break; - case Type_RelativePointer: kind = Typeid_Relative_Pointer; break; - case Type_RelativeMultiPointer: kind = Typeid_Relative_Multi_Pointer; break; case Type_SoaPointer: kind = Typeid_SoaPointer; break; case Type_BitField: kind = Typeid_Bit_Field; break; } @@ -950,30 +948,6 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ } break; - case Type_RelativePointer: - { - tag_type = t_type_info_relative_pointer; - LLVMValueRef vals[2] = { - get_type_info_ptr(m, t->RelativePointer.pointer_type), - get_type_info_ptr(m, t->RelativePointer.base_integer), - }; - - variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); - } - break; - - case Type_RelativeMultiPointer: - { - tag_type = t_type_info_relative_multi_pointer; - LLVMValueRef vals[2] = { - get_type_info_ptr(m, t->RelativeMultiPointer.pointer_type), - get_type_info_ptr(m, t->RelativeMultiPointer.base_integer), - }; - - variant_value = llvm_const_named_struct(m, tag_type, vals, gb_count_of(vals)); - } - break; - case Type_Matrix: { tag_type = t_type_info_matrix; diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index e6049da84..a2a0ba4cc 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1131,10 +1131,6 @@ gb_internal lbValue lb_emit_struct_ep(lbProcedure *p, lbValue s, i32 index) { Type *t = base_type(type_deref(s.type)); Type *result_type = nullptr; - if (is_type_relative_pointer(t)) { - s = lb_addr_get_ptr(p, lb_addr(s)); - } - if (is_type_struct(t)) { result_type = get_struct_field_type(t, index); } else if (is_type_union(t)) { @@ -1440,8 +1436,6 @@ gb_internal lbValue lb_emit_deep_field_gep(lbProcedure *p, lbValue e, Selection e = lb_emit_array_epi(p, e, index); } else if (type->kind == Type_Map) { e = lb_emit_struct_ep(p, e, index); - } else if (type->kind == Type_RelativePointer) { - e = lb_emit_struct_ep(p, e, index); } else { GB_PANIC("un-gep-able type %s", type_to_string(type)); } diff --git a/src/types.cpp b/src/types.cpp index b49bce7d4..c51df7261 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -272,14 +272,6 @@ struct TypeProc { Type *elem; \ Type *generic_count; \ }) \ - TYPE_KIND(RelativePointer, struct { \ - Type *pointer_type; \ - Type *base_integer; \ - }) \ - TYPE_KIND(RelativeMultiPointer, struct { \ - Type *pointer_type; \ - Type *base_integer; \ - }) \ TYPE_KIND(Matrix, struct { \ Type *elem; \ i64 row_count; \ @@ -367,8 +359,6 @@ enum Typeid_Kind : u8 { Typeid_Map, Typeid_Bit_Set, Typeid_Simd_Vector, - Typeid_Relative_Pointer, - Typeid_Relative_Multi_Pointer, Typeid_Matrix, Typeid_SoaPointer, Typeid_Bit_Field, @@ -678,8 +668,6 @@ gb_global Type *t_type_info_enum = nullptr; gb_global Type *t_type_info_map = nullptr; gb_global Type *t_type_info_bit_set = nullptr; gb_global Type *t_type_info_simd_vector = nullptr; -gb_global Type *t_type_info_relative_pointer = nullptr; -gb_global Type *t_type_info_relative_multi_pointer = nullptr; gb_global Type *t_type_info_matrix = nullptr; gb_global Type *t_type_info_soa_pointer = nullptr; gb_global Type *t_type_info_bit_field = nullptr; @@ -708,8 +696,6 @@ gb_global Type *t_type_info_enum_ptr = nullptr; gb_global Type *t_type_info_map_ptr = nullptr; gb_global Type *t_type_info_bit_set_ptr = nullptr; gb_global Type *t_type_info_simd_vector_ptr = nullptr; -gb_global Type *t_type_info_relative_pointer_ptr = nullptr; -gb_global Type *t_type_info_relative_multi_pointer_ptr = nullptr; gb_global Type *t_type_info_matrix_ptr = nullptr; gb_global Type *t_type_info_soa_pointer_ptr = nullptr; gb_global Type *t_type_info_bit_field_ptr = nullptr; @@ -1118,24 +1104,6 @@ gb_internal Type *alloc_type_bit_field() { return t; } -gb_internal Type *alloc_type_relative_pointer(Type *pointer_type, Type *base_integer) { - GB_ASSERT(is_type_pointer(pointer_type)); - GB_ASSERT(is_type_integer(base_integer)); - Type *t = alloc_type(Type_RelativePointer); - t->RelativePointer.pointer_type = pointer_type; - t->RelativePointer.base_integer = base_integer; - return t; -} - -gb_internal Type *alloc_type_relative_multi_pointer(Type *pointer_type, Type *base_integer) { - GB_ASSERT(is_type_multi_pointer(pointer_type)); - GB_ASSERT(is_type_integer(base_integer)); - Type *t = alloc_type(Type_RelativeMultiPointer); - t->RelativeMultiPointer.pointer_type = pointer_type; - t->RelativeMultiPointer.base_integer = base_integer; - return t; -} - gb_internal Type *alloc_type_named(String name, Type *base, Entity *type_name) { Type *t = alloc_type(Type_Named); t->Named.name = name; @@ -1227,8 +1195,6 @@ gb_internal Type *type_deref(Type *t, bool allow_multi_pointer) { switch (bt->kind) { case Type_Pointer: return bt->Pointer.elem; - case Type_RelativePointer: - return type_deref(bt->RelativePointer.pointer_type); case Type_SoaPointer: { Type *elem = base_type(bt->SoaPointer.elem); @@ -1667,15 +1633,6 @@ gb_internal bool is_type_generic(Type *t) { return t->kind == Type_Generic; } -gb_internal bool is_type_relative_pointer(Type *t) { - t = base_type(t); - return t->kind == Type_RelativePointer; -} -gb_internal bool is_type_relative_multi_pointer(Type *t) { - t = base_type(t); - return t->kind == Type_RelativeMultiPointer; -} - gb_internal bool is_type_u8_slice(Type *t) { t = base_type(t); if (t->kind == Type_Slice) { @@ -2118,8 +2075,6 @@ gb_internal bool is_type_indexable(Type *t) { return true; case Type_EnumeratedArray: return true; - case Type_RelativeMultiPointer: - return true; case Type_Matrix: return true; } @@ -2137,8 +2092,6 @@ gb_internal bool is_type_sliceable(Type *t) { return true; case Type_EnumeratedArray: return false; - case Type_RelativeMultiPointer: - return true; case Type_Matrix: return false; } @@ -2345,27 +2298,7 @@ gb_internal bool is_type_polymorphic(Type *t, bool or_specialized=false) { return true; } break; - - case Type_RelativeMultiPointer: - if (is_type_polymorphic(t->RelativeMultiPointer.pointer_type, or_specialized)) { - return true; - } - if (t->RelativeMultiPointer.base_integer != nullptr && - is_type_polymorphic(t->RelativeMultiPointer.base_integer, or_specialized)) { - return true; - } - break; - case Type_RelativePointer: - if (is_type_polymorphic(t->RelativePointer.pointer_type, or_specialized)) { - return true; - } - if (t->RelativePointer.base_integer != nullptr && - is_type_polymorphic(t->RelativePointer.base_integer, or_specialized)) { - return true; - } - break; } - return false; } @@ -2407,10 +2340,6 @@ gb_internal bool type_has_nil(Type *t) { } } return false; - - case Type_RelativePointer: - case Type_RelativeMultiPointer: - return true; } return false; } @@ -2579,10 +2508,6 @@ gb_internal bool is_type_load_safe(Type *type) { } return true; - case Type_RelativePointer: - case Type_RelativeMultiPointer: - return true; - case Type_Pointer: case Type_MultiPointer: case Type_Slice: @@ -3945,11 +3870,6 @@ gb_internal i64 type_align_of_internal(Type *t, TypePath *path) { case Type_Matrix: return matrix_align_of(t, path); - case Type_RelativePointer: - return type_align_of_internal(t->RelativePointer.base_integer, path); - case Type_RelativeMultiPointer: - return type_align_of_internal(t->RelativeMultiPointer.base_integer, path); - case Type_SoaPointer: return build_context.int_size; } @@ -4242,11 +4162,6 @@ gb_internal i64 type_size_of_internal(Type *t, TypePath *path) { case Type_BitField: return type_size_of_internal(t->BitField.backing_type, path); - - case Type_RelativePointer: - return type_size_of_internal(t->RelativePointer.base_integer, path); - case Type_RelativeMultiPointer: - return type_size_of_internal(t->RelativeMultiPointer.base_integer, path); } // Catch all @@ -4872,19 +4787,6 @@ gb_internal gbString write_type_to_string(gbString str, Type *type, bool shortha str = gb_string_append_fmt(str, "#simd[%d]", cast(int)type->SimdVector.count); str = write_type_to_string(str, type->SimdVector.elem); break; - - case Type_RelativePointer: - str = gb_string_append_fmt(str, "#relative("); - str = write_type_to_string(str, type->RelativePointer.base_integer); - str = gb_string_append_fmt(str, ") "); - str = write_type_to_string(str, type->RelativePointer.pointer_type); - break; - case Type_RelativeMultiPointer: - str = gb_string_append_fmt(str, "#relative("); - str = write_type_to_string(str, type->RelativePointer.base_integer); - str = gb_string_append_fmt(str, ") "); - str = write_type_to_string(str, type->RelativePointer.pointer_type); - break; case Type_Matrix: if (type->Matrix.is_row_major) { -- cgit v1.2.3 From fdf510b7b35119739d6e41170abd46204356d58b Mon Sep 17 00:00:00 2001 From: misomosi Date: Sat, 21 Dec 2024 19:59:31 -0500 Subject: Pack struct when needed, use field_align metadata --- src/llvm_backend.hpp | 2 ++ src/llvm_backend_general.cpp | 18 +++++++++++++++++- src/llvm_backend_utility.cpp | 19 ++++++++++++++++--- 3 files changed, 35 insertions(+), 4 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index e84ffd1cd..3bbd97e4b 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -742,3 +742,5 @@ gb_global char const *llvm_linkage_strings[] = { }; #define ODIN_METADATA_IS_PACKED str_lit("odin-is-packed") +#define ODIN_METADATA_MIN_ALIGN str_lit("odin-min-align") +#define ODIN_METADATA_MAX_ALIGN str_lit("odin-max-align") diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index bab330da7..762256258 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -734,6 +734,17 @@ gb_internal LLVMValueRef OdinLLVMBuildLoad(lbProcedure *p, LLVMTypeRef type, LLV if (is_packed != 0) { LLVMSetAlignment(result, 1); } + u64 align = LLVMGetAlignment(result); + u64 align_min = lb_get_metadata_custom_u64(p->module, value, ODIN_METADATA_MIN_ALIGN); + u64 align_max = lb_get_metadata_custom_u64(p->module, value, ODIN_METADATA_MAX_ALIGN); + if (align_min != 0 && align < align_min) { + align = align_min; + } + if (align_max != 0 && align > align_max) { + align = align_max; + } + GB_ASSERT(align <= UINT_MAX); + LLVMSetAlignment(result, (unsigned int)align); } return result; @@ -2121,6 +2132,7 @@ gb_internal LLVMTypeRef lb_type_internal(lbModule *m, Type *type) { } i64 prev_offset = 0; + bool requires_packing = type->Struct.is_packed; for (i32 field_index : struct_fields_index_by_increasing_offset(temporary_allocator(), type)) { Entity *field = type->Struct.fields[field_index]; i64 offset = type->Struct.offsets[field_index]; @@ -2141,6 +2153,10 @@ gb_internal LLVMTypeRef lb_type_internal(lbModule *m, Type *type) { field_type = t_rawptr; } + // max_field_align might misalign items in a way that requires packing + // so check the alignment of all fields to see if packing is required. + requires_packing = requires_packing || ((offset % type_align_of(field_type)) != 0); + array_add(&fields, lb_type(m, field_type)); prev_offset = offset + type_size_of(field->type); @@ -2155,7 +2171,7 @@ gb_internal LLVMTypeRef lb_type_internal(lbModule *m, Type *type) { GB_ASSERT(fields[i] != nullptr); } - LLVMTypeRef struct_type = LLVMStructTypeInContext(ctx, fields.data, cast(unsigned)fields.count, type->Struct.is_packed); + LLVMTypeRef struct_type = LLVMStructTypeInContext(ctx, fields.data, cast(unsigned)fields.count, requires_packing); map_set(&m->struct_field_remapping, cast(void *)struct_type, field_remapping); map_set(&m->struct_field_remapping, cast(void *)type, field_remapping); #if 0 diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index a2a0ba4cc..b86d0773b 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1200,9 +1200,22 @@ gb_internal lbValue lb_emit_struct_ep(lbProcedure *p, lbValue s, i32 index) { lbValue gep = lb_emit_struct_ep_internal(p, s, index, result_type); Type *bt = base_type(t); - if (bt->kind == Type_Struct && bt->Struct.is_packed) { - lb_set_metadata_custom_u64(p->module, gep.value, ODIN_METADATA_IS_PACKED, 1); - GB_ASSERT(lb_get_metadata_custom_u64(p->module, gep.value, ODIN_METADATA_IS_PACKED) == 1); + if (bt->kind == Type_Struct) { + if (bt->Struct.is_packed) { + lb_set_metadata_custom_u64(p->module, gep.value, ODIN_METADATA_IS_PACKED, 1); + GB_ASSERT(lb_get_metadata_custom_u64(p->module, gep.value, ODIN_METADATA_IS_PACKED) == 1); + } + u64 align_max = bt->Struct.custom_max_field_align; + u64 align_min = bt->Struct.custom_min_field_align; + GB_ASSERT(align_min == 0 || align_max == 0 || align_min <= align_max); + if (align_max) { + lb_set_metadata_custom_u64(p->module, gep.value, ODIN_METADATA_MAX_ALIGN, align_max); + GB_ASSERT(lb_get_metadata_custom_u64(p->module, gep.value, ODIN_METADATA_MAX_ALIGN) == align_max); + } + if (align_min) { + lb_set_metadata_custom_u64(p->module, gep.value, ODIN_METADATA_MIN_ALIGN, align_min); + GB_ASSERT(lb_get_metadata_custom_u64(p->module, gep.value, ODIN_METADATA_MIN_ALIGN) == align_min); + } } return gep; -- cgit v1.2.3 From 7b334d2bd9e881b450fb19e394d6d71840a62cf9 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 1 Jan 2025 17:26:15 +0000 Subject: Add `#branch_location` --- src/check_expr.cpp | 12 +++++++ src/entity.cpp | 1 + src/llvm_backend.hpp | 7 +++- src/llvm_backend_expr.cpp | 12 +++++-- src/llvm_backend_proc.cpp | 6 +++- src/llvm_backend_stmt.cpp | 81 ++++++++++++++++++++++++++++---------------- src/llvm_backend_utility.cpp | 8 ++--- 7 files changed, 89 insertions(+), 38 deletions(-) (limited to 'src/llvm_backend_utility.cpp') diff --git a/src/check_expr.cpp b/src/check_expr.cpp index cc9483187..fba9b8dad 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -8725,6 +8725,18 @@ gb_internal ExprKind check_basic_directive_expr(CheckerContext *c, Operand *o, A error(node, "#caller_expression may only be used as a default argument parameter"); o->type = t_string; o->mode = Addressing_Value; + } else if (name == "branch_location") { + if (!c->in_defer) { + error(node, "#branch_location may only be used within a 'defer' statement"); + } else if (c->curr_proc_decl) { + Entity *e = c->curr_proc_decl->entity; + if (e != nullptr) { + GB_ASSERT(e->kind == Entity_Procedure); + e->Procedure.uses_branch_location = true; + } + } + o->type = t_source_code_location; + o->mode = Addressing_Value; } else { if (name == "location") { init_core_source_code_location(c->checker); diff --git a/src/entity.cpp b/src/entity.cpp index 0c4a20df4..802b381f9 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -256,6 +256,7 @@ struct Entity { bool entry_point_only : 1; bool has_instrumentation : 1; bool is_memcpy_like : 1; + bool uses_branch_location : 1; } Procedure; struct { Array entities; diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index e84ffd1cd..8ca11bf28 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -359,6 +359,10 @@ struct lbProcedure { bool in_multi_assignment; Array raw_input_parameters; + bool uses_branch_location; + TokenPos branch_location_pos; + TokenPos curr_token_pos; + Array variadic_reuses; lbAddr variadic_reuse_base_array_ptr; @@ -444,7 +448,8 @@ gb_internal lbValue lb_emit_matrix_ev(lbProcedure *p, lbValue s, isize row, isiz gb_internal lbValue lb_emit_arith(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type); gb_internal lbValue lb_emit_byte_swap(lbProcedure *p, lbValue value, Type *end_type); -gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block); +gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block, TokenPos pos); +gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block, Ast *node); gb_internal lbValue lb_emit_transmute(lbProcedure *p, lbValue value, Type *t); gb_internal lbValue lb_emit_comp(lbProcedure *p, TokenKind op_kind, lbValue left, lbValue right); gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array const &args, ProcInlining inlining = ProcInlining_none); diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 9c325e088..3b238bcd8 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -3502,7 +3502,13 @@ gb_internal lbValue lb_build_expr_internal(lbProcedure *p, Ast *expr) { case_ast_node(bd, BasicDirective, expr); TokenPos pos = bd->token.pos; - GB_PANIC("Non-constant basic literal %s - %.*s", token_pos_to_string(pos), LIT(bd->name.string)); + String name = bd->name.string; + if (name == "branch_location") { + GB_ASSERT(p->uses_branch_location); + String proc_name = p->entity->token.string; + return lb_emit_source_code_location_as_global(p, proc_name, p->branch_location_pos); + } + GB_PANIC("Non-constant basic literal %s - %.*s", token_pos_to_string(pos), LIT(name)); case_end; case_ast_node(i, Implicit, expr); @@ -3668,7 +3674,7 @@ gb_internal lbValue lb_build_expr_internal(lbProcedure *p, Ast *expr) { lb_emit_if(p, lb_emit_try_has_value(p, rhs), then, else_); lb_start_block(p, else_); - lb_emit_defer_stmts(p, lbDeferExit_Branch, block); + lb_emit_defer_stmts(p, lbDeferExit_Branch, block, expr); lb_emit_jump(p, block); lb_start_block(p, then); @@ -5493,7 +5499,7 @@ gb_internal lbAddr lb_build_addr_internal(lbProcedure *p, Ast *expr) { lb_emit_if(p, lb_emit_try_has_value(p, rhs), then, else_); lb_start_block(p, else_); - lb_emit_defer_stmts(p, lbDeferExit_Branch, block); + lb_emit_defer_stmts(p, lbDeferExit_Branch, block, expr); lb_emit_jump(p, block); lb_start_block(p, then); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 712e13592..7e44a0046 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -125,6 +125,10 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i // map_init(&p->selector_addr, 0); // map_init(&p->tuple_fix_map, 0); + if (p->entity != nullptr && p->entity->Procedure.uses_branch_location) { + p->uses_branch_location = true; + } + if (p->is_foreign) { lb_add_foreign_library_path(p->module, entity->Procedure.foreign_library); } @@ -757,7 +761,7 @@ gb_internal void lb_end_procedure_body(lbProcedure *p) { if (p->type->Proc.result_count == 0) { instr = LLVMGetLastInstruction(p->curr_block->block); if (!lb_is_instr_terminating(instr)) { - lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr); + lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr, p->body); lb_set_debug_position_to_procedure_end(p); LLVMBuildRetVoid(p->builder); } diff --git a/src/llvm_backend_stmt.cpp b/src/llvm_backend_stmt.cpp index 9a5f25712..a2f0d2f4a 100644 --- a/src/llvm_backend_stmt.cpp +++ b/src/llvm_backend_stmt.cpp @@ -208,8 +208,8 @@ gb_internal void lb_open_scope(lbProcedure *p, Scope *s) { } -gb_internal void lb_close_scope(lbProcedure *p, lbDeferExitKind kind, lbBlock *block, bool pop_stack=true) { - lb_emit_defer_stmts(p, kind, block); +gb_internal void lb_close_scope(lbProcedure *p, lbDeferExitKind kind, lbBlock *block, Ast *node, bool pop_stack=true) { + lb_emit_defer_stmts(p, kind, block, node); GB_ASSERT(p->scope_index > 0); // NOTE(bill): Remove `context`s made in that scope @@ -721,7 +721,7 @@ gb_internal void lb_build_range_interval(lbProcedure *p, AstBinaryExpr *node, lb_build_stmt(p, rs->body); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, node->left); lb_pop_target_list(p); if (check != nullptr) { @@ -854,7 +854,7 @@ gb_internal void lb_build_range_tuple(lbProcedure *p, AstRangeStmt *rs, Scope *s lb_build_stmt(p, rs->body); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, rs->body); lb_pop_target_list(p); lb_emit_jump(p, loop); lb_start_block(p, done); @@ -976,7 +976,7 @@ gb_internal void lb_build_range_stmt_struct_soa(lbProcedure *p, AstRangeStmt *rs lb_build_stmt(p, rs->body); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, rs->body); lb_pop_target_list(p); lb_emit_jump(p, loop); lb_start_block(p, done); @@ -1192,7 +1192,7 @@ gb_internal void lb_build_range_stmt(lbProcedure *p, AstRangeStmt *rs, Scope *sc lb_build_stmt(p, rs->body); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, rs->body); lb_pop_target_list(p); lb_emit_jump(p, loop); lb_start_block(p, done); @@ -1363,7 +1363,7 @@ gb_internal void lb_build_unroll_range_stmt(lbProcedure *p, AstUnrollRangeStmt * } - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, rs->body); } gb_internal bool lb_switch_stmt_can_be_trivial_jump_table(AstSwitchStmt *ss, bool *default_found_) { @@ -1433,6 +1433,7 @@ gb_internal void lb_build_switch_stmt(lbProcedure *p, AstSwitchStmt *ss, Scope * ast_node(body, BlockStmt, ss->body); isize case_count = body->stmts.count; + Ast *default_clause = nullptr; Slice default_stmts = {}; lbBlock *default_fall = nullptr; lbBlock *default_block = nullptr; @@ -1482,6 +1483,7 @@ gb_internal void lb_build_switch_stmt(lbProcedure *p, AstSwitchStmt *ss, Scope * if (cc->list.count == 0) { // default case + default_clause = clause; default_stmts = cc->stmts; default_fall = fall; if (switch_instr == nullptr) { @@ -1552,7 +1554,7 @@ gb_internal void lb_build_switch_stmt(lbProcedure *p, AstSwitchStmt *ss, Scope * lb_push_target_list(p, ss->label, done, nullptr, fall); lb_open_scope(p, body->scope); lb_build_stmt_list(p, cc->stmts); - lb_close_scope(p, lbDeferExit_Default, body); + lb_close_scope(p, lbDeferExit_Default, body, clause); lb_pop_target_list(p); lb_emit_jump(p, done); @@ -1570,13 +1572,13 @@ gb_internal void lb_build_switch_stmt(lbProcedure *p, AstSwitchStmt *ss, Scope * lb_push_target_list(p, ss->label, done, nullptr, default_fall); lb_open_scope(p, default_block->scope); lb_build_stmt_list(p, default_stmts); - lb_close_scope(p, lbDeferExit_Default, default_block); + lb_close_scope(p, lbDeferExit_Default, default_block, default_clause); lb_pop_target_list(p); } lb_emit_jump(p, done); lb_start_block(p, done); - lb_close_scope(p, lbDeferExit_Default, done); + lb_close_scope(p, lbDeferExit_Default, done, ss->body); } gb_internal void lb_store_type_case_implicit(lbProcedure *p, Ast *clause, lbValue value, bool is_default_case) { @@ -1627,7 +1629,7 @@ gb_internal void lb_type_case_body(lbProcedure *p, Ast *label, Ast *clause, lbBl lb_push_target_list(p, label, done, nullptr, nullptr); lb_build_stmt_list(p, cc->stmts); - lb_close_scope(p, lbDeferExit_Default, body); + lb_close_scope(p, lbDeferExit_Default, body, clause); lb_pop_target_list(p); lb_emit_jump(p, done); @@ -1835,7 +1837,7 @@ gb_internal void lb_build_type_switch_stmt(lbProcedure *p, AstTypeSwitchStmt *ss lb_emit_jump(p, done); lb_start_block(p, done); - lb_close_scope(p, lbDeferExit_Default, done); + lb_close_scope(p, lbDeferExit_Default, done, ss->body); } @@ -1959,7 +1961,7 @@ gb_internal void lb_build_assignment(lbProcedure *p, Array &lvals, Slice p->in_multi_assignment = prev_in_assignment; } -gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res) { +gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res, TokenPos pos) { lbFunctionType *ft = lb_get_function_type(p->module, p->type); bool return_by_pointer = ft->ret.kind == lbArg_Indirect; bool split_returns = ft->multiple_return_original_type != nullptr; @@ -1982,7 +1984,7 @@ gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res) { LLVMBuildStore(p->builder, LLVMConstNull(p->abi_function_type->ret.type), p->return_ptr.addr.value); } - lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr); + lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr, pos); // Check for terminator in the defer stmts LLVMValueRef instr = LLVMGetLastInstruction(p->curr_block->block); @@ -2012,7 +2014,7 @@ gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res) { ret_val = OdinLLVMBuildTransmute(p, ret_val, ret_type); } - lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr); + lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr, pos); // Check for terminator in the defer stmts LLVMValueRef instr = LLVMGetLastInstruction(p->curr_block->block); @@ -2021,7 +2023,7 @@ gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res) { } } } -gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return_results) { +gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return_results, TokenPos pos) { lb_ensure_abi_function_type(p->module, p); isize return_count = p->type->Proc.result_count; @@ -2029,7 +2031,7 @@ gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return if (return_count == 0) { // No return values - lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr); + lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr, pos); // Check for terminator in the defer stmts LLVMValueRef instr = LLVMGetLastInstruction(p->curr_block->block); @@ -2138,11 +2140,11 @@ gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return GB_ASSERT(result_values.count-1 == result_eps.count); lb_addr_store(p, p->return_ptr, result_values[result_values.count-1]); - lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr); + lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr, pos); LLVMBuildRetVoid(p->builder); return; } else { - return lb_build_return_stmt_internal(p, result_values[result_values.count-1]); + return lb_build_return_stmt_internal(p, result_values[result_values.count-1], pos); } } else { @@ -2169,7 +2171,7 @@ gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return } if (return_by_pointer) { - lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr); + lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr, pos); LLVMBuildRetVoid(p->builder); return; } @@ -2177,13 +2179,13 @@ gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return res = lb_emit_load(p, res); } } - lb_build_return_stmt_internal(p, res); + lb_build_return_stmt_internal(p, res, pos); } gb_internal void lb_build_if_stmt(lbProcedure *p, Ast *node) { ast_node(is, IfStmt, node); lb_open_scope(p, is->scope); // Scope #1 - defer (lb_close_scope(p, lbDeferExit_Default, nullptr)); + defer (lb_close_scope(p, lbDeferExit_Default, nullptr, node)); lbBlock *then = lb_create_block(p, "if.then"); lbBlock *done = lb_create_block(p, "if.done"); @@ -2234,7 +2236,7 @@ gb_internal void lb_build_if_stmt(lbProcedure *p, Ast *node) { lb_open_scope(p, scope_of_node(is->else_stmt)); lb_build_stmt(p, is->else_stmt); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, is->else_stmt); } lb_emit_jump(p, done); @@ -2251,7 +2253,7 @@ gb_internal void lb_build_if_stmt(lbProcedure *p, Ast *node) { lb_open_scope(p, scope_of_node(is->else_stmt)); lb_build_stmt(p, is->else_stmt); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, is->else_stmt); lb_emit_jump(p, done); } @@ -2322,7 +2324,7 @@ gb_internal void lb_build_for_stmt(lbProcedure *p, Ast *node) { } lb_start_block(p, done); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, node); } gb_internal void lb_build_assign_stmt_array(lbProcedure *p, TokenKind op, lbAddr const &lhs, lbValue const &value) { @@ -2588,7 +2590,7 @@ gb_internal void lb_build_stmt(lbProcedure *p, Ast *node) { lb_open_scope(p, bs->scope); lb_build_stmt_list(p, bs->stmts); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, node); if (done != nullptr) { lb_emit_jump(p, done); @@ -2702,7 +2704,7 @@ gb_internal void lb_build_stmt(lbProcedure *p, Ast *node) { case_end; case_ast_node(rs, ReturnStmt, node); - lb_build_return_stmt(p, rs->results); + lb_build_return_stmt(p, rs->results, ast_token(node).pos); case_end; case_ast_node(is, IfStmt, node); @@ -2755,7 +2757,7 @@ gb_internal void lb_build_stmt(lbProcedure *p, Ast *node) { } } if (block != nullptr) { - lb_emit_defer_stmts(p, lbDeferExit_Branch, block); + lb_emit_defer_stmts(p, lbDeferExit_Branch, block, node); } lb_emit_jump(p, block); lb_start_block(p, lb_create_block(p, "unreachable")); @@ -2795,7 +2797,13 @@ gb_internal void lb_build_defer_stmt(lbProcedure *p, lbDefer const &d) { } } -gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block) { +gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block, TokenPos pos) { + TokenPos prev_token_pos = p->branch_location_pos; + if (p->uses_branch_location) { + p->branch_location_pos = pos; + } + defer (p->branch_location_pos = prev_token_pos); + isize count = p->defer_stmts.count; isize i = count; while (i --> 0) { @@ -2822,6 +2830,21 @@ gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlo } } +gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block, Ast *node) { + TokenPos pos = {}; + if (node) { + if (node->kind == Ast_BlockStmt) { + pos = ast_end_token(node).pos; + } else if (node->kind == Ast_CaseClause) { + pos = ast_end_token(node).pos; + } else { + pos = ast_token(node).pos; + } + } + return lb_emit_defer_stmts(p, kind, block, pos); +} + + gb_internal void lb_add_defer_node(lbProcedure *p, isize scope_index, Ast *stmt) { Type *pt = base_type(p->type); GB_ASSERT(pt->kind == Type_Proc); diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index a2a0ba4cc..3e4393a8f 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -476,8 +476,8 @@ gb_internal lbValue lb_emit_or_else(lbProcedure *p, Ast *arg, Ast *else_expr, Ty } } -gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return_results); -gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res); +gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return_results, TokenPos pos); +gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res, TokenPos pos); gb_internal lbValue lb_emit_or_return(lbProcedure *p, Ast *arg, TypeAndValue const &tv) { lbValue lhs = {}; @@ -506,10 +506,10 @@ gb_internal lbValue lb_emit_or_return(lbProcedure *p, Ast *arg, TypeAndValue con lbValue found = map_must_get(&p->module->values, end_entity); lb_emit_store(p, found, rhs); - lb_build_return_stmt(p, {}); + lb_build_return_stmt(p, {}, ast_token(arg).pos); } else { GB_ASSERT(tuple->variables.count == 1); - lb_build_return_stmt_internal(p, rhs); + lb_build_return_stmt_internal(p, rhs, ast_token(arg).pos); } } lb_start_block(p, continue_block); -- cgit v1.2.3