From 759e3428723828113b5c1624ef1ae7bfa0095c2f Mon Sep 17 00:00:00 2001 From: korvahkh <92224397+korvahkh@users.noreply.github.com> Date: Fri, 29 Dec 2023 22:39:34 -0600 Subject: Fix #3056 --- src/llvm_backend_proc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index f64cbd52a..1244bd377 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -3385,7 +3385,7 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { } lbValue arg = args[arg_index]; - if (arg.value == nullptr) { + if (arg.value == nullptr && arg.type == nullptr) { switch (e->kind) { case Entity_TypeName: args[arg_index] = lb_const_nil(p->module, e->type); -- cgit v1.2.3 From 0b83e3dae5c96550f1500612aa7073ee8f6ae5b6 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Fri, 5 Jan 2024 14:29:14 +0000 Subject: Enforce naming the parameters with `builtin.quaternion` to reduce confusion --- core/encoding/json/unmarshal.odin | 6 +- core/math/linalg/general.odin | 2 +- core/runtime/internal.odin | 12 ++-- core/sys/windows/advapi32.odin | 5 ++ examples/demo/demo.odin | 8 ++- src/check_builtin.cpp | 139 ++++++++++++++++++++++++++++++++++---- src/llvm_backend_proc.cpp | 43 ++++++++---- 7 files changed, 174 insertions(+), 41 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/core/encoding/json/unmarshal.odin b/core/encoding/json/unmarshal.odin index afcc43c0c..c1905f6b0 100644 --- a/core/encoding/json/unmarshal.odin +++ b/core/encoding/json/unmarshal.odin @@ -137,9 +137,9 @@ assign_float :: proc(val: any, f: $T) -> bool { case complex64: dst = complex(f32(f), 0) case complex128: dst = complex(f64(f), 0) - case quaternion64: dst = quaternion(f16(f), 0, 0, 0) - case quaternion128: dst = quaternion(f32(f), 0, 0, 0) - case quaternion256: dst = quaternion(f64(f), 0, 0, 0) + case quaternion64: dst = quaternion(w=f16(f), x=0, y=0, z=0) + case quaternion128: dst = quaternion(w=f32(f), x=0, y=0, z=0) + case quaternion256: dst = quaternion(w=f64(f), x=0, y=0, z=0) case: return false } diff --git a/core/math/linalg/general.odin b/core/math/linalg/general.odin index b58305c63..a61fe9189 100644 --- a/core/math/linalg/general.odin +++ b/core/math/linalg/general.odin @@ -70,7 +70,7 @@ outer_product :: builtin.outer_product @(require_results) quaternion_inverse :: proc "contextless" (q: $Q) -> Q where IS_QUATERNION(Q) { - return conj(q) * quaternion(1.0/dot(q, q), 0, 0, 0) + return conj(q) * quaternion(w=1.0/dot(q, q), x=0, y=0, z=0) } diff --git a/core/runtime/internal.odin b/core/runtime/internal.odin index d0e550743..0d7bdee8b 100644 --- a/core/runtime/internal.odin +++ b/core/runtime/internal.odin @@ -730,7 +730,7 @@ mul_quaternion64 :: proc "contextless" (q, r: quaternion64) -> quaternion64 { t2 := r0*q2 + r1*q3 + r2*q0 - r3*q1 t3 := r0*q3 - r1*q2 + r2*q1 + r3*q0 - return quaternion(t0, t1, t2, t3) + return quaternion(w=t0, x=t1, y=t2, z=t3) } mul_quaternion128 :: proc "contextless" (q, r: quaternion128) -> quaternion128 { @@ -742,7 +742,7 @@ mul_quaternion128 :: proc "contextless" (q, r: quaternion128) -> quaternion128 { t2 := r0*q2 + r1*q3 + r2*q0 - r3*q1 t3 := r0*q3 - r1*q2 + r2*q1 + r3*q0 - return quaternion(t0, t1, t2, t3) + return quaternion(w=t0, x=t1, y=t2, z=t3) } mul_quaternion256 :: proc "contextless" (q, r: quaternion256) -> quaternion256 { @@ -754,7 +754,7 @@ mul_quaternion256 :: proc "contextless" (q, r: quaternion256) -> quaternion256 { t2 := r0*q2 + r1*q3 + r2*q0 - r3*q1 t3 := r0*q3 - r1*q2 + r2*q1 + r3*q0 - return quaternion(t0, t1, t2, t3) + return quaternion(w=t0, x=t1, y=t2, z=t3) } quo_quaternion64 :: proc "contextless" (q, r: quaternion64) -> quaternion64 { @@ -768,7 +768,7 @@ quo_quaternion64 :: proc "contextless" (q, r: quaternion64) -> quaternion64 { t2 := (r0*q2 - r1*q3 - r2*q0 + r3*q1) * invmag2 t3 := (r0*q3 + r1*q2 + r2*q1 - r3*q0) * invmag2 - return quaternion(t0, t1, t2, t3) + return quaternion(w=t0, x=t1, y=t2, z=t3) } quo_quaternion128 :: proc "contextless" (q, r: quaternion128) -> quaternion128 { @@ -782,7 +782,7 @@ quo_quaternion128 :: proc "contextless" (q, r: quaternion128) -> quaternion128 { t2 := (r0*q2 - r1*q3 - r2*q0 + r3*q1) * invmag2 t3 := (r0*q3 + r1*q2 + r2*q1 - r3*q0) * invmag2 - return quaternion(t0, t1, t2, t3) + return quaternion(w=t0, x=t1, y=t2, z=t3) } quo_quaternion256 :: proc "contextless" (q, r: quaternion256) -> quaternion256 { @@ -796,7 +796,7 @@ quo_quaternion256 :: proc "contextless" (q, r: quaternion256) -> quaternion256 { t2 := (r0*q2 - r1*q3 - r2*q0 + r3*q1) * invmag2 t3 := (r0*q3 + r1*q2 + r2*q1 - r3*q0) * invmag2 - return quaternion(t0, t1, t2, t3) + return quaternion(w=t0, x=t1, y=t2, z=t3) } @(link_name="__truncsfhf2", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE) diff --git a/core/sys/windows/advapi32.odin b/core/sys/windows/advapi32.odin index dc7ec1e08..86e173211 100644 --- a/core/sys/windows/advapi32.odin +++ b/core/sys/windows/advapi32.odin @@ -13,6 +13,11 @@ foreign advapi32 { DesiredAccess: DWORD, TokenHandle: ^HANDLE) -> BOOL --- + OpenThreadToken :: proc(ThreadHandle: HANDLE, + DesiredAccess: DWORD, + OpenAsSelf: BOOL, + TokenHandle: ^HANDLE) -> BOOL --- + CryptAcquireContextW :: proc(hProv: ^HCRYPTPROV, szContainer, szProvider: wstring, dwProvType, dwFlags: DWORD) -> DWORD --- CryptGenRandom :: proc(hProv: HCRYPTPROV, dwLen: DWORD, buf: LPVOID) -> DWORD --- CryptReleaseContext :: proc(hProv: HCRYPTPROV, dwFlags: DWORD) -> DWORD --- diff --git a/examples/demo/demo.odin b/examples/demo/demo.odin index b890d5778..bc6a4d9ea 100644 --- a/examples/demo/demo.odin +++ b/examples/demo/demo.odin @@ -1514,7 +1514,7 @@ quaternions :: proc() { { // Quaternion operations q := 1 + 2i + 3j + 4k - r := quaternion(5, 6, 7, 8) + r := quaternion(real=5, imag=6, jmag=7, kmag=8) t := q * r fmt.printf("(%v) * (%v) = %v\n", q, r, t) v := q / r @@ -1527,8 +1527,10 @@ quaternions :: proc() { { // The quaternion types q128: quaternion128 // 4xf32 q256: quaternion256 // 4xf64 - q128 = quaternion(1, 0, 0, 0) - q256 = 1 // quaternion(1, 0, 0, 0) + q128 = quaternion(w=1, x=0, y=0, z=0) + q256 = 1 // quaternion(x=0, y=0, z=0, w=1) + + // NOTE: The internal memory layout of a quaternion is xyzw } { // Built-in procedures q := 1 + 2i + 3j + 4k diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index b3aaab754..55962f89f 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -1666,7 +1666,12 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As if (ce->args.count > 0) { if (ce->args[0]->kind == Ast_FieldValue) { - if (id != BuiltinProc_soa_zip) { + switch (id) { + case BuiltinProc_soa_zip: + case BuiltinProc_quaternion: + // okay + break; + default: error(call, "'field = value' calling is not allowed on built-in procedures"); return false; } @@ -2299,8 +2304,32 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As } case BuiltinProc_quaternion: { + bool first_is_field_value = (ce->args[0]->kind == Ast_FieldValue); + + bool fail = false; + for (Ast *arg : ce->args) { + bool mix = false; + if (first_is_field_value) { + mix = arg->kind != Ast_FieldValue; + } else { + mix = arg->kind == Ast_FieldValue; + } + if (mix) { + error(arg, "Mixture of 'field = value' and value elements in the procedure call '%.*s' is not allowed", LIT(builtin_name)); + fail = true; + break; + } + } + + if (fail) { + operand->type = t_untyped_quaternion; + operand->mode = Addressing_Constant; + operand->value = exact_value_quaternion(0.0, 0.0, 0.0, 0.0); + break; + } + // quaternion :: proc(real, imag, jmag, kmag: float_type) -> complex_type - Operand x = *operand; + Operand x = {}; Operand y = {}; Operand z = {}; Operand w = {}; @@ -2309,23 +2338,103 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As operand->type = t_invalid; operand->mode = Addressing_Invalid; - check_expr(c, &y, ce->args[1]); - if (y.mode == Addressing_Invalid) { - return false; - } - check_expr(c, &z, ce->args[2]); - if (y.mode == Addressing_Invalid) { - return false; - } - check_expr(c, &w, ce->args[3]); - if (y.mode == Addressing_Invalid) { - return false; - } + if (first_is_field_value) { + u32 fields_set[4] = {}; // 0 unset, 1 xyzw, 2 real/etc + + auto const check_field = [&fields_set, &builtin_name](CheckerContext *c, Operand *o, Ast *arg, i32 *index) -> bool { + *index = -1; + + ast_node(field, FieldValue, arg); + String name = {}; + if (field->field->kind == Ast_Ident) { + name = field->field->Ident.token.string; + } else { + error(field->field, "Expected an identifier for field argument"); + return false; + } + + u32 style = 0; + + if (name == "x") { + *index = 1; style = 1; + } else if (name == "y") { + *index = 2; style = 1; + } else if (name == "z") { + *index = 3; style = 1; + } else if (name == "w") { + *index = 0; style = 1; + } else if (name == "imag") { + *index = 1; style = 2; + } else if (name == "jmag") { + *index = 2; style = 2; + } else if (name == "kmag") { + *index = 3; style = 2; + } else if (name == "real") { + *index = 0; style = 2; + } else { + error(field->field, "Unknown name for '%.*s', expected (w, x, y, z; or real, imag, jmag, kmag), got '%.*s'", LIT(builtin_name), LIT(name)); + return false; + } + + if (fields_set[*index]) { + error(field->field, "Previously assigned field: '%.*s'", LIT(name)); + } + fields_set[*index] = style; + + check_expr(c, o, field->value); + return o->mode != Addressing_Invalid; + }; + + // TODO(bill): disallow style mixing + Operand *refs[4] = {&x, &y, &z, &w}; + + for (i32 i = 0; i < 4; i++) { + i32 index = -1; + Operand o = {}; + bool ok = check_field(c, &o, ce->args[i], &index); + if (!ok) { + return false; + } + + *refs[index] = o; + } + + for (i32 i = 0; i < 4; i++) { + GB_ASSERT(fields_set[i]); + } + for (i32 i = 1; i < 4; i++) { + if (fields_set[i] != fields_set[i-1]) { + error(call, "Mixture of xyzw and real/etc is not allowed with '%.*s'", LIT(builtin_name)); + break; + } + } + } else { + error(call, "'%.*s' requires that all arguments are named (w, x, y, z; or real, imag, jmag, kmag)", LIT(builtin_name)); + + check_expr(c, &x, ce->args[0]); + if (x.mode == Addressing_Invalid) { + return false; + } + + check_expr(c, &y, ce->args[1]); + if (y.mode == Addressing_Invalid) { + return false; + } + check_expr(c, &z, ce->args[2]); + if (z.mode == Addressing_Invalid) { + return false; + } + check_expr(c, &w, ce->args[3]); + if (w.mode == Addressing_Invalid) { + return false; + } + } convert_to_typed(c, &x, y.type); if (x.mode == Addressing_Invalid) return false; convert_to_typed(c, &y, x.type); if (y.mode == Addressing_Invalid) return false; convert_to_typed(c, &z, x.type); if (z.mode == Addressing_Invalid) return false; convert_to_typed(c, &w, x.type); if (w.mode == Addressing_Invalid) return false; + if (x.mode == Addressing_Constant && y.mode == Addressing_Constant && z.mode == Addressing_Constant && @@ -3092,7 +3201,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As mix = arg->kind == Ast_FieldValue; } if (mix) { - error(arg, "Mixture of 'field = value' and value elements in the procedure call 'soa_zip' is not allowed"); + error(arg, "Mixture of 'field = value' and value elements in the procedure call '%.*s' is not allowed", LIT(builtin_name)); fail = true; break; } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 1244bd377..3f16d07a5 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1826,24 +1826,41 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu } case BuiltinProc_quaternion: { - lbValue real = lb_build_expr(p, ce->args[0]); - lbValue imag = lb_build_expr(p, ce->args[1]); - lbValue jmag = lb_build_expr(p, ce->args[2]); - lbValue kmag = lb_build_expr(p, ce->args[3]); + lbValue xyzw[4] = {}; + for (i32 i = 0; i < 4; i++) { + ast_node(f, FieldValue, ce->args[i]); + GB_ASSERT(f->field->kind == Ast_Ident); + String name = f->field->Ident.token.string; + i32 index = -1; + + // @QuaternionLayout + if (name == "x" || name == "imag") { + index = 0; + } else if (name == "y" || name == "jmag") { + index = 1; + } else if (name == "z" || name == "kmag") { + index = 2; + } else if (name == "w" || name == "real") { + index = 3; + } + GB_ASSERT(index >= 0); + + xyzw[index] = lb_build_expr(p, ce->args[i]); + } + - // @QuaternionLayout lbAddr dst_addr = lb_add_local_generated(p, tv.type, false); lbValue dst = lb_addr_get_ptr(p, dst_addr); Type *ft = base_complex_elem_type(tv.type); - real = lb_emit_conv(p, real, ft); - imag = lb_emit_conv(p, imag, ft); - jmag = lb_emit_conv(p, jmag, ft); - kmag = lb_emit_conv(p, kmag, ft); - lb_emit_store(p, lb_emit_struct_ep(p, dst, 3), real); - lb_emit_store(p, lb_emit_struct_ep(p, dst, 0), imag); - lb_emit_store(p, lb_emit_struct_ep(p, dst, 1), jmag); - lb_emit_store(p, lb_emit_struct_ep(p, dst, 2), kmag); + xyzw[0] = lb_emit_conv(p, xyzw[0], ft); + xyzw[1] = lb_emit_conv(p, xyzw[1], ft); + xyzw[2] = lb_emit_conv(p, xyzw[2], ft); + xyzw[3] = lb_emit_conv(p, xyzw[3], ft); + lb_emit_store(p, lb_emit_struct_ep(p, dst, 0), xyzw[0]); + lb_emit_store(p, lb_emit_struct_ep(p, dst, 1), xyzw[1]); + lb_emit_store(p, lb_emit_struct_ep(p, dst, 2), xyzw[2]); + lb_emit_store(p, lb_emit_struct_ep(p, dst, 3), xyzw[3]); return lb_emit_load(p, dst); } -- cgit v1.2.3 From 3bf7b416e72259059a91a785d70b50961f6c41c6 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Fri, 5 Jan 2024 14:36:58 +0000 Subject: Fix `builtin.quaternion` generation --- core/builtin/builtin.odin | 2 +- src/check_builtin.cpp | 126 ++++++++++++++++++++-------------------------- src/llvm_backend_proc.cpp | 2 +- 3 files changed, 56 insertions(+), 74 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/core/builtin/builtin.odin b/core/builtin/builtin.odin index 211db9770..5cba3c8ea 100644 --- a/core/builtin/builtin.odin +++ b/core/builtin/builtin.odin @@ -110,7 +110,7 @@ typeid_of :: proc($T: typeid) -> typeid --- swizzle :: proc(x: [N]T, indices: ..int) -> [len(indices)]T --- complex :: proc(real, imag: Float) -> Complex_Type --- -quaternion :: proc(real, imag, jmag, kmag: Float) -> Quaternion_Type --- +quaternion :: proc(imag, jmag, kmag, real: Float) -> Quaternion_Type --- // fields must be named real :: proc(value: Complex_Or_Quaternion) -> Float --- imag :: proc(value: Complex_Or_Quaternion) -> Float --- jmag :: proc(value: Quaternion) -> Float --- diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 55962f89f..0b3d65f78 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -2328,11 +2328,8 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As break; } - // quaternion :: proc(real, imag, jmag, kmag: float_type) -> complex_type - Operand x = {}; - Operand y = {}; - Operand z = {}; - Operand w = {}; + // quaternion :: proc(imag, jmag, kmag, real: float_type) -> complex_type + Operand xyzw[4] = {}; // NOTE(bill): Invalid will be the default till fixed operand->type = t_invalid; @@ -2356,21 +2353,21 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As u32 style = 0; if (name == "x") { - *index = 1; style = 1; + *index = 0; style = 1; } else if (name == "y") { - *index = 2; style = 1; + *index = 1; style = 1; } else if (name == "z") { - *index = 3; style = 1; + *index = 2; style = 1; } else if (name == "w") { - *index = 0; style = 1; + *index = 3; style = 1; } else if (name == "imag") { - *index = 1; style = 2; + *index = 0; style = 2; } else if (name == "jmag") { - *index = 2; style = 2; + *index = 1; style = 2; } else if (name == "kmag") { - *index = 3; style = 2; + *index = 2; style = 2; } else if (name == "real") { - *index = 0; style = 2; + *index = 3; style = 2; } else { error(field->field, "Unknown name for '%.*s', expected (w, x, y, z; or real, imag, jmag, kmag), got '%.*s'", LIT(builtin_name), LIT(name)); return false; @@ -2385,9 +2382,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As return o->mode != Addressing_Invalid; }; - // TODO(bill): disallow style mixing - - Operand *refs[4] = {&x, &y, &z, &w}; + Operand *refs[4] = {&xyzw[0], &xyzw[1], &xyzw[2], &xyzw[3]}; for (i32 i = 0; i < 4; i++) { i32 index = -1; @@ -2412,57 +2407,40 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As } else { error(call, "'%.*s' requires that all arguments are named (w, x, y, z; or real, imag, jmag, kmag)", LIT(builtin_name)); - check_expr(c, &x, ce->args[0]); - if (x.mode == Addressing_Invalid) { - return false; - } - - check_expr(c, &y, ce->args[1]); - if (y.mode == Addressing_Invalid) { - return false; - } - check_expr(c, &z, ce->args[2]); - if (z.mode == Addressing_Invalid) { - return false; - } - check_expr(c, &w, ce->args[3]); - if (w.mode == Addressing_Invalid) { - return false; + for (i32 i = 0; i < 4; i++) { + check_expr(c, &xyzw[i], ce->args[i]); + if (xyzw[i].mode == Addressing_Invalid) { + return false; + } } } - convert_to_typed(c, &x, y.type); if (x.mode == Addressing_Invalid) return false; - convert_to_typed(c, &y, x.type); if (y.mode == Addressing_Invalid) return false; - convert_to_typed(c, &z, x.type); if (z.mode == Addressing_Invalid) return false; - convert_to_typed(c, &w, x.type); if (w.mode == Addressing_Invalid) return false; + convert_to_typed(c, &xyzw[0], xyzw[1].type); if (xyzw[0].mode == Addressing_Invalid) return false; + convert_to_typed(c, &xyzw[1], xyzw[0].type); if (xyzw[1].mode == Addressing_Invalid) return false; + convert_to_typed(c, &xyzw[2], xyzw[0].type); if (xyzw[2].mode == Addressing_Invalid) return false; + convert_to_typed(c, &xyzw[3], xyzw[0].type); if (xyzw[3].mode == Addressing_Invalid) return false; - if (x.mode == Addressing_Constant && - y.mode == Addressing_Constant && - z.mode == Addressing_Constant && - w.mode == Addressing_Constant) { - x.value = exact_value_to_float(x.value); - y.value = exact_value_to_float(y.value); - z.value = exact_value_to_float(z.value); - w.value = exact_value_to_float(w.value); - if (is_type_numeric(x.type) && x.value.kind == ExactValue_Float) { - x.type = t_untyped_float; - } - if (is_type_numeric(y.type) && y.value.kind == ExactValue_Float) { - y.type = t_untyped_float; - } - if (is_type_numeric(z.type) && z.value.kind == ExactValue_Float) { - z.type = t_untyped_float; + if (xyzw[0].mode == Addressing_Constant && + xyzw[1].mode == Addressing_Constant && + xyzw[2].mode == Addressing_Constant && + xyzw[3].mode == Addressing_Constant) { + for (i32 i = 0; i < 4; i++) { + xyzw[i].value = exact_value_to_float(xyzw[i].value); } - if (is_type_numeric(w.type) && w.value.kind == ExactValue_Float) { - w.type = t_untyped_float; + for (i32 i = 0; i < 4; i++) { + if (is_type_numeric(xyzw[i].type) && xyzw[i].value.kind == ExactValue_Float) { + xyzw[i].type = t_untyped_float; + } } } - if (!(are_types_identical(x.type, y.type) && are_types_identical(x.type, z.type) && are_types_identical(x.type, w.type))) { - gbString tx = type_to_string(x.type); - gbString ty = type_to_string(y.type); - gbString tz = type_to_string(z.type); - gbString tw = type_to_string(w.type); - error(call, "Mismatched types to 'quaternion', '%s' vs '%s' vs '%s' vs '%s'", tx, ty, tz, tw); + if (!(are_types_identical(xyzw[0].type, xyzw[1].type) && + are_types_identical(xyzw[0].type, xyzw[2].type) && + are_types_identical(xyzw[0].type, xyzw[3].type))) { + gbString tx = type_to_string(xyzw[0].type); + gbString ty = type_to_string(xyzw[1].type); + gbString tz = type_to_string(xyzw[2].type); + gbString tw = type_to_string(xyzw[3].type); + error(call, "Mismatched types to 'quaternion', 'x=%s' vs 'y=%s' vs 'z=%s' vs 'w=%s'", tx, ty, tz, tw); gb_string_free(tw); gb_string_free(tz); gb_string_free(ty); @@ -2470,31 +2448,35 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As return false; } - if (!is_type_float(x.type)) { - gbString s = type_to_string(x.type); + if (!is_type_float(xyzw[0].type)) { + gbString s = type_to_string(xyzw[0].type); error(call, "Arguments have type '%s', expected a floating point", s); gb_string_free(s); return false; } - if (is_type_endian_specific(x.type)) { - gbString s = type_to_string(x.type); + if (is_type_endian_specific(xyzw[0].type)) { + gbString s = type_to_string(xyzw[0].type); error(call, "Arguments with a specified endian are not allow, expected a normal floating point, got '%s'", s); gb_string_free(s); return false; } - if (x.mode == Addressing_Constant && y.mode == Addressing_Constant && z.mode == Addressing_Constant && w.mode == Addressing_Constant) { - f64 r = exact_value_to_float(x.value).value_float; - f64 i = exact_value_to_float(y.value).value_float; - f64 j = exact_value_to_float(z.value).value_float; - f64 k = exact_value_to_float(w.value).value_float; + + operand->mode = Addressing_Value; + + if (xyzw[0].mode == Addressing_Constant && + xyzw[1].mode == Addressing_Constant && + xyzw[2].mode == Addressing_Constant && + xyzw[3].mode == Addressing_Constant) { + f64 r = exact_value_to_float(xyzw[3].value).value_float; + f64 i = exact_value_to_float(xyzw[0].value).value_float; + f64 j = exact_value_to_float(xyzw[1].value).value_float; + f64 k = exact_value_to_float(xyzw[2].value).value_float; operand->value = exact_value_quaternion(r, i, j, k); operand->mode = Addressing_Constant; - } else { - operand->mode = Addressing_Value; } - BasicKind kind = core_type(x.type)->Basic.kind; + BasicKind kind = core_type(xyzw[0].type)->Basic.kind; switch (kind) { case Basic_f16: operand->type = t_quaternion64; break; case Basic_f32: operand->type = t_quaternion128; break; diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 3f16d07a5..d4eae84bc 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1845,7 +1845,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu } GB_ASSERT(index >= 0); - xyzw[index] = lb_build_expr(p, ce->args[i]); + xyzw[index] = lb_build_expr(p, f->value); } -- cgit v1.2.3 From f4782157d3b586a88983d3b770c2c0eeb17946d1 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 7 Jan 2024 21:34:44 +0000 Subject: Implement instrumentation pass --- core/runtime/core.odin | 1 + core/runtime/entry_unix.odin | 1 + core/runtime/entry_wasm.odin | 1 + core/runtime/entry_windows.odin | 1 + core/runtime/error_checks.odin | 24 ++++++----- core/runtime/procs_windows_amd64.odin | 1 + core/runtime/procs_windows_i386.odin | 1 + src/check_decl.cpp | 26 ++++++++---- src/entity.cpp | 2 +- src/llvm_backend.cpp | 2 - src/llvm_backend.hpp | 2 +- src/llvm_backend_general.cpp | 13 ++++++ src/llvm_backend_opt.cpp | 77 +++++++++++++++++++++++++++++++++++ src/llvm_backend_proc.cpp | 12 ++++++ 14 files changed, 143 insertions(+), 21 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/core/runtime/core.odin b/core/runtime/core.odin index 2d176f909..e12d9a43d 100644 --- a/core/runtime/core.odin +++ b/core/runtime/core.odin @@ -18,6 +18,7 @@ // This could change at a later date if the all these data structures are // implemented within the compiler rather than in this "preload" file // +//+no-instrumentation package runtime import "core:intrinsics" diff --git a/core/runtime/entry_unix.odin b/core/runtime/entry_unix.odin index 78e545c22..f494a509e 100644 --- a/core/runtime/entry_unix.odin +++ b/core/runtime/entry_unix.odin @@ -1,5 +1,6 @@ //+private //+build linux, darwin, freebsd, openbsd +//+no-instrumentation package runtime import "core:intrinsics" diff --git a/core/runtime/entry_wasm.odin b/core/runtime/entry_wasm.odin index 235d5611b..e7f3f156f 100644 --- a/core/runtime/entry_wasm.odin +++ b/core/runtime/entry_wasm.odin @@ -1,5 +1,6 @@ //+private //+build wasm32, wasm64p32 +//+no-instrumentation package runtime import "core:intrinsics" diff --git a/core/runtime/entry_windows.odin b/core/runtime/entry_windows.odin index a315c1209..277daecca 100644 --- a/core/runtime/entry_windows.odin +++ b/core/runtime/entry_windows.odin @@ -1,5 +1,6 @@ //+private //+build windows +//+no-instrumentation package runtime import "core:intrinsics" diff --git a/core/runtime/error_checks.odin b/core/runtime/error_checks.odin index 9d484979a..ea6333c29 100644 --- a/core/runtime/error_checks.odin +++ b/core/runtime/error_checks.odin @@ -1,5 +1,6 @@ package runtime +@(no_instrumentation) bounds_trap :: proc "contextless" () -> ! { when ODIN_OS == .Windows { windows_trap_array_bounds() @@ -8,6 +9,7 @@ bounds_trap :: proc "contextless" () -> ! { } } +@(no_instrumentation) type_assertion_trap :: proc "contextless" () -> ! { when ODIN_OS == .Windows { windows_trap_type_assertion() @@ -21,7 +23,7 @@ bounds_check_error :: proc "contextless" (file: string, line, column: i32, index if uint(index) < uint(count) { return } - @(cold) + @(cold, no_instrumentation) handle_error :: proc "contextless" (file: string, line, column: i32, index, count: int) -> ! { print_caller_location(Source_Code_Location{file, line, column, ""}) print_string(" Index ") @@ -34,6 +36,7 @@ bounds_check_error :: proc "contextless" (file: string, line, column: i32, index handle_error(file, line, column, index, count) } +@(no_instrumentation) slice_handle_error :: proc "contextless" (file: string, line, column: i32, lo, hi: int, len: int) -> ! { print_caller_location(Source_Code_Location{file, line, column, ""}) print_string(" Invalid slice indices ") @@ -46,6 +49,7 @@ slice_handle_error :: proc "contextless" (file: string, line, column: i32, lo, h bounds_trap() } +@(no_instrumentation) multi_pointer_slice_handle_error :: proc "contextless" (file: string, line, column: i32, lo, hi: int) -> ! { print_caller_location(Source_Code_Location{file, line, column, ""}) print_string(" Invalid slice indices ") @@ -82,7 +86,7 @@ dynamic_array_expr_error :: proc "contextless" (file: string, line, column: i32, if 0 <= low && low <= high && high <= max { return } - @(cold) + @(cold, no_instrumentation) handle_error :: proc "contextless" (file: string, line, column: i32, low, high, max: int) -> ! { print_caller_location(Source_Code_Location{file, line, column, ""}) print_string(" Invalid dynamic array indices ") @@ -103,7 +107,7 @@ matrix_bounds_check_error :: proc "contextless" (file: string, line, column: i32 uint(column_index) < uint(column_count) { return } - @(cold) + @(cold, no_instrumentation) handle_error :: proc "contextless" (file: string, line, column: i32, row_index, column_index, row_count, column_count: int) -> ! { print_caller_location(Source_Code_Location{file, line, column, ""}) print_string(" Matrix indices [") @@ -127,7 +131,7 @@ when ODIN_NO_RTTI { if ok { return } - @(cold) + @(cold, no_instrumentation) handle_error :: proc "contextless" (file: string, line, column: i32) -> ! { print_caller_location(Source_Code_Location{file, line, column, ""}) print_string(" Invalid type assertion\n") @@ -140,7 +144,7 @@ when ODIN_NO_RTTI { if ok { return } - @(cold) + @(cold, no_instrumentation) handle_error :: proc "contextless" (file: string, line, column: i32) -> ! { print_caller_location(Source_Code_Location{file, line, column, ""}) print_string(" Invalid type assertion\n") @@ -153,7 +157,7 @@ when ODIN_NO_RTTI { if ok { return } - @(cold) + @(cold, no_instrumentation) handle_error :: proc "contextless" (file: string, line, column: i32, from, to: typeid) -> ! { print_caller_location(Source_Code_Location{file, line, column, ""}) print_string(" Invalid type assertion from ") @@ -198,7 +202,7 @@ when ODIN_NO_RTTI { return id } - @(cold) + @(cold, no_instrumentation) handle_error :: proc "contextless" (file: string, line, column: i32, from, to: typeid, from_data: rawptr) -> ! { actual := variant_type(from, from_data) @@ -224,7 +228,7 @@ make_slice_error_loc :: #force_inline proc "contextless" (loc := #caller_locatio if 0 <= len { return } - @(cold) + @(cold, no_instrumentation) handle_error :: proc "contextless" (loc: Source_Code_Location, len: int) -> ! { print_caller_location(loc) print_string(" Invalid slice length for make: ") @@ -239,7 +243,7 @@ make_dynamic_array_error_loc :: #force_inline proc "contextless" (loc := #caller if 0 <= len && len <= cap { return } - @(cold) + @(cold, no_instrumentation) handle_error :: proc "contextless" (loc: Source_Code_Location, len, cap: int) -> ! { print_caller_location(loc) print_string(" Invalid dynamic array parameters for make: ") @@ -256,7 +260,7 @@ make_map_expr_error_loc :: #force_inline proc "contextless" (loc := #caller_loca if 0 <= cap { return } - @(cold) + @(cold, no_instrumentation) handle_error :: proc "contextless" (loc: Source_Code_Location, cap: int) -> ! { print_caller_location(loc) print_string(" Invalid map capacity for make: ") diff --git a/core/runtime/procs_windows_amd64.odin b/core/runtime/procs_windows_amd64.odin index e430357be..a30985d5c 100644 --- a/core/runtime/procs_windows_amd64.odin +++ b/core/runtime/procs_windows_amd64.odin @@ -1,4 +1,5 @@ //+private +//+no-instrumentation package runtime foreign import kernel32 "system:Kernel32.lib" diff --git a/core/runtime/procs_windows_i386.odin b/core/runtime/procs_windows_i386.odin index f810197f1..4f606da8f 100644 --- a/core/runtime/procs_windows_i386.odin +++ b/core/runtime/procs_windows_i386.odin @@ -1,4 +1,5 @@ //+private +//+no-instrumentation package runtime @require foreign import "system:int64.lib" diff --git a/src/check_decl.cpp b/src/check_decl.cpp index c69d8185e..a530945f9 100644 --- a/src/check_decl.cpp +++ b/src/check_decl.cpp @@ -910,24 +910,24 @@ gb_internal void check_proc_decl(CheckerContext *ctx, Entity *e, DeclInfo *d) { e->Procedure.entry_point_only = ac.entry_point_only; e->Procedure.is_export = ac.is_export; - bool no_instrumentation = false; + bool has_instrumentation = false; if (pl->body == nullptr) { - no_instrumentation = true; + has_instrumentation = false; if (ac.no_instrumentation != Instrumentation_Default) { error(e->token, "@(no_instrumentation) is not allowed on foreign procedures"); } } else { - if (e->file) { - no_instrumentation = (e->file->flags & AstFile_NoInstrumentation) != 0; + AstFile *file = e->token.pos.file_id ? global_files[e->token.pos.file_id] : nullptr; + if (file) { + has_instrumentation = (file->flags & AstFile_NoInstrumentation) == 0; } switch (ac.no_instrumentation) { - case Instrumentation_Enabled: no_instrumentation = false; break; + case Instrumentation_Enabled: has_instrumentation = true; break; case Instrumentation_Default: break; - case Instrumentation_Disabled: no_instrumentation = true; break; + case Instrumentation_Disabled: has_instrumentation = false; break; } } - e->Procedure.no_instrumentation = no_instrumentation; auto const is_valid_instrumentation_call = [](Type *type) -> bool { if (type == nullptr || type->kind != Type_Proc) { @@ -949,6 +949,9 @@ gb_internal void check_proc_decl(CheckerContext *ctx, Entity *e, DeclInfo *d) { if (ac.instrumentation_enter && ac.instrumentation_exit) { error(e->token, "A procedure cannot be marked with both @(instrumentation_enter) and @(instrumentation_exit)"); + + has_instrumentation = false; + e->flags |= EntityFlag_Require; } else if (ac.instrumentation_enter) { if (!is_valid_instrumentation_call(e->type)) { gbString s = type_to_string(e->type); @@ -961,6 +964,9 @@ gb_internal void check_proc_decl(CheckerContext *ctx, Entity *e, DeclInfo *d) { } else { ctx->info->instrumentation_enter_entity = e; } + + has_instrumentation = false; + e->flags |= EntityFlag_Require; } else if (ac.instrumentation_exit) { if (!is_valid_instrumentation_call(e->type)) { gbString s = type_to_string(e->type); @@ -973,8 +979,14 @@ gb_internal void check_proc_decl(CheckerContext *ctx, Entity *e, DeclInfo *d) { } else { ctx->info->instrumentation_exit_entity = e; } + + has_instrumentation = false; + e->flags |= EntityFlag_Require; } + e->Procedure.has_instrumentation = has_instrumentation; + + e->deprecated_message = ac.deprecated_message; e->warning_message = ac.warning_message; ac.link_name = handle_link_name(ctx, e->token, ac.link_name, ac.link_prefix); diff --git a/src/entity.cpp b/src/entity.cpp index 0539386d0..e6c46d37e 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -251,7 +251,7 @@ struct Entity { bool generated_from_polymorphic : 1; bool target_feature_disabled : 1; bool entry_point_only : 1; - bool no_instrumentation : 1; + bool has_instrumentation : 1; String target_feature; } Procedure; struct { diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index b90fd8495..0175d039e 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -1497,8 +1497,6 @@ gb_internal WORKER_TASK_PROC(lb_llvm_module_pass_worker_proc) { auto passes = array_make(heap_allocator(), 0, 64); defer (array_free(&passes)); - - LLVMPassBuilderOptionsRef pb_options = LLVMCreatePassBuilderOptions(); defer (LLVMDisposePassBuilderOptions(pb_options)); diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 4e193bcea..b645d66d6 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -563,7 +563,7 @@ gb_internal LLVMTypeRef OdinLLVMGetVectorElementType(LLVMTypeRef type); gb_internal String lb_filepath_ll_for_module(lbModule *m); - +gb_internal LLVMTypeRef lb_type_internal_for_procedures_raw(lbModule *m, Type *type); gb_internal LLVMTypeRef llvm_array_type(LLVMTypeRef ElementType, uint64_t ElementCount) { #if LB_USE_NEW_PASS_SYSTEM diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 54327cc54..f0f5327c6 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -2348,6 +2348,15 @@ gb_internal LLVMAttributeRef lb_create_enum_attribute(LLVMContextRef ctx, char c return LLVMCreateEnumAttribute(ctx, kind, value); } +gb_internal LLVMAttributeRef lb_create_string_attribute(LLVMContextRef ctx, String const &key, String const &value) { + LLVMAttributeRef attr = LLVMCreateStringAttribute( + ctx, + cast(char const *)key.text, cast(unsigned)key.len, + cast(char const *)value.text, cast(unsigned)value.len); + return attr; +} + + gb_internal void lb_add_proc_attribute_at_index(lbProcedure *p, isize index, char const *name, u64 value) { LLVMAttributeRef attr = lb_create_enum_attribute(p->module->ctx, name, value); GB_ASSERT(attr != nullptr); @@ -2361,6 +2370,10 @@ gb_internal void lb_add_proc_attribute_at_index(lbProcedure *p, isize index, cha gb_internal void lb_add_attribute_to_proc(lbModule *m, LLVMValueRef proc_value, char const *name, u64 value=0) { LLVMAddAttributeAtIndex(proc_value, LLVMAttributeIndex_FunctionIndex, lb_create_enum_attribute(m->ctx, name, value)); } +gb_internal void lb_add_attribute_to_proc_with_string(lbModule *m, LLVMValueRef proc_value, String const &name, String const &value) { + LLVMAttributeRef attr = lb_create_string_attribute(m->ctx, name, value); + LLVMAddAttributeAtIndex(proc_value, LLVMAttributeIndex_FunctionIndex, attr); +} diff --git a/src/llvm_backend_opt.cpp b/src/llvm_backend_opt.cpp index 2e03b7974..d5487d259 100644 --- a/src/llvm_backend_opt.cpp +++ b/src/llvm_backend_opt.cpp @@ -380,6 +380,80 @@ gb_internal void lb_run_remove_dead_instruction_pass(lbProcedure *p) { } } +gb_internal LLVMValueRef lb_run_instrumentation_pass_insert_call(lbProcedure *p, Entity *entity, LLVMBuilderRef dummy_builder) { + lbModule *m = p->module; + + lbValue cc = lb_find_procedure_value_from_entity(m, entity); + + LLVMValueRef args[2] = {}; + args[0] = p->value; + + LLVMValueRef returnaddress_args[1] = {}; + + returnaddress_args[0] = LLVMConstInt(LLVMInt32TypeInContext(m->ctx), 0, false); + + char const *instrinsic_name = "llvm.returnaddress"; + unsigned id = LLVMLookupIntrinsicID(instrinsic_name, gb_strlen(instrinsic_name)); + GB_ASSERT_MSG(id != 0, "Unable to find %s", instrinsic_name); + LLVMValueRef ip = LLVMGetIntrinsicDeclaration(m->mod, id, nullptr, 0); + LLVMTypeRef call_type = LLVMIntrinsicGetType(m->ctx, id, nullptr, 0); + args[1] = LLVMBuildCall2(dummy_builder, call_type, ip, returnaddress_args, gb_count_of(returnaddress_args), ""); + + LLVMTypeRef fnp = lb_type_internal_for_procedures_raw(p->module, entity->type); + return LLVMBuildCall2(dummy_builder, fnp, cc.value, args, 2, ""); +} + + +gb_internal void lb_run_instrumentation_pass(lbProcedure *p) { + lbModule *m = p->module; + Entity *enter = m->info->instrumentation_enter_entity; + Entity *exit = m->info->instrumentation_exit_entity; + if (enter == nullptr || exit == nullptr) { + return; + } + if (!(p->entity && + p->entity->kind == Entity_Procedure && + p->entity->Procedure.has_instrumentation)) { + return; + } + +#define LLVM_V_NAME(x) x, cast(unsigned)(gb_count_of(x)-1) + + LLVMBuilderRef dummy_builder = LLVMCreateBuilderInContext(m->ctx); + defer (LLVMDisposeBuilder(dummy_builder)); + + LLVMBasicBlockRef entry_bb = p->entry_block->block; + LLVMPositionBuilder(dummy_builder, entry_bb, LLVMGetFirstInstruction(entry_bb)); + lb_run_instrumentation_pass_insert_call(p, enter, dummy_builder); + LLVMRemoveStringAttributeAtIndex(p->value, LLVMAttributeIndex_FunctionIndex, LLVM_V_NAME("instrument-function-entry")); + + unsigned bb_count = LLVMCountBasicBlocks(p->value); + LLVMBasicBlockRef *bbs = gb_alloc_array(temporary_allocator(), LLVMBasicBlockRef, bb_count); + LLVMGetBasicBlocks(p->value, bbs); + for (unsigned i = 0; i < bb_count; i++) { + LLVMBasicBlockRef bb = bbs[i]; + LLVMValueRef terminator = LLVMGetBasicBlockTerminator(bb); + if (terminator == nullptr || + !LLVMIsAReturnInst(terminator)) { + continue; + } + + // TODO(bill): getTerminatingMustTailCall() + // If T is preceded by a musttail call, that's the real terminator. + // if (CallInst *CI = BB.getTerminatingMustTailCall()) + // T = CI; + + + LLVMPositionBuilderBefore(dummy_builder, terminator); + lb_run_instrumentation_pass_insert_call(p, exit, dummy_builder); + } + + LLVMRemoveStringAttributeAtIndex(p->value, LLVMAttributeIndex_FunctionIndex, LLVM_V_NAME("instrument-function-exit")); + +#undef LLVM_V_NAME +} + + gb_internal void lb_run_function_pass_manager(LLVMPassManagerRef fpm, lbProcedure *p, lbFunctionPassManagerKind pass_manager_kind) { if (p == nullptr) { @@ -401,6 +475,7 @@ gb_internal void lb_run_function_pass_manager(LLVMPassManagerRef fpm, lbProcedur } break; } + lb_run_instrumentation_pass(p); LLVMRunFunctionPassManager(fpm, p->value); } @@ -552,3 +627,5 @@ gb_internal void lb_run_remove_unused_globals_pass(lbModule *m) { } } } + + diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index d4eae84bc..09bebd0cf 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -329,6 +329,18 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i } } + if (p->body && entity->Procedure.has_instrumentation) { + Entity *instrumentation_enter = m->info->instrumentation_enter_entity; + Entity *instrumentation_exit = m->info->instrumentation_exit_entity; + if (instrumentation_enter && instrumentation_exit) { + String enter = lb_get_entity_name(m, instrumentation_enter); + String exit = lb_get_entity_name(m, instrumentation_exit); + + lb_add_attribute_to_proc_with_string(m, p->value, make_string_c("instrument-function-entry"), enter); + lb_add_attribute_to_proc_with_string(m, p->value, make_string_c("instrument-function-exit"), exit); + } + } + lbValue proc_value = {p->value, p->type}; lb_add_entity(m, entity, proc_value); lb_add_member(m, p->name, proc_value); -- cgit v1.2.3 From c1d853a24e69689a40668c4aa036312bc871540c Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 28 Jan 2024 17:32:34 +0000 Subject: Remove dead code --- core/runtime/internal.odin | 74 -------------------------------------------- src/checker.cpp | 10 ++++-- src/llvm_backend_proc.cpp | 6 ++-- src/llvm_backend_utility.cpp | 26 ++++------------ 4 files changed, 16 insertions(+), 100 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/core/runtime/internal.odin b/core/runtime/internal.odin index d4c43ed7e..a03c2a701 100644 --- a/core/runtime/internal.odin +++ b/core/runtime/internal.odin @@ -22,50 +22,6 @@ byte_slice :: #force_inline proc "contextless" (data: rawptr, len: int) -> []byt return ([^]byte)(data)[:max(len, 0)] } -bswap_16 :: proc "contextless" (x: u16) -> u16 { - return x>>8 | x<<8 -} - -bswap_32 :: proc "contextless" (x: u32) -> u32 { - return x>>24 | (x>>8)&0xff00 | (x<<8)&0xff0000 | x<<24 -} - -bswap_64 :: proc "contextless" (x: u64) -> u64 { - z := x - z = (z & 0x00000000ffffffff) << 32 | (z & 0xffffffff00000000) >> 32 - z = (z & 0x0000ffff0000ffff) << 16 | (z & 0xffff0000ffff0000) >> 16 - z = (z & 0x00ff00ff00ff00ff) << 8 | (z & 0xff00ff00ff00ff00) >> 8 - return z -} - -bswap_128 :: proc "contextless" (x: u128) -> u128 { - z := transmute([4]u32)x - z[0], z[3] = bswap_32(z[3]), bswap_32(z[0]) - z[1], z[2] = bswap_32(z[2]), bswap_32(z[1]) - return transmute(u128)z -} - -bswap_f16 :: proc "contextless" (f: f16) -> f16 { - x := transmute(u16)f - z := bswap_16(x) - return transmute(f16)z - -} - -bswap_f32 :: proc "contextless" (f: f32) -> f32 { - x := transmute(u32)f - z := bswap_32(x) - return transmute(f32)z - -} - -bswap_f64 :: proc "contextless" (f: f64) -> f64 { - x := transmute(u64)f - z := bswap_64(x) - return transmute(f64)z -} - - is_power_of_two_int :: #force_inline proc(x: int) -> bool { if x <= 0 { return false @@ -608,36 +564,6 @@ string_decode_last_rune :: proc "contextless" (s: string) -> (rune, int) { return r, size } - -abs_f16 :: #force_inline proc "contextless" (x: f16) -> f16 { - return -x if x < 0 else x -} -abs_f32 :: #force_inline proc "contextless" (x: f32) -> f32 { - return -x if x < 0 else x -} -abs_f64 :: #force_inline proc "contextless" (x: f64) -> f64 { - return -x if x < 0 else x -} - -min_f16 :: #force_inline proc "contextless" (a, b: f16) -> f16 { - return a if a < b else b -} -min_f32 :: #force_inline proc "contextless" (a, b: f32) -> f32 { - return a if a < b else b -} -min_f64 :: #force_inline proc "contextless" (a, b: f64) -> f64 { - return a if a < b else b -} -max_f16 :: #force_inline proc "contextless" (a, b: f16) -> f16 { - return a if a > b else b -} -max_f32 :: #force_inline proc "contextless" (a, b: f32) -> f32 { - return a if a > b else b -} -max_f64 :: #force_inline proc "contextless" (a, b: f64) -> f64 { - return a if a > b else b -} - abs_complex32 :: #force_inline proc "contextless" (x: complex32) -> f16 { p, q := abs(real(x)), abs(imag(x)) if p < q { diff --git a/src/checker.cpp b/src/checker.cpp index 4d7514d0b..498fce7d2 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -2517,13 +2517,11 @@ gb_internal void generate_minimum_dependency_set(Checker *c, Entity *start) { // Odin internal procedures str_lit("__init_context"), - str_lit("cstring_to_string"), + // str_lit("cstring_to_string"), str_lit("_cleanup_runtime"), // Pseudo-CRT required procedures str_lit("memset"), - str_lit("memcpy"), - str_lit("memmove"), // Utility procedures str_lit("memory_equal"), @@ -2531,6 +2529,12 @@ gb_internal void generate_minimum_dependency_set(Checker *c, Entity *start) { str_lit("memory_compare_zero"), ); + // Only required if no CRT is present + FORCE_ADD_RUNTIME_ENTITIES(build_context.no_crt, + str_lit("memcpy"), + str_lit("memmove"), + ); + FORCE_ADD_RUNTIME_ENTITIES(!build_context.tilde_backend, // Extended data type internal procedures str_lit("umodti3"), diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 09bebd0cf..e0aca2c10 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -2033,9 +2033,9 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu case BuiltinProc_clamp: return lb_emit_clamp(p, type_of_expr(expr), - lb_build_expr(p, ce->args[0]), - lb_build_expr(p, ce->args[1]), - lb_build_expr(p, ce->args[2])); + lb_build_expr(p, ce->args[0]), + lb_build_expr(p, ce->args[1]), + lb_build_expr(p, ce->args[2])); case BuiltinProc_soa_zip: diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index be3ae9c8a..bc5106601 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -83,27 +83,13 @@ gb_internal LLVMValueRef lb_mem_zero_ptr_internal(lbProcedure *p, LLVMValueRef p lb_type(p->module, t_rawptr), lb_type(p->module, t_int) }; - if (true || is_inlinable) { + LLVMValueRef args[4] = {}; + args[0] = LLVMBuildPointerCast(p->builder, ptr, types[0], ""); + args[1] = LLVMConstInt(LLVMInt8TypeInContext(p->module->ctx), 0, false); + args[2] = LLVMBuildIntCast2(p->builder, len, types[1], /*signed*/false, ""); + args[3] = LLVMConstInt(LLVMInt1TypeInContext(p->module->ctx), is_volatile, false); - LLVMValueRef args[4] = {}; - args[0] = LLVMBuildPointerCast(p->builder, ptr, types[0], ""); - args[1] = LLVMConstInt(LLVMInt8TypeInContext(p->module->ctx), 0, false); - args[2] = LLVMBuildIntCast2(p->builder, len, types[1], /*signed*/false, ""); - args[3] = LLVMConstInt(LLVMInt1TypeInContext(p->module->ctx), is_volatile, false); - - return lb_call_intrinsic(p, name, args, gb_count_of(args), types, gb_count_of(types)); - } else { - lbValue pr = lb_lookup_runtime_procedure(p->module, str_lit("memset")); - - LLVMValueRef args[3] = {}; - args[0] = LLVMBuildPointerCast(p->builder, ptr, types[0], ""); - args[1] = LLVMConstInt(LLVMInt32TypeInContext(p->module->ctx), 0, false); - args[2] = LLVMBuildIntCast2(p->builder, len, types[1], /*signed*/false, ""); - - // We always get the function pointer type rather than the function and there is apparently no way around that? - LLVMTypeRef type = lb_type_internal_for_procedures_raw(p->module, pr.type); - return LLVMBuildCall2(p->builder, type, pr.value, args, gb_count_of(args), ""); - } + return lb_call_intrinsic(p, name, args, gb_count_of(args), types, gb_count_of(types)); } -- cgit v1.2.3 From 5c4485f65767366c14dfd9a98945a5479ae0e449 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Fri, 9 Feb 2024 15:18:29 +0000 Subject: Add `#load_directory(path: string) > []runtime.Load_Directory_File` --- base/runtime/core.odin | 8 ++ src/check_builtin.cpp | 187 +++++++++++++++++++++++++++++++++------------- src/check_expr.cpp | 5 +- src/checker.cpp | 15 ++++ src/checker.hpp | 18 +++++ src/llvm_backend_proc.cpp | 67 +++++++++++++---- src/string.cpp | 12 +++ src/types.cpp | 4 + 8 files changed, 247 insertions(+), 69 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/base/runtime/core.odin b/base/runtime/core.odin index fbdf33085..85e64242d 100644 --- a/base/runtime/core.odin +++ b/base/runtime/core.odin @@ -296,6 +296,14 @@ Source_Code_Location :: struct { procedure: string, } +/* + Used by the built-in directory `#load_directory(path: string) -> []Load_Directory_File` +*/ +Load_Directory_File :: struct { + name: string, + data: []byte, // immutable data +} + Assertion_Failure_Proc :: #type proc(prefix, message: string, loc: Source_Code_Location) -> ! // Allocation Stuff diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 4e374add6..d39be37a9 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -1264,6 +1264,139 @@ gb_internal LoadDirectiveResult check_load_directive(CheckerContext *c, Operand } +gb_internal int file_cache_sort_cmp(void const *x, void const *y) { + LoadFileCache const *a = *(LoadFileCache const **)(x); + LoadFileCache const *b = *(LoadFileCache const **)(y); + return string_compare(a->path, b->path); +} + +gb_internal LoadDirectiveResult check_load_directory_directive(CheckerContext *c, Operand *operand, Ast *call, Type *type_hint, bool err_on_not_found) { + ast_node(ce, CallExpr, call); + ast_node(bd, BasicDirective, ce->proc); + String name = bd->name.string; + GB_ASSERT(name == "load_directory"); + + if (ce->args.count != 1) { + error(ce->args[0], "'#%.*s' expects 1 argument, got %td", LIT(name), ce->args.count); + return LoadDirective_Error; + } + + Ast *arg = ce->args[0]; + Operand o = {}; + check_expr(c, &o, arg); + if (o.mode != Addressing_Constant) { + error(arg, "'#%.*s' expected a constant string argument", LIT(name)); + return LoadDirective_Error; + } + + if (!is_type_string(o.type)) { + gbString str = type_to_string(o.type); + error(arg, "'#%.*s' expected a constant string, got %s", LIT(name), str); + gb_string_free(str); + return LoadDirective_Error; + } + + GB_ASSERT(o.value.kind == ExactValue_String); + + init_core_load_directory_file(c->checker); + + operand->type = t_load_directory_file_slice; + operand->mode = Addressing_Value; + + + String original_string = o.value.value_string; + String path; + if (gb_path_is_absolute((char*)original_string.text)) { + path = original_string; + } else { + String base_dir = dir_from_path(get_file_path_string(call->file_id)); + + BlockingMutex *ignore_mutex = nullptr; + bool ok = determine_path_from_string(ignore_mutex, call, base_dir, original_string, &path); + gb_unused(ok); + } + MUTEX_GUARD(&c->info->load_directory_mutex); + + + gbFileError file_error = gbFileError_None; + + Array file_caches = {}; + + LoadDirectoryCache **cache_ptr = string_map_get(&c->info->load_directory_cache, path); + LoadDirectoryCache *cache = cache_ptr ? *cache_ptr : nullptr; + if (cache) { + file_error = cache->file_error; + } + defer ({ + if (cache == nullptr) { + LoadDirectoryCache *new_cache = gb_alloc_item(permanent_allocator(), LoadDirectoryCache); + new_cache->path = path; + new_cache->files = file_caches; + new_cache->file_error = file_error; + string_map_set(&c->info->load_directory_cache, path, new_cache); + + map_set(&c->info->load_directory_map, call, new_cache); + } else { + cache->file_error = file_error; + } + }); + + + LoadDirectiveResult result = LoadDirective_Success; + + + if (cache == nullptr) { + Array list = {}; + ReadDirectoryError rd_err = read_directory(path, &list); + defer (array_free(&list)); + + if (list.count == 1) { + GB_ASSERT(path != list[0].fullpath); + } + + + switch (rd_err) { + case ReadDirectory_InvalidPath: + error(call, "%.*s error - invalid path: %.*s", LIT(name), LIT(original_string)); + return LoadDirective_NotFound; + case ReadDirectory_NotExists: + error(call, "%.*s error - path does not exist: %.*s", LIT(name), LIT(original_string)); + return LoadDirective_NotFound; + case ReadDirectory_Permission: + error(call, "%.*s error - unknown error whilst reading path, %.*s", LIT(name), LIT(original_string)); + return LoadDirective_Error; + case ReadDirectory_NotDir: + error(call, "%.*s error - expected a directory, got a file: %.*s", LIT(name), LIT(original_string)); + return LoadDirective_Error; + case ReadDirectory_Empty: + error(call, "%.*s error - empty directory: %.*s", LIT(name), LIT(original_string)); + return LoadDirective_NotFound; + case ReadDirectory_Unknown: + error(call, "%.*s error - unknown error whilst reading path %.*s", LIT(name), LIT(original_string)); + return LoadDirective_Error; + } + + isize files_to_reserve = list.count+1; // always reserve 1 + + file_caches = array_make(heap_allocator(), 0, files_to_reserve); + + for (FileInfo fi : list) { + LoadFileCache *cache = nullptr; + if (cache_load_file_directive(c, call, fi.fullpath, err_on_not_found, &cache)) { + array_add(&file_caches, cache); + } else { + result = LoadDirective_Error; + } + } + + gb_sort_array(file_caches.data, file_caches.count, file_cache_sort_cmp); + + } + + return result; +} + + gb_internal bool check_builtin_procedure_directive(CheckerContext *c, Operand *operand, Ast *call, Type *type_hint) { ast_node(ce, CallExpr, call); @@ -1291,6 +1424,8 @@ gb_internal bool check_builtin_procedure_directive(CheckerContext *c, Operand *o operand->mode = Addressing_Value; } else if (name == "load") { return check_load_directive(c, operand, call, type_hint, true) == LoadDirective_Success; + } else if (name == "load_directory") { + return check_load_directory_directive(c, operand, call, type_hint, true) == LoadDirective_Success; } else if (name == "load_hash") { if (ce->args.count != 2) { if (ce->args.count == 0) { @@ -1408,58 +1543,6 @@ gb_internal bool check_builtin_procedure_directive(CheckerContext *c, Operand *o return true; } return false; - } else if (name == "load_or") { - error(call, "'#load_or' has now been removed in favour of '#load(path) or_else default'"); - - if (ce->args.count != 2) { - if (ce->args.count == 0) { - error(ce->close, "'#load_or' expects 2 arguments, got 0"); - } else { - error(ce->args[0], "'#load_or' expects 2 arguments, got %td", ce->args.count); - } - return false; - } - - Ast *arg = ce->args[0]; - Operand o = {}; - check_expr(c, &o, arg); - if (o.mode != Addressing_Constant) { - error(arg, "'#load_or' expected a constant string argument"); - return false; - } - - if (!is_type_string(o.type)) { - gbString str = type_to_string(o.type); - error(arg, "'#load_or' expected a constant string, got %s", str); - gb_string_free(str); - return false; - } - - Ast *default_arg = ce->args[1]; - Operand default_op = {}; - check_expr_with_type_hint(c, &default_op, default_arg, t_u8_slice); - if (default_op.mode != Addressing_Constant) { - error(arg, "'#load_or' expected a constant '[]byte' argument"); - return false; - } - - if (!are_types_identical(base_type(default_op.type), t_u8_slice)) { - gbString str = type_to_string(default_op.type); - error(arg, "'#load_or' expected a constant '[]byte', got %s", str); - gb_string_free(str); - return false; - } - GB_ASSERT(o.value.kind == ExactValue_String); - String original_string = o.value.value_string; - - operand->type = t_u8_slice; - operand->mode = Addressing_Constant; - LoadFileCache *cache = nullptr; - if (cache_load_file_directive(c, call, original_string, false, &cache)) { - operand->value = exact_value_string(cache->data); - } else { - operand->value = default_op.value; - } } else if (name == "assert") { if (ce->args.count != 1 && ce->args.count != 2) { error(call, "'#assert' expects either 1 or 2 arguments, got %td", ce->args.count); diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 9b71208cd..11eb4b533 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -7107,8 +7107,8 @@ gb_internal ExprKind check_call_expr(CheckerContext *c, Operand *operand, Ast *c name == "defined" || name == "config" || name == "load" || - name == "load_hash" || - name == "load_or" + name == "load_directory" || + name == "load_hash" ) { operand->mode = Addressing_Builtin; operand->builtin_id = BuiltinProc_DIRECTIVE; @@ -7958,6 +7958,7 @@ gb_internal ExprKind check_basic_directive_expr(CheckerContext *c, Operand *o, A name == "config" || name == "load" || name == "load_hash" || + name == "load_directory" || name == "load_or" ) { error(node, "'#%.*s' must be used as a call", LIT(name)); diff --git a/src/checker.cpp b/src/checker.cpp index 457ee6146..569a3c76f 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -1257,6 +1257,9 @@ gb_internal void init_checker_info(CheckerInfo *i) { mpsc_init(&i->required_global_variable_queue, a); // 1<<10); mpsc_init(&i->required_foreign_imports_through_force_queue, a); // 1<<10); mpsc_init(&i->intrinsics_entry_point_usage, a); // 1<<10); // just waste some memory here, even if it probably never used + + string_map_init(&i->load_directory_cache); + map_init(&i->load_directory_map); } gb_internal void destroy_checker_info(CheckerInfo *i) { @@ -1280,6 +1283,8 @@ gb_internal void destroy_checker_info(CheckerInfo *i) { map_destroy(&i->objc_msgSend_types); string_map_destroy(&i->load_file_cache); + string_map_destroy(&i->load_directory_cache); + map_destroy(&i->load_directory_map); } gb_internal CheckerContext make_checker_context(Checker *c) { @@ -2958,6 +2963,16 @@ gb_internal void init_core_source_code_location(Checker *c) { t_source_code_location_ptr = alloc_type_pointer(t_source_code_location); } +gb_internal void init_core_load_directory_file(Checker *c) { + if (t_load_directory_file != nullptr) { + return; + } + t_load_directory_file = find_core_type(c, str_lit("Load_Directory_File")); + t_load_directory_file_ptr = alloc_type_pointer(t_load_directory_file); + t_load_directory_file_slice = alloc_type_slice(t_load_directory_file); +} + + gb_internal void init_core_map_type(Checker *c) { if (t_map_info != nullptr) { return; diff --git a/src/checker.hpp b/src/checker.hpp index 9da0f2950..9aee82257 100644 --- a/src/checker.hpp +++ b/src/checker.hpp @@ -340,6 +340,19 @@ struct LoadFileCache { StringMap hashes; }; + +struct LoadDirectoryFile { + String file_name; + String data; +}; + +struct LoadDirectoryCache { + String path; + gbFileError file_error; + Array files; +}; + + struct GenProcsData { Array procs; RwMutex mutex; @@ -416,6 +429,11 @@ struct CheckerInfo { BlockingMutex instrumentation_mutex; Entity *instrumentation_enter_entity; Entity *instrumentation_exit_entity; + + + BlockingMutex load_directory_mutex; + StringMap load_directory_cache; + PtrMap load_directory_map; // Key: Ast_CallExpr * }; struct CheckerContext { diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index e0aca2c10..9419f9a3c 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1693,24 +1693,61 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu case BuiltinProc_DIRECTIVE: { ast_node(bd, BasicDirective, ce->proc); String name = bd->name.string; - GB_ASSERT(name == "location"); - String procedure = p->entity->token.string; - TokenPos pos = ast_token(ce->proc).pos; - if (ce->args.count > 0) { - Ast *ident = unselector_expr(ce->args[0]); - GB_ASSERT(ident->kind == Ast_Ident); - Entity *e = entity_of_node(ident); - GB_ASSERT(e != nullptr); - - if (e->parent_proc_decl != nullptr && e->parent_proc_decl->entity != nullptr) { - procedure = e->parent_proc_decl->entity->token.string; - } else { - procedure = str_lit(""); + if (name == "location") { + String procedure = p->entity->token.string; + TokenPos pos = ast_token(ce->proc).pos; + if (ce->args.count > 0) { + Ast *ident = unselector_expr(ce->args[0]); + GB_ASSERT(ident->kind == Ast_Ident); + Entity *e = entity_of_node(ident); + GB_ASSERT(e != nullptr); + + if (e->parent_proc_decl != nullptr && e->parent_proc_decl->entity != nullptr) { + procedure = e->parent_proc_decl->entity->token.string; + } else { + procedure = str_lit(""); + } + pos = e->token.pos; + } - pos = e->token.pos; + return lb_emit_source_code_location_as_global(p, procedure, pos); + } else if (name == "load_directory") { + lbModule *m = p->module; + TEMPORARY_ALLOCATOR_GUARD(); + LoadDirectoryCache *cache = map_must_get(&m->info->load_directory_map, expr); + isize count = cache->files.count; + + LLVMValueRef *elements = gb_alloc_array(temporary_allocator(), LLVMValueRef, count); + for_array(i, cache->files) { + LoadFileCache *file = cache->files[i]; + String file_name = filename_without_directory(file->path); + + LLVMValueRef values[2] = {}; + values[0] = lb_const_string(m, file_name).value; + values[1] = lb_const_string(m, file->data).value; + LLVMValueRef element = llvm_const_named_struct(m, t_load_directory_file, values, gb_count_of(values)); + elements[i] = element; + } + + LLVMValueRef backing_array = llvm_const_array(lb_type(m, t_load_directory_file), elements, count); + + Type *array_type = alloc_type_array(t_load_directory_file, count); + lbAddr backing_array_addr = lb_add_global_generated(m, array_type, {backing_array, array_type}, nullptr); + lb_make_global_private_const(backing_array_addr); + + LLVMValueRef backing_array_ptr = backing_array_addr.addr.value; + backing_array_ptr = LLVMConstPointerCast(backing_array_ptr, lb_type(m, t_load_directory_file_ptr)); + + LLVMValueRef const_slice = llvm_const_slice_internal(m, backing_array_ptr, LLVMConstInt(lb_type(m, t_int), count, false)); + + lbAddr addr = lb_add_global_generated(p->module, tv.type, {const_slice, t_load_directory_file_slice}, nullptr); + lb_make_global_private_const(addr); + + return lb_addr_load(p, addr); + } else { + GB_PANIC("UNKNOWN DIRECTIVE: %.*s", LIT(name)); } - return lb_emit_source_code_location_as_global(p, procedure, pos); } case BuiltinProc_type_info_of: { diff --git a/src/string.cpp b/src/string.cpp index 9fb933b1b..bd703b2a6 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -293,6 +293,18 @@ gb_internal String filename_from_path(String s) { return make_string(nullptr, 0); } + +gb_internal String filename_without_directory(String s) { + isize j = 0; + for (j = s.len-1; j >= 0; j--) { + if (s[j] == '/' || + s[j] == '\\') { + break; + } + } + return substring(s, gb_max(j+1, 0), s.len); +} + gb_internal String concatenate_strings(gbAllocator a, String const &x, String const &y) { isize len = x.len+y.len; u8 *data = gb_alloc_array(a, u8, len+1); diff --git a/src/types.cpp b/src/types.cpp index c4b03c967..8275b87ba 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -679,6 +679,10 @@ gb_global Type *t_allocator_error = nullptr; gb_global Type *t_source_code_location = nullptr; gb_global Type *t_source_code_location_ptr = nullptr; +gb_global Type *t_load_directory_file = nullptr; +gb_global Type *t_load_directory_file_ptr = nullptr; +gb_global Type *t_load_directory_file_slice = nullptr; + gb_global Type *t_map_info = nullptr; gb_global Type *t_map_cell_info = nullptr; gb_global Type *t_raw_map = nullptr; -- cgit v1.2.3 From f6f3a760bcdbad183a4141738b19779e88ed7dfc Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Wed, 21 Feb 2024 22:05:11 +0100 Subject: Promote types in `#c_varargs` according to C rules --- src/llvm_backend_proc.cpp | 4 ++-- src/types.cpp | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 9419f9a3c..4a981277d 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -3361,9 +3361,9 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { for (Ast *var_arg : variadic) { lbValue arg = lb_build_expr(p, var_arg); if (is_type_any(elem_type)) { - array_add(&args, lb_emit_conv(p, arg, default_type(arg.type))); + array_add(&args, lb_emit_conv(p, arg, c_vararg_promote_type(default_type(arg.type)))); } else { - array_add(&args, lb_emit_conv(p, arg, elem_type)); + array_add(&args, lb_emit_conv(p, arg, c_vararg_promote_type(elem_type))); } } break; diff --git a/src/types.cpp b/src/types.cpp index 2f1994574..c31b6e1bd 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -548,6 +548,14 @@ gb_global Type *t_f16 = &basic_types[Basic_f16]; gb_global Type *t_f32 = &basic_types[Basic_f32]; gb_global Type *t_f64 = &basic_types[Basic_f64]; +gb_global Type *t_f16be = &basic_types[Basic_f16be]; +gb_global Type *t_f32be = &basic_types[Basic_f32be]; +gb_global Type *t_f64be = &basic_types[Basic_f64be]; + +gb_global Type *t_f16le = &basic_types[Basic_f16le]; +gb_global Type *t_f32le = &basic_types[Basic_f32le]; +gb_global Type *t_f64le = &basic_types[Basic_f64le]; + gb_global Type *t_complex32 = &basic_types[Basic_complex32]; gb_global Type *t_complex64 = &basic_types[Basic_complex64]; gb_global Type *t_complex128 = &basic_types[Basic_complex128]; @@ -2795,6 +2803,44 @@ gb_internal Type *default_type(Type *type) { return type; } +// See https://en.cppreference.com/w/c/language/conversion#Default_argument_promotions +gb_internal Type *c_vararg_promote_type(Type *type) { + GB_ASSERT(type != nullptr); + + Type *core = core_type(type); + if (core->kind == Type_Basic) { + switch (core->Basic.kind) { + case Basic_f32: + case Basic_UntypedFloat: + return t_f64; + case Basic_f32le: + return t_f64le; + case Basic_f32be: + return t_f64be; + + case Basic_UntypedBool: + case Basic_bool: + case Basic_b8: + case Basic_b16: + case Basic_i8: + case Basic_i16: + case Basic_u8: + case Basic_u16: + return t_i32; + + case Basic_i16le: + case Basic_u16le: + return t_i32le; + + case Basic_i16be: + case Basic_u16be: + return t_i32be; + } + } + + return type; +} + gb_internal bool union_variant_index_types_equal(Type *v, Type *vt) { if (are_types_identical(v, vt)) { return true; -- cgit v1.2.3 From 5107bdc06b7c1c8d02caef3e270e904218d82911 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 27 Feb 2024 15:45:53 +0000 Subject: Make `lb_type_info` use a procedure to load the global value --- src/llvm_backend.cpp | 2 +- src/llvm_backend.hpp | 2 +- src/llvm_backend_proc.cpp | 2 +- src/llvm_backend_stmt.cpp | 2 +- src/llvm_backend_type.cpp | 13 +++++-------- 5 files changed, 9 insertions(+), 12 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index a8d2df181..efba19f23 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -1164,7 +1164,7 @@ gb_internal lbProcedure *lb_create_startup_runtime(lbModule *main_module, lbProc lbValue data = lb_emit_struct_ep(p, var.var, 0); lbValue ti = lb_emit_struct_ep(p, var.var, 1); lb_emit_store(p, data, lb_emit_conv(p, gp, t_rawptr)); - lb_emit_store(p, ti, lb_type_info(main_module, var_type)); + lb_emit_store(p, ti, lb_type_info(p, var_type)); } else { LLVMTypeRef vt = llvm_addr_type(p->module, var.var); lbValue src0 = lb_emit_conv(p, var.init, t); diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 48e1c87c6..741557efd 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -485,7 +485,7 @@ gb_internal lbValue lb_emit_mul_add(lbProcedure *p, lbValue a, lbValue b, lbValu gb_internal void lb_fill_slice(lbProcedure *p, lbAddr const &slice, lbValue base_elem, lbValue len); -gb_internal lbValue lb_type_info(lbModule *m, Type *type); +gb_internal lbValue lb_type_info(lbProcedure *p, Type *type); gb_internal lbValue lb_find_or_add_entity_string(lbModule *m, String const &str); gb_internal lbValue lb_generate_anonymous_proc_lit(lbModule *m, String const &prefix_name, Ast *expr, lbProcedure *parent = nullptr); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 9419f9a3c..13b0171e4 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1755,7 +1755,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu TypeAndValue tav = type_and_value_of_expr(arg); if (tav.mode == Addressing_Type) { Type *t = default_type(type_of_expr(arg)); - return lb_type_info(p->module, t); + return lb_type_info(p, t); } GB_ASSERT(is_type_typeid(tav.type)); diff --git a/src/llvm_backend_stmt.cpp b/src/llvm_backend_stmt.cpp index 388b512b2..0de9c0bf9 100644 --- a/src/llvm_backend_stmt.cpp +++ b/src/llvm_backend_stmt.cpp @@ -748,7 +748,7 @@ gb_internal void lb_build_range_enum(lbProcedure *p, Type *enum_type, Type *val_ i64 enum_count = t->Enum.fields.count; lbValue max_count = lb_const_int(m, t_int, enum_count); - lbValue ti = lb_type_info(m, t); + lbValue ti = lb_type_info(p, t); lbValue variant = lb_emit_struct_ep(p, ti, 4); lbValue eti_ptr = lb_emit_conv(p, variant, t_type_info_enum_ptr); lbValue values = lb_emit_load(p, lb_emit_struct_ep(p, eti_ptr, 2)); diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index 881ac3119..aec1fb201 100644 --- a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -111,22 +111,19 @@ gb_internal lbValue lb_typeid(lbModule *m, Type *type) { return res; } -gb_internal lbValue lb_type_info(lbModule *m, Type *type) { +gb_internal lbValue lb_type_info(lbProcedure *p, Type *type) { GB_ASSERT(!build_context.no_rtti); type = default_type(type); + lbModule *m = p->module; isize index = lb_type_info_index(m->info, type); GB_ASSERT(index >= 0); - LLVMValueRef global = lb_global_type_info_data_ptr(m).value; + lbValue global = lb_global_type_info_data_ptr(m); - LLVMValueRef global_array = LLVMGetInitializer(global); - LLVMValueRef index_value = LLVMConstInt(lb_type(m, t_int), index, false); - lbValue res = {}; - res.value = LLVMConstPointerCast(LLVMConstExtractElement(global_array, index_value), lb_type(m, t_type_info_ptr)); - res.type = t_type_info_ptr; - return res; + lbValue ptr = lb_emit_array_epi(p, global, index); + return lb_emit_load(p, ptr); } gb_internal LLVMTypeRef lb_get_procedure_raw_type(lbModule *m, Type *type) { -- cgit v1.2.3 From a7bab89c934d181ddbfa6e17103b2587581ee5e9 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 6 Mar 2024 15:07:21 +0000 Subject: Unify min/max semantics for simd_(min|max) --- src/llvm_backend_proc.cpp | 6 ++---- src/llvm_backend_utility.cpp | 6 ++++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 2c94222cf..fba7eb381 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1399,8 +1399,7 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn return res; case BuiltinProc_simd_min: if (is_float) { - LLVMValueRef cond = LLVMBuildFCmp(p->builder, LLVMRealOLT, arg0.value, arg1.value, ""); - res.value = LLVMBuildSelect(p->builder, cond, arg0.value, arg1.value, ""); + return lb_emit_min(p, res.type, arg0, arg1); } else { LLVMValueRef cond = LLVMBuildICmp(p->builder, is_signed ? LLVMIntSLT : LLVMIntULT, arg0.value, arg1.value, ""); res.value = LLVMBuildSelect(p->builder, cond, arg0.value, arg1.value, ""); @@ -1408,8 +1407,7 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn return res; case BuiltinProc_simd_max: if (is_float) { - LLVMValueRef cond = LLVMBuildFCmp(p->builder, LLVMRealOGT, arg0.value, arg1.value, ""); - res.value = LLVMBuildSelect(p->builder, cond, arg0.value, arg1.value, ""); + return lb_emit_max(p, res.type, arg0, arg1); } else { LLVMValueRef cond = LLVMBuildICmp(p->builder, is_signed ? LLVMIntSGT : LLVMIntUGT, arg0.value, arg1.value, ""); res.value = LLVMBuildSelect(p->builder, cond, arg0.value, arg1.value, ""); diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 0f5d7fb43..2c80f9c6a 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -124,7 +124,8 @@ gb_internal lbValue lb_emit_select(lbProcedure *p, lbValue cond, lbValue x, lbVa gb_internal lbValue lb_emit_min(lbProcedure *p, Type *t, lbValue x, lbValue y) { x = lb_emit_conv(p, x, t); y = lb_emit_conv(p, y, t); - if (is_type_float(t)) { + bool use_llvm_intrinsic = is_type_float(t) || (is_type_simd_vector(t) && is_type_float(base_array_type(t))); + if (use_llvm_intrinsic) { // NOTE(bill): f either operand is a NaN, returns NaN. Otherwise returns the lesser of the two arguments. // -0.0 is considered to be less than +0.0 for this intrinsic. // These semantics are specified by IEEE 754-2019. @@ -138,7 +139,8 @@ gb_internal lbValue lb_emit_min(lbProcedure *p, Type *t, lbValue x, lbValue y) { gb_internal lbValue lb_emit_max(lbProcedure *p, Type *t, lbValue x, lbValue y) { x = lb_emit_conv(p, x, t); y = lb_emit_conv(p, y, t); - if (is_type_float(t)) { + bool use_llvm_intrinsic = is_type_float(t) || (is_type_simd_vector(t) && is_type_float(base_array_type(t))); + if (use_llvm_intrinsic) { // NOTE(bill): If either operand is a NaN, returns NaN. Otherwise returns the greater of the two arguments. // -0.0 is considered to be less than +0.0 for this intrinsic. // These semantics are specified by IEEE 754-2019. -- cgit v1.2.3 From a8d8696e2fcaccfc34504ec897a6551ff7bfe4b1 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Tue, 2 Apr 2024 17:20:44 +0200 Subject: fix named arguments with #c_vararg Previously `args=1`, `args={}`, `args={1, 2, 3}` would all crash the compiler. Now it passes them correctly, and if given a compound literal, the values are expanded into the call so you can use a named arg while passing multiple values. Fixes #3168 --- src/llvm_backend_proc.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index fba7eb381..7338281dc 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -3423,6 +3423,27 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { if (e->kind == Entity_TypeName) { lbValue value = lb_const_nil(p->module, e->type); args[param_index] = value; + } else if (is_c_vararg && pt->variadic && pt->variadic_index == param_index) { + GB_ASSERT(param_index == pt->param_count-1); + Type *slice_type = e->type; + GB_ASSERT(slice_type->kind == Type_Slice); + Type *elem_type = slice_type->Slice.elem; + + if (fv->value->kind == Ast_CompoundLit) { + ast_node(literal, CompoundLit, fv->value); + for (Ast *var_arg : literal->elems) { + lbValue arg = lb_build_expr(p, var_arg); + if (is_type_any(elem_type)) { + array_add(&args, lb_emit_conv(p, arg, c_vararg_promote_type(default_type(arg.type)))); + } else { + array_add(&args, lb_emit_conv(p, arg, c_vararg_promote_type(elem_type))); + } + } + } else { + lbValue value = lb_build_expr(p, fv->value); + GB_ASSERT(!is_type_tuple(value.type)); + array_add(&args, lb_emit_conv(p, value, c_vararg_promote_type(value.type))); + } } else { lbValue value = lb_build_expr(p, fv->value); GB_ASSERT(!is_type_tuple(value.type)); -- cgit v1.2.3 From fc30bde0f6d91cdcfc518b9ca729ff5014a006ef Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Tue, 2 Apr 2024 18:49:35 +0200 Subject: fix untyped nil into c varargs Fixes #2842 --- src/llvm_backend_proc.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 7338281dc..2c79499f4 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -3359,6 +3359,9 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { for (Ast *var_arg : variadic) { lbValue arg = lb_build_expr(p, var_arg); if (is_type_any(elem_type)) { + if (is_type_untyped_nil(arg.type)) { + arg = lb_const_nil(p->module, t_rawptr); + } array_add(&args, lb_emit_conv(p, arg, c_vararg_promote_type(default_type(arg.type)))); } else { array_add(&args, lb_emit_conv(p, arg, c_vararg_promote_type(elem_type))); @@ -3434,6 +3437,9 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { for (Ast *var_arg : literal->elems) { lbValue arg = lb_build_expr(p, var_arg); if (is_type_any(elem_type)) { + if (is_type_untyped_nil(arg.type)) { + arg = lb_const_nil(p->module, t_rawptr); + } array_add(&args, lb_emit_conv(p, arg, c_vararg_promote_type(default_type(arg.type)))); } else { array_add(&args, lb_emit_conv(p, arg, c_vararg_promote_type(elem_type))); -- cgit v1.2.3 From a7056f2b4f8ac7c5fe78b00cbae686da4867e206 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Thu, 4 Apr 2024 16:58:22 +0200 Subject: fix lbArg_Ignore logic Fixes #2698 --- src/llvm_abi.cpp | 2 +- src/llvm_backend_proc.cpp | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_abi.cpp b/src/llvm_abi.cpp index 724e4e35a..49b5224ec 100644 --- a/src/llvm_abi.cpp +++ b/src/llvm_abi.cpp @@ -192,7 +192,7 @@ gb_internal void lb_add_function_type_attributes(LLVMValueRef fn, lbFunctionType // } LLVMSetFunctionCallConv(fn, cc_kind); if (calling_convention == ProcCC_Odin) { - unsigned context_index = offset+arg_count; + unsigned context_index = arg_index; LLVMAddAttributeAtIndex(fn, context_index, noalias_attr); LLVMAddAttributeAtIndex(fn, context_index, nonnull_attr); LLVMAddAttributeAtIndex(fn, context_index, nocapture_attr); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 2c79499f4..8ce116715 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -578,7 +578,10 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) { defer (param_index += 1); if (arg_type->kind == lbArg_Ignore) { - continue; + // Even though it is an ignored argument, it might still be referenced in the + // body. + lbValue dummy = lb_add_local_generated(p, e->type, false).addr; + lb_add_entity(p->module, e, dummy); } else if (arg_type->kind == lbArg_Direct) { if (e->token.string.len != 0 && !is_blank_ident(e->token.string)) { LLVMTypeRef param_type = lb_type(p->module, e->type); @@ -1051,6 +1054,7 @@ gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array c Type *original_type = e->type; lbArgType *arg = &ft->args[param_index]; if (arg->kind == lbArg_Ignore) { + param_index += 1; continue; } -- cgit v1.2.3 From b150f49c464f2b3062306627ab277635383a24c7 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 6 Apr 2024 23:32:38 +0200 Subject: fix wasm atomics Fixes #2745 --- src/llvm_backend_proc.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 8ce116715..bb4aed3f1 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -3059,9 +3059,6 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu case BuiltinProc_wasm_memory_atomic_wait32: { char const *name = "llvm.wasm.memory.atomic.wait32"; - LLVMTypeRef types[1] = { - lb_type(p->module, t_u32), - }; Type *t_u32_ptr = alloc_type_pointer(t_u32); @@ -3072,26 +3069,24 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu lbValue res = {}; res.type = tv.type; - res.value = lb_call_intrinsic(p, name, args, gb_count_of(args), types, gb_count_of(types)); + res.value = lb_call_intrinsic(p, name, args, gb_count_of(args), nullptr, 0); return res; } case BuiltinProc_wasm_memory_atomic_notify32: { char const *name = "llvm.wasm.memory.atomic.notify"; - LLVMTypeRef types[1] = { - lb_type(p->module, t_u32), - }; Type *t_u32_ptr = alloc_type_pointer(t_u32); LLVMValueRef args[2] = { - lb_emit_conv(p, lb_build_expr(p, ce->args[0]), t_u32_ptr).value, - lb_emit_conv(p, lb_build_expr(p, ce->args[1]), t_u32).value }; + lb_emit_conv(p, lb_build_expr(p, ce->args[0]), t_u32_ptr).value, + lb_emit_conv(p, lb_build_expr(p, ce->args[1]), t_u32).value + }; lbValue res = {}; res.type = tv.type; - res.value = lb_call_intrinsic(p, name, args, gb_count_of(args), types, gb_count_of(types)); + res.value = lb_call_intrinsic(p, name, args, gb_count_of(args), nullptr, 0); return res; } -- cgit v1.2.3 From 90369b669b5d48f1912f5a3667fcea57a0c4cef2 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Mon, 22 Apr 2024 19:05:50 +0200 Subject: fix direct proc args debug info --- src/llvm_backend_debug.cpp | 12 ++---------- src/llvm_backend_proc.cpp | 13 ++----------- 2 files changed, 4 insertions(+), 21 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_debug.cpp b/src/llvm_backend_debug.cpp index 853941496..2654a1d28 100644 --- a/src/llvm_backend_debug.cpp +++ b/src/llvm_backend_debug.cpp @@ -1027,7 +1027,7 @@ gb_internal void lb_add_debug_local_variable(lbProcedure *p, LLVMValueRef ptr, T LLVMDIBuilderInsertDeclareAtEnd(m->debug_builder, storage, var_info, llvm_expr, llvm_debug_loc, block); } -gb_internal void lb_add_debug_param_variable(lbProcedure *p, LLVMValueRef ptr, Type *type, Token const &token, unsigned arg_number, lbBlock *block, lbArgKind arg_kind) { +gb_internal void lb_add_debug_param_variable(lbProcedure *p, LLVMValueRef ptr, Type *type, Token const &token, unsigned arg_number, lbBlock *block) { if (p->debug_info == nullptr) { return; } @@ -1088,15 +1088,7 @@ gb_internal void lb_add_debug_param_variable(lbProcedure *p, LLVMValueRef ptr, T // NOTE(bill, 2022-02-01): For parameter values, you must insert them at the end of the decl block // The reason is that if the parameter is at index 0 and a pointer, there is not such things as an // instruction "before" it. - switch (arg_kind) { - case lbArg_Direct: - LLVMDIBuilderInsertDbgValueAtEnd(m->debug_builder, storage, var_info, llvm_expr, llvm_debug_loc, block->block); - break; - case lbArg_Indirect: - LLVMDIBuilderInsertDeclareAtEnd(m->debug_builder, storage, var_info, llvm_expr, llvm_debug_loc, block->block); - break; - } - + LLVMDIBuilderInsertDeclareAtEnd(m->debug_builder, storage, var_info, llvm_expr, llvm_debug_loc, block->block); } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index bb4aed3f1..f73698d34 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -597,16 +597,7 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) { lbValue ptr = lb_address_from_load_or_generate_local(p, param); GB_ASSERT(LLVMIsAAllocaInst(ptr.value)); lb_add_entity(p->module, e, ptr); - - lbBlock *block = p->decl_block; - if (original_value != value) { - block = p->curr_block; - } - LLVMValueRef debug_storage_value = value; - if (original_value != value && LLVMIsALoadInst(value)) { - debug_storage_value = LLVMGetOperand(value, 0); - } - lb_add_debug_param_variable(p, debug_storage_value, e->type, e->token, param_index+1, block, arg_type->kind); + lb_add_debug_param_variable(p, ptr.value, e->type, e->token, param_index+1, p->curr_block); } } else if (arg_type->kind == lbArg_Indirect) { if (e->token.string.len != 0 && !is_blank_ident(e->token.string)) { @@ -614,7 +605,7 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) { ptr.value = LLVMGetParam(p->value, param_offset+param_index); ptr.type = alloc_type_pointer(e->type); lb_add_entity(p->module, e, ptr); - lb_add_debug_param_variable(p, ptr.value, e->type, e->token, param_index+1, p->decl_block, arg_type->kind); + lb_add_debug_param_variable(p, ptr.value, e->type, e->token, param_index+1, p->decl_block); } } } -- cgit v1.2.3 From 25f1d0906d2b5a8276c3832783970a798c12cc6c Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Wed, 1 May 2024 22:12:37 +0200 Subject: compiler: improve target features support --- .gitignore | 4 +- base/intrinsics/intrinsics.odin | 10 +- core/simd/x86/sse3.odin | 4 +- core/simd/x86/sse41.odin | 4 +- core/sync/futex_wasm.odin | 44 ++- misc/featuregen/README.md | 28 ++ misc/featuregen/featuregen.cpp | 37 +++ misc/featuregen/featuregen.py | 116 ++++++++ src/build_settings.cpp | 617 ++++++++++++++++++++++++++++++++++------ src/check_builtin.cpp | 46 ++- src/check_decl.cpp | 49 +++- src/check_expr.cpp | 68 ++++- src/checker_builtin_procs.hpp | 4 + src/entity.cpp | 4 +- src/llvm_backend.cpp | 104 +++---- src/llvm_backend_proc.cpp | 27 +- src/llvm_backend_utility.cpp | 3 +- src/main.cpp | 79 ++++- src/types.cpp | 22 ++ 19 files changed, 1066 insertions(+), 204 deletions(-) create mode 100644 misc/featuregen/README.md create mode 100644 misc/featuregen/featuregen.cpp create mode 100644 misc/featuregen/featuregen.py (limited to 'src/llvm_backend_proc.cpp') diff --git a/.gitignore b/.gitignore index f6c3927a2..2b6b5281a 100644 --- a/.gitignore +++ b/.gitignore @@ -322,4 +322,6 @@ build.sh !core/debug/ # RAD debugger project file -*.raddbg \ No newline at end of file +*.raddbg + +misc/featuregen/featuregen diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index dca33bfd9..d887f8dcc 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -282,6 +282,12 @@ simd_reverse :: proc(a: #simd[N]T) -> #simd[N]T --- simd_rotate_left :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T --- simd_rotate_right :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T --- +// Checks if the current target supports the given target features. +// +// Takes a constant comma-seperated string (eg: "sha512,sse4.1"), or a procedure type which has either +// `@(require_target_feature)` or `@(enable_target_feature)` as its input and returns a boolean indicating +// if all listed features are supported. +has_target_feature :: proc($test: $T) -> bool where type_is_string(T) || type_is_proc(T) --- // WASM targets only wasm_memory_grow :: proc(index, delta: uintptr) -> int --- @@ -293,9 +299,9 @@ wasm_memory_size :: proc(index: uintptr) -> int --- // 0 - indicates that the thread blocked and then was woken up // 1 - the loaded value from `ptr` did not match `expected`, the thread did not block // 2 - the thread blocked, but the timeout -@(enable_target_feature="atomics") +@(require_target_feature="atomics") wasm_memory_atomic_wait32 :: proc(ptr: ^u32, expected: u32, timeout_ns: i64) -> u32 --- -@(enable_target_feature="atomics") +@(require_target_feature="atomics") wasm_memory_atomic_notify32 :: proc(ptr: ^u32, waiters: u32) -> (waiters_woken_up: u32) --- // x86 Targets (i386, amd64) diff --git a/core/simd/x86/sse3.odin b/core/simd/x86/sse3.odin index cf5f3b2fa..ca19c3954 100644 --- a/core/simd/x86/sse3.odin +++ b/core/simd/x86/sse3.odin @@ -36,7 +36,7 @@ _mm_lddqu_si128 :: #force_inline proc "c" (mem_addr: ^__m128i) -> __m128i { _mm_movedup_pd :: #force_inline proc "c" (a: __m128d) -> __m128d { return simd.shuffle(a, a, 0, 0) } -@(require_results, enable_target_feature="sse3") +@(require_results, enable_target_feature="sse2,sse3") _mm_loaddup_pd :: #force_inline proc "c" (mem_addr: [^]f64) -> __m128d { return _mm_load1_pd(mem_addr) } @@ -65,4 +65,4 @@ foreign _ { hsubps :: proc(a, b: __m128) -> __m128 --- @(link_name = "llvm.x86.sse3.ldu.dq") lddqu :: proc(mem_addr: rawptr) -> i8x16 --- -} \ No newline at end of file +} diff --git a/core/simd/x86/sse41.odin b/core/simd/x86/sse41.odin index 8c306ba4c..0b9c5986f 100644 --- a/core/simd/x86/sse41.odin +++ b/core/simd/x86/sse41.odin @@ -268,7 +268,7 @@ _mm_testnzc_si128 :: #force_inline proc "c" (a: __m128i, mask: __m128i) -> i32 { _mm_test_all_zeros :: #force_inline proc "c" (a: __m128i, mask: __m128i) -> i32 { return _mm_testz_si128(a, mask) } -@(require_results, enable_target_feature="sse4.1") +@(require_results, enable_target_feature="sse2,sse4.1") _mm_test_all_ones :: #force_inline proc "c" (a: __m128i) -> i32 { return _mm_testc_si128(a, _mm_cmpeq_epi32(a, a)) } @@ -349,4 +349,4 @@ foreign _ { ptestc :: proc(a, mask: i64x2) -> i32 --- @(link_name = "llvm.x86.sse41.ptestnzc") ptestnzc :: proc(a, mask: i64x2) -> i32 --- -} \ No newline at end of file +} diff --git a/core/sync/futex_wasm.odin b/core/sync/futex_wasm.odin index de1013364..de88e8198 100644 --- a/core/sync/futex_wasm.odin +++ b/core/sync/futex_wasm.odin @@ -5,31 +5,49 @@ package sync import "base:intrinsics" import "core:time" +// NOTE: because `core:sync` is in the dependency chain of a lot of the core packages (mostly through `core:mem`) +// without actually calling into it much, I opted for a runtime panic instead of a compile error here. + _futex_wait :: proc "contextless" (f: ^Futex, expected: u32) -> bool { - s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, -1) - return s != 0 + when !intrinsics.has_target_feature("atomics") { + _panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") + } else { + s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, -1) + return s != 0 + } } _futex_wait_with_timeout :: proc "contextless" (f: ^Futex, expected: u32, duration: time.Duration) -> bool { - s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, i64(duration)) - return s != 0 - + when !intrinsics.has_target_feature("atomics") { + _panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") + } else { + s := intrinsics.wasm_memory_atomic_wait32((^u32)(f), expected, i64(duration)) + return s != 0 + } } _futex_signal :: proc "contextless" (f: ^Futex) { - loop: for { - s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), 1) - if s >= 1 { - return + when !intrinsics.has_target_feature("atomics") { + _panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") + } else { + loop: for { + s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), 1) + if s >= 1 { + return + } } } } _futex_broadcast :: proc "contextless" (f: ^Futex) { - loop: for { - s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), ~u32(0)) - if s >= 0 { - return + when !intrinsics.has_target_feature("atomics") { + _panic("usage of `core:sync` requires the `-target-feature:\"atomics\"` or a `-microarch` that supports it") + } else { + loop: for { + s := intrinsics.wasm_memory_atomic_notify32((^u32)(f), ~u32(0)) + if s >= 0 { + return + } } } } diff --git a/misc/featuregen/README.md b/misc/featuregen/README.md new file mode 100644 index 000000000..22a798cca --- /dev/null +++ b/misc/featuregen/README.md @@ -0,0 +1,28 @@ +# Featuregen + +This directory contains a python and CPP script that generates the needed information +for features regarding microarchitecture and target features of the compiler. + +It is not pretty! But LLVM has no way to query this information with their C API. + +It generates these globals (intended for `src/build_settings.cpp`: + +- `target_microarch_list`: an array of strings indexed by the architecture, each string is a comma-seperated list of microarchitectures available on that architecture +- `target_features_list`: an array of strings indexed by the architecture, each string is a comma-seperated list of target features available on that architecture +- `target_microarch_counts`: an array of ints indexed by the architecture, each int represents the amount of microarchitectures available on that target, intended for easier iteration of the next global +- `microarch_features_list`: an array of a tuple like struct where the first string is a microarchitecture and the second is a comma-seperated list of all features that are enabled by default for it + +In order to get the default features for a microarchitecture there is a small CPP program that takes +a target triple and microarchitecture and spits out the default features, this is then parsed by the python script. + +This should be ran each time we update LLVM to stay in sync. + +If there are minor differences (like the Odin user using LLVM 14 and this table being generated on LLVM 17) it +does not impact much at all, the only thing it will do is make LLVM print a message that the feature is ignored (if it was added between 14 and 17 in this case). + +## Usage + +1. Make sure the table of architectures at the top of the python script is up-to-date (the triple can be any valid triple for the architecture) +1. `./build.sh` +1. `python3 featuregen.py` +1. Copy the output into `src/build_settings.cpp` diff --git a/misc/featuregen/featuregen.cpp b/misc/featuregen/featuregen.cpp new file mode 100644 index 000000000..a1d00ab31 --- /dev/null +++ b/misc/featuregen/featuregen.cpp @@ -0,0 +1,37 @@ +#include +#include +#include +#include +#include +#include + +// Dumps the default set of supported features for the given microarch. +int main(int argc, char **argv) { + if (argc < 3) { + llvm::errs() << "Error: first arg should be triple, second should be microarch\n"; + return 1; + } + + llvm::InitializeAllTargets(); + llvm::InitializeAllTargetMCs(); + + std::string error; + const llvm::Target* target = llvm::TargetRegistry::lookupTarget(argv[1], error); + + if (!target) { + llvm::errs() << "Error: " << error << "\n"; + return 1; + } + + auto STI = target->createMCSubtargetInfo(argv[1], argv[2], ""); + + std::string plus = "+"; + llvm::ArrayRef features = STI->getAllProcessorFeatures(); + for (const auto& feature : features) { + if (STI->checkFeatures(plus + feature.Key)) { + llvm::outs() << feature.Key << "\n"; + } + } + + return 0; +} diff --git a/misc/featuregen/featuregen.py b/misc/featuregen/featuregen.py new file mode 100644 index 000000000..da4cc68f5 --- /dev/null +++ b/misc/featuregen/featuregen.py @@ -0,0 +1,116 @@ +import subprocess +import tempfile +import os +import sys + +archs = [ + ("amd64", "linux_amd64", "x86_64-pc-linux-gnu", [], []), + ("i386", "linux_i386", "i386-pc-linux-gnu", [], []), + ("arm32", "linux_arm32", "arm-linux-gnu", [], []), + ("arm64", "linux_arm64", "aarch64-linux-elf", [], []), + ("wasm32", "js_wasm32", "wasm32-js-js", [], []), + ("wasm64p32", "js_wasm64p32","wasm32-js-js", [], []), +]; + +SEEKING_CPUS = 0 +PARSING_CPUS = 1 +PARSING_FEATURES = 2 + +with tempfile.NamedTemporaryFile(suffix=".odin", delete=True) as temp_file: + temp_file.write(b"package main\n") + + for arch, target, triple, cpus, features in archs: + cmd = ["odin", "build", temp_file.name, "-file", "-build-mode:llvm", "-out:temp", "-target-features:\"help\"", f"-target:\"{target}\""] + process = subprocess.Popen(cmd, stderr=subprocess.PIPE, text=True) + + state = SEEKING_CPUS + for line in process.stderr: + + if state == SEEKING_CPUS: + if line == "Available CPUs for this target:\n": + state = PARSING_CPUS + + elif state == PARSING_CPUS: + if line == "Available features for this target:\n": + state = PARSING_FEATURES + continue + + parts = line.split(" -", maxsplit=1) + if len(parts) < 2: + continue + + cpu = parts[0].strip() + cpus.append(cpu) + + elif state == PARSING_FEATURES: + if line == "\n" and len(features) > 0: + break + + parts = line.split(" -", maxsplit=1) + if len(parts) < 2: + continue + + feature = parts[0].strip() + features.append(feature) + + process.wait() + if process.returncode != 0: + print(f"odin build returned with non-zero exit code {process.returncode}") + sys.exit(1) + + os.remove("temp.ll") + +def print_default_features(triple, microarch): + cmd = ["./featuregen", triple, microarch] + process = subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True) + first = True + for line in process.stdout: + print("" if first else ",", line.strip(), sep="", end="") + first = False + process.wait() + if process.returncode != 0: + print(f"featuregen returned with non-zero exit code {process.returncode}") + sys.exit(1) + +print("// Generated with the featuregen script in `misc/featuregen`") +print("gb_global String target_microarch_list[TargetArch_COUNT] = {") +print("\t// TargetArch_Invalid:") +print('\tstr_lit(""),') +for arch, target, triple, cpus, features in archs: + print(f"\t// TargetArch_{arch}:") + print(f'\tstr_lit("{','.join(cpus)}"),') +print("};") + +print("") + +print("// Generated with the featuregen script in `misc/featuregen`") +print("gb_global String target_features_list[TargetArch_COUNT] = {") +print("\t// TargetArch_Invalid:") +print('\tstr_lit(""),') +for arch, target, triple, cpus, features in archs: + print(f"\t// TargetArch_{arch}:") + print(f'\tstr_lit("{','.join(features)}"),') +print("};") + +print("") + +print("// Generated with the featuregen script in `misc/featuregen`") +print("gb_global int target_microarch_counts[TargetArch_COUNT] = {") +print("\t// TargetArch_Invalid:") +print("\t0,") +for arch, target, triple, cpus, feature in archs: + print(f"\t// TargetArch_{arch}:") + print(f"\t{len(cpus)},") +print("};") + +print("") + +print("// Generated with the featuregen script in `misc/featuregen`") +print("gb_global MicroarchFeatureList microarch_features_list[] = {") +for arch, target, triple, cpus, features in archs: + print(f"\t// TargetArch_{arch}:") + for cpu in cpus: + print(f'\t{{ str_lit("{cpu}"), str_lit("', end="") + print_default_features(triple, cpu) + print('") },') +print("};") diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 8509394ff..8ad03b1b9 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -71,6 +71,11 @@ enum Windows_Subsystem : u8 { Windows_Subsystem_COUNT, }; +struct MicroarchFeatureList { + String microarch; + String features; +}; + gb_global String target_os_names[TargetOs_COUNT] = { str_lit(""), str_lit("windows"), @@ -97,21 +102,467 @@ gb_global String target_arch_names[TargetArch_COUNT] = { str_lit("wasm64p32"), }; +// Generated with the featuregen script in `misc/featuregen` gb_global String target_microarch_list[TargetArch_COUNT] = { - // TargetArch_Invalid, - str_lit("Invalid!"), - // TargetArch_amd64, - str_lit("alderlake,amdfam10,athlon-fx,athlon64,athlon64-sse3,atom_sse4_2,atom_sse4_2_movbe,barcelona,bdver1,bdver2,bdver3,bdver4,broadwell,btver1,btver2,cannonlake,cascadelake,cooperlake,core-avx-i,core-avx2,core2,core_2_duo_sse4_1,core_2_duo_ssse3,core_2nd_gen_avx,core_3rd_gen_avx,core_4th_gen_avx,core_4th_gen_avx_tsx,core_5th_gen_avx,core_5th_gen_avx_tsx,core_aes_pclmulqdq,core_i7_sse4_2,corei7,corei7-avx,generic,goldmont,goldmont-plus,goldmont_plus,grandridge,graniterapids,graniterapids-d,graniterapids_d,haswell,icelake-client,icelake-server,icelake_client,icelake_server,ivybridge,k8,k8-sse3,knl,knm,meteorlake,mic_avx512,native,nehalem,nocona,opteron,opteron-sse3,penryn,raptorlake,rocketlake,sandybridge,sapphirerapids,sierraforest,silvermont,skx,skylake,skylake-avx512,skylake_avx512,slm,tigerlake,tremont,westmere,x86-64,x86-64-v2,x86-64-v3,x86-64-v4,znver1,znver2,znver3,znver4"), - // TargetArch_i386, - str_lit("athlon,athlon-4,athlon-mp,athlon-tbird,athlon-xp,atom,bonnell,c3,c3-2,generic,geode,i386,i486,i586,i686,k6,k6-2,k6-3,lakemont,native,pentium,pentium-m,pentium-mmx,pentium2,pentium3,pentium3m,pentium4,pentium4m,pentium_4,pentium_4_sse3,pentium_ii,pentium_iii,pentium_iii_no_xmm_regs,pentium_m,pentium_mmx,pentium_pro,pentiumpro,prescott,winchip-c6,winchip2,yonah"), - // TargetArch_arm32, - str_lit("arm1020e,arm1020t,arm1022e,arm10e,arm10tdmi,arm1136j-s,arm1136jf-s,arm1156t2-s,arm1156t2f-s,arm1176jz-s,arm1176jzf-s,arm710t,arm720t,arm7tdmi,arm7tdmi-s,arm8,arm810,arm9,arm920,arm920t,arm922t,arm926ej-s,arm940t,arm946e-s,arm966e-s,arm968e-s,arm9e,arm9tdmi,cortex-a12,cortex-a15,cortex-a17,cortex-a32,cortex-a35,cortex-a5,cortex-a53,cortex-a55,cortex-a57,cortex-a7,cortex-a710,cortex-a72,cortex-a73,cortex-a75,cortex-a76,cortex-a76ae,cortex-a77,cortex-a78,cortex-a78c,cortex-a8,cortex-a9,cortex-m0,cortex-m0plus,cortex-m1,cortex-m23,cortex-m3,cortex-m33,cortex-m35p,cortex-m4,cortex-m55,cortex-m7,cortex-m85,cortex-r4,cortex-r4f,cortex-r5,cortex-r52,cortex-r7,cortex-r8,cortex-x1,cortex-x1c,cyclone,ep9312,exynos-m3,exynos-m4,exynos-m5,generic,iwmmxt,krait,kryo,mpcore,mpcorenovfp,native,neoverse-n1,neoverse-n2,neoverse-v1,sc000,sc300,strongarm,strongarm110,strongarm1100,strongarm1110,swift,xscale"), - // TargetArch_arm64, - str_lit("a64fx,ampere1,ampere1a,apple-a10,apple-a11,apple-a12,apple-a13,apple-a14,apple-a15,apple-a16,apple-a7,apple-a8,apple-a9,apple-latest,apple-m1,apple-m2,apple-s4,apple-s5,carmel,cortex-a34,cortex-a35,cortex-a510,cortex-a53,cortex-a55,cortex-a57,cortex-a65,cortex-a65ae,cortex-a710,cortex-a715,cortex-a72,cortex-a73,cortex-a75,cortex-a76,cortex-a76ae,cortex-a77,cortex-a78,cortex-a78c,cortex-r82,cortex-x1,cortex-x1c,cortex-x2,cortex-x3,cyclone,exynos-m3,exynos-m4,exynos-m5,falkor,generic,kryo,native,neoverse-512tvb,neoverse-e1,neoverse-n1,neoverse-n2,neoverse-v1,neoverse-v2,saphira,thunderx,thunderx2t99,thunderx3t110,thunderxt81,thunderxt83,thunderxt88,tsv110"), - // TargetArch_wasm32, - str_lit("generic"), - // TargetArch_wasm64p32, - str_lit("generic"), + // TargetArch_Invalid: + str_lit(""), + // TargetArch_amd64: + str_lit("alderlake,amdfam10,athlon,athlon-4,athlon-fx,athlon-mp,athlon-tbird,athlon-xp,athlon64,athlon64-sse3,atom,atom_sse4_2,atom_sse4_2_movbe,barcelona,bdver1,bdver2,bdver3,bdver4,bonnell,broadwell,btver1,btver2,c3,c3-2,cannonlake,cascadelake,cooperlake,core-avx-i,core-avx2,core2,core_2_duo_sse4_1,core_2_duo_ssse3,core_2nd_gen_avx,core_3rd_gen_avx,core_4th_gen_avx,core_4th_gen_avx_tsx,core_5th_gen_avx,core_5th_gen_avx_tsx,core_aes_pclmulqdq,core_i7_sse4_2,corei7,corei7-avx,emeraldrapids,generic,geode,goldmont,goldmont-plus,goldmont_plus,grandridge,graniterapids,graniterapids-d,graniterapids_d,haswell,i386,i486,i586,i686,icelake-client,icelake-server,icelake_client,icelake_server,ivybridge,k6,k6-2,k6-3,k8,k8-sse3,knl,knm,lakemont,meteorlake,mic_avx512,nehalem,nocona,opteron,opteron-sse3,penryn,pentium,pentium-m,pentium-mmx,pentium2,pentium3,pentium3m,pentium4,pentium4m,pentium_4,pentium_4_sse3,pentium_ii,pentium_iii,pentium_iii_no_xmm_regs,pentium_m,pentium_mmx,pentium_pro,pentiumpro,prescott,raptorlake,rocketlake,sandybridge,sapphirerapids,sierraforest,silvermont,skx,skylake,skylake-avx512,skylake_avx512,slm,tigerlake,tremont,westmere,winchip-c6,winchip2,x86-64,x86-64-v2,x86-64-v3,x86-64-v4,yonah,znver1,znver2,znver3,znver4"), + // TargetArch_i386: + str_lit("alderlake,amdfam10,athlon,athlon-4,athlon-fx,athlon-mp,athlon-tbird,athlon-xp,athlon64,athlon64-sse3,atom,atom_sse4_2,atom_sse4_2_movbe,barcelona,bdver1,bdver2,bdver3,bdver4,bonnell,broadwell,btver1,btver2,c3,c3-2,cannonlake,cascadelake,cooperlake,core-avx-i,core-avx2,core2,core_2_duo_sse4_1,core_2_duo_ssse3,core_2nd_gen_avx,core_3rd_gen_avx,core_4th_gen_avx,core_4th_gen_avx_tsx,core_5th_gen_avx,core_5th_gen_avx_tsx,core_aes_pclmulqdq,core_i7_sse4_2,corei7,corei7-avx,emeraldrapids,generic,geode,goldmont,goldmont-plus,goldmont_plus,grandridge,graniterapids,graniterapids-d,graniterapids_d,haswell,i386,i486,i586,i686,icelake-client,icelake-server,icelake_client,icelake_server,ivybridge,k6,k6-2,k6-3,k8,k8-sse3,knl,knm,lakemont,meteorlake,mic_avx512,nehalem,nocona,opteron,opteron-sse3,penryn,pentium,pentium-m,pentium-mmx,pentium2,pentium3,pentium3m,pentium4,pentium4m,pentium_4,pentium_4_sse3,pentium_ii,pentium_iii,pentium_iii_no_xmm_regs,pentium_m,pentium_mmx,pentium_pro,pentiumpro,prescott,raptorlake,rocketlake,sandybridge,sapphirerapids,sierraforest,silvermont,skx,skylake,skylake-avx512,skylake_avx512,slm,tigerlake,tremont,westmere,winchip-c6,winchip2,x86-64,x86-64-v2,x86-64-v3,x86-64-v4,yonah,znver1,znver2,znver3,znver4"), + // TargetArch_arm32: + str_lit("arm1020e,arm1020t,arm1022e,arm10e,arm10tdmi,arm1136j-s,arm1136jf-s,arm1156t2-s,arm1156t2f-s,arm1176jz-s,arm1176jzf-s,arm710t,arm720t,arm7tdmi,arm7tdmi-s,arm8,arm810,arm9,arm920,arm920t,arm922t,arm926ej-s,arm940t,arm946e-s,arm966e-s,arm968e-s,arm9e,arm9tdmi,cortex-a12,cortex-a15,cortex-a17,cortex-a32,cortex-a35,cortex-a5,cortex-a53,cortex-a55,cortex-a57,cortex-a7,cortex-a710,cortex-a72,cortex-a73,cortex-a75,cortex-a76,cortex-a76ae,cortex-a77,cortex-a78,cortex-a78c,cortex-a8,cortex-a9,cortex-m0,cortex-m0plus,cortex-m1,cortex-m23,cortex-m3,cortex-m33,cortex-m35p,cortex-m4,cortex-m55,cortex-m7,cortex-m85,cortex-r4,cortex-r4f,cortex-r5,cortex-r52,cortex-r7,cortex-r8,cortex-x1,cortex-x1c,cyclone,ep9312,exynos-m3,exynos-m4,exynos-m5,generic,iwmmxt,krait,kryo,mpcore,mpcorenovfp,neoverse-n1,neoverse-n2,neoverse-v1,sc000,sc300,strongarm,strongarm110,strongarm1100,strongarm1110,swift,xscale"), + // TargetArch_arm64: + str_lit("a64fx,ampere1,ampere1a,apple-a10,apple-a11,apple-a12,apple-a13,apple-a14,apple-a15,apple-a16,apple-a7,apple-a8,apple-a9,apple-latest,apple-m1,apple-m2,apple-s4,apple-s5,carmel,cortex-a34,cortex-a35,cortex-a510,cortex-a53,cortex-a55,cortex-a57,cortex-a65,cortex-a65ae,cortex-a710,cortex-a715,cortex-a72,cortex-a73,cortex-a75,cortex-a76,cortex-a76ae,cortex-a77,cortex-a78,cortex-a78c,cortex-r82,cortex-x1,cortex-x1c,cortex-x2,cortex-x3,cyclone,exynos-m3,exynos-m4,exynos-m5,falkor,generic,kryo,neoverse-512tvb,neoverse-e1,neoverse-n1,neoverse-n2,neoverse-v1,neoverse-v2,saphira,thunderx,thunderx2t99,thunderx3t110,thunderxt81,thunderxt83,thunderxt88,tsv110"), + // TargetArch_wasm32: + str_lit("bleeding-edge,generic,mvp"), + // TargetArch_wasm64p32: + str_lit("bleeding-edge,generic,mvp"), +}; + +// Generated with the featuregen script in `misc/featuregen` +gb_global String target_features_list[TargetArch_COUNT] = { + // TargetArch_Invalid: + str_lit(""), + // TargetArch_amd64: + str_lit("16bit-mode,32bit-mode,3dnow,3dnowa,64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512er,avx512f,avx512fp16,avx512ifma,avx512pf,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vp2intersect,avx512vpopcntdq,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,branchfusion,cldemote,clflushopt,clwb,clzero,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,ermsb,f16c,false-deps-getmant,false-deps-lzcnt-tzcnt,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-popcnt,false-deps-range,fast-11bytenop,fast-15bytenop,fast-7bytenop,fast-bextr,fast-gather,fast-hops,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fast-vector-shift-masks,faster-shift-than-shuffle,fma,fma4,fsgsbase,fsrm,fxsr,gfni,harden-sls-ijmp,harden-sls-ret,hreset,idivl-to-divb,idivq-to-divl,invpcid,kl,lea-sp,lea-uses-ag,lvi-cfi,lvi-load-hardening,lwp,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,mwaitx,no-bypass-delay,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pad-short-functions,pclmul,pconfig,pku,popcnt,prefer-128-bit,prefer-256-bit,prefer-mask-registers,prefer-movmsk-over-vtest,prefer-no-gather,prefer-no-scatter,prefetchi,prefetchwt1,prfchw,ptwrite,raoint,rdpid,rdpru,rdrnd,rdseed,retpoline,retpoline-external-thunk,retpoline-indirect-branches,retpoline-indirect-calls,rtm,sahf,sbb-dep-breaking,serialize,seses,sgx,sha,sha512,shstk,slow-3ops-lea,slow-incdec,slow-lea,slow-pmaddwd,slow-pmulld,slow-shld,slow-two-mem-ops,slow-unaligned-mem-16,slow-unaligned-mem-32,sm3,sm4,soft-float,sse,sse-unaligned-mem,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tagged-globals,tbm,tsxldtrk,tuning-fast-imm-vector-shift,uintr,use-glm-div-sqrt-costs,use-slm-arith-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,widekl,x87,xop,xsave,xsavec,xsaveopt,xsaves"), + // TargetArch_i386: + str_lit("16bit-mode,32bit-mode,3dnow,3dnowa,64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512er,avx512f,avx512fp16,avx512ifma,avx512pf,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vp2intersect,avx512vpopcntdq,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,branchfusion,cldemote,clflushopt,clwb,clzero,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,ermsb,f16c,false-deps-getmant,false-deps-lzcnt-tzcnt,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-popcnt,false-deps-range,fast-11bytenop,fast-15bytenop,fast-7bytenop,fast-bextr,fast-gather,fast-hops,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fast-vector-shift-masks,faster-shift-than-shuffle,fma,fma4,fsgsbase,fsrm,fxsr,gfni,harden-sls-ijmp,harden-sls-ret,hreset,idivl-to-divb,idivq-to-divl,invpcid,kl,lea-sp,lea-uses-ag,lvi-cfi,lvi-load-hardening,lwp,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,mwaitx,no-bypass-delay,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pad-short-functions,pclmul,pconfig,pku,popcnt,prefer-128-bit,prefer-256-bit,prefer-mask-registers,prefer-movmsk-over-vtest,prefer-no-gather,prefer-no-scatter,prefetchi,prefetchwt1,prfchw,ptwrite,raoint,rdpid,rdpru,rdrnd,rdseed,retpoline,retpoline-external-thunk,retpoline-indirect-branches,retpoline-indirect-calls,rtm,sahf,sbb-dep-breaking,serialize,seses,sgx,sha,sha512,shstk,slow-3ops-lea,slow-incdec,slow-lea,slow-pmaddwd,slow-pmulld,slow-shld,slow-two-mem-ops,slow-unaligned-mem-16,slow-unaligned-mem-32,sm3,sm4,soft-float,sse,sse-unaligned-mem,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tagged-globals,tbm,tsxldtrk,tuning-fast-imm-vector-shift,uintr,use-glm-div-sqrt-costs,use-slm-arith-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,widekl,x87,xop,xsave,xsavec,xsaveopt,xsaves"), + // TargetArch_arm32: + str_lit("32bit,8msecext,a12,a15,a17,a32,a35,a5,a53,a55,a57,a7,a72,a73,a75,a76,a77,a78c,a8,a9,aapcs-frame-chain,aapcs-frame-chain-leaf,aclass,acquire-release,aes,armv4,armv4t,armv5t,armv5te,armv5tej,armv6,armv6-m,armv6j,armv6k,armv6kz,armv6s-m,armv6t2,armv7-a,armv7-m,armv7-r,armv7e-m,armv7k,armv7s,armv7ve,armv8-a,armv8-m.base,armv8-m.main,armv8-r,armv8.1-a,armv8.1-m.main,armv8.2-a,armv8.3-a,armv8.4-a,armv8.5-a,armv8.6-a,armv8.7-a,armv8.8-a,armv8.9-a,armv9-a,armv9.1-a,armv9.2-a,armv9.3-a,armv9.4-a,atomics-32,avoid-movs-shop,avoid-partial-cpsr,bf16,big-endian-instructions,cde,cdecp0,cdecp1,cdecp2,cdecp3,cdecp4,cdecp5,cdecp6,cdecp7,cheap-predicable-cpsr,clrbhb,cortex-a710,cortex-a78,cortex-x1,cortex-x1c,crc,crypto,d32,db,dfb,disable-postra-scheduler,dont-widen-vmovs,dotprod,dsp,execute-only,expand-fp-mlx,exynos,fix-cmse-cve-2021-35465,fix-cortex-a57-aes-1742098,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp16fml,fp64,fpao,fpregs,fpregs16,fpregs64,fullfp16,fuse-aes,fuse-literals,harden-sls-blr,harden-sls-nocomdat,harden-sls-retbr,hwdiv,hwdiv-arm,i8mm,iwmmxt,iwmmxt2,krait,kryo,lob,long-calls,loop-align,m3,m7,mclass,mp,muxed-units,mve,mve.fp,mve1beat,mve2beat,mve4beat,nacl-trap,neon,neon-fpmovs,neonfp,neoverse-v1,no-branch-predictor,no-bti-at-return-twice,no-movt,no-neg-immediates,noarm,nonpipelined-vfp,pacbti,perfmon,prefer-ishst,prefer-vmovsr,prof-unpr,r4,r5,r52,r7,ras,rclass,read-tp-tpidrprw,read-tp-tpidruro,read-tp-tpidrurw,reserve-r9,ret-addr-stack,sb,sha2,slow-fp-brcc,slow-load-D-subreg,slow-odd-reg,slow-vdup32,slow-vgetlni32,slowfpvfmx,slowfpvmlx,soft-float,splat-vfp-neon,strict-align,swift,thumb-mode,thumb2,trustzone,use-mipipeliner,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.1m.main,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8.7a,v8.8a,v8.9a,v8m,v8m.main,v9.1a,v9.2a,v9.3a,v9.4a,v9a,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vldn-align,vmlx-forwarding,vmlx-hazards,wide-stride-vfp,xscale,zcz"), + // TargetArch_arm64: + str_lit("CONTEXTIDREL2,a35,a510,a53,a55,a57,a64fx,a65,a710,a715,a72,a73,a75,a76,a77,a78,a78c,aes,aggressive-fma,all,alternate-sextload-cvt-f32-pattern,altnzcv,am,ampere1,ampere1a,amvs,apple-a10,apple-a11,apple-a12,apple-a13,apple-a14,apple-a15,apple-a16,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,ascend-store-address,b16b16,balance-fp-ops,bf16,brbe,bti,call-saved-x10,call-saved-x11,call-saved-x12,call-saved-x13,call-saved-x14,call-saved-x15,call-saved-x18,call-saved-x8,call-saved-x9,carmel,ccdp,ccidx,ccpp,chk,clrbhb,cmp-bcc-fusion,complxnum,cortex-r82,cortex-x1,cortex-x2,cortex-x3,crc,crypto,cssc,custom-cheap-as-move,d128,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,enable-select-opt,ete,exynos-cheap-as-move,exynosm3,exynosm4,f32mm,f64mm,falkor,fgt,fix-cortex-a53-835769,flagm,fmv,force-32bit-jump-tables,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-addsub-2reg-const1,fuse-adrp-add,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,gcs,harden-sls-blr,harden-sls-nocomdat,harden-sls-retbr,hbc,hcx,i8mm,ite,jsconv,kryo,lor,ls64,lse,lse128,lse2,lsl-fast,mec,mops,mpam,mte,neon,neoverse512tvb,neoversee1,neoversen1,neoversen2,neoversev1,neoversev2,nmi,no-bti-at-return-twice,no-neg-immediates,no-sve-fp-ld1r,no-zcz-fp,nv,outline-atomics,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,prfm-slc-target,rand,ras,rasv2,rcpc,rcpc-immo,rcpc3,rdm,reserve-x1,reserve-x10,reserve-x11,reserve-x12,reserve-x13,reserve-x14,reserve-x15,reserve-x18,reserve-x2,reserve-x20,reserve-x21,reserve-x22,reserve-x23,reserve-x24,reserve-x25,reserve-x26,reserve-x27,reserve-x28,reserve-x3,reserve-x30,reserve-x4,reserve-x5,reserve-x6,reserve-x7,reserve-x9,rme,saphira,sb,sel2,sha2,sha3,slow-misaligned-128store,slow-paired-128,slow-strqro-store,sm4,sme,sme-f16f16,sme-f64f64,sme-i16i64,sme2,sme2p1,spe,spe-eef,specres2,specrestrict,ssbs,strict-align,sve,sve2,sve2-aes,sve2-bitperm,sve2-sha3,sve2-sm4,sve2p1,tagged-globals,the,thunderx,thunderx2t99,thunderx3t110,thunderxt81,thunderxt83,thunderxt88,tlb-rmi,tme,tpidr-el1,tpidr-el2,tpidr-el3,tpidrro-el0,tracev8.4,trbe,tsv110,uaops,use-experimental-zeroing-pseudos,use-postra-scheduler,use-reciprocal-square-root,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8.7a,v8.8a,v8.9a,v8a,v8r,v9.1a,v9.2a,v9.3a,v9.4a,v9a,vh,wfxt,xs,zcm,zcz,zcz-fp-workaround,zcz-gp"), + // TargetArch_wasm32: + str_lit("atomics,bulk-memory,exception-handling,extended-const,multivalue,mutable-globals,nontrapping-fptoint,reference-types,relaxed-simd,sign-ext,simd128,tail-call"), + // TargetArch_wasm64p32: + str_lit("atomics,bulk-memory,exception-handling,extended-const,multivalue,mutable-globals,nontrapping-fptoint,reference-types,relaxed-simd,sign-ext,simd128,tail-call"), +}; + +// Generated with the featuregen script in `misc/featuregen` +gb_global int target_microarch_counts[TargetArch_COUNT] = { + // TargetArch_Invalid: + 0, + // TargetArch_amd64: + 120, + // TargetArch_i386: + 120, + // TargetArch_arm32: + 90, + // TargetArch_arm64: + 63, + // TargetArch_wasm32: + 3, + // TargetArch_wasm64p32: + 3, +}; + +// Generated with the featuregen script in `misc/featuregen` +gb_global MicroarchFeatureList microarch_features_list[] = { + // TargetArch_amd64: + { str_lit("alderlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("amdfam10"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,lzcnt,mmx,nopl,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4a,vzeroupper,x87") }, + { str_lit("athlon"), str_lit("3dnow,3dnowa,64bit-mode,cmov,cx8,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon-4"), str_lit("3dnow,3dnowa,64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon-fx"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon-mp"), str_lit("3dnow,3dnowa,64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon-tbird"), str_lit("3dnow,3dnowa,64bit-mode,cmov,cx8,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon-xp"), str_lit("3dnow,3dnowa,64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon64"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon64-sse3"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("atom"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,idivl-to-divb,idivq-to-divl,lea-sp,lea-uses-ag,mmx,movbe,no-bypass-delay,nopl,pad-short-functions,sahf,slow-two-mem-ops,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("atom_sse4_2"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87") }, + { str_lit("atom_sse4_2_movbe"), str_lit("64bit,64bit-mode,aes,clflushopt,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fsgsbase,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("barcelona"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,lzcnt,mmx,nopl,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4a,vzeroupper,x87") }, + { str_lit("bdver1"), str_lit("64bit,64bit-mode,aes,avx,branchfusion,cmov,crc32,cx16,cx8,fast-11bytenop,fast-scalar-shift-masks,fma4,fxsr,lwp,lzcnt,mmx,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vzeroupper,x87,xop,xsave") }, + { str_lit("bdver2"), str_lit("64bit,64bit-mode,aes,avx,bmi,branchfusion,cmov,crc32,cx16,cx8,f16c,fast-11bytenop,fast-bextr,fast-movbe,fast-scalar-shift-masks,fma,fma4,fxsr,lwp,lzcnt,mmx,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tbm,vzeroupper,x87,xop,xsave") }, + { str_lit("bdver3"), str_lit("64bit,64bit-mode,aes,avx,bmi,branchfusion,cmov,crc32,cx16,cx8,f16c,fast-11bytenop,fast-bextr,fast-movbe,fast-scalar-shift-masks,fma,fma4,fsgsbase,fxsr,lwp,lzcnt,mmx,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tbm,vzeroupper,x87,xop,xsave,xsaveopt") }, + { str_lit("bdver4"), str_lit("64bit,64bit-mode,aes,avx,avx2,bmi,bmi2,branchfusion,cmov,crc32,cx16,cx8,f16c,fast-11bytenop,fast-bextr,fast-movbe,fast-scalar-shift-masks,fma,fma4,fsgsbase,fxsr,lwp,lzcnt,mmx,movbe,mwaitx,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tbm,vzeroupper,x87,xop,xsave,xsaveopt") }, + { str_lit("bonnell"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,idivl-to-divb,idivq-to-divl,lea-sp,lea-uses-ag,mmx,movbe,no-bypass-delay,nopl,pad-short-functions,sahf,slow-two-mem-ops,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("broadwell"), str_lit("64bit,64bit-mode,adx,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("btver1"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fast-15bytenop,fast-scalar-shift-masks,fast-vector-shift-masks,fxsr,lzcnt,mmx,nopl,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4a,ssse3,vzeroupper,x87") }, + { str_lit("btver2"), str_lit("64bit,64bit-mode,aes,avx,bmi,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-hops,fast-lzcnt,fast-movbe,fast-scalar-shift-masks,fast-vector-shift-masks,fxsr,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,x87,xsave,xsaveopt") }, + { str_lit("c3"), str_lit("3dnow,64bit-mode,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("c3-2"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("cannonlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vl,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,sha,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("cascadelake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,avx512vnni,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("cooperlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bf16,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,avx512vnni,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("core-avx-i"), str_lit("64bit,64bit-mode,avx,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fsgsbase,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core-avx2"), str_lit("64bit,64bit-mode,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core2"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("core_2_duo_sse4_1"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,sse4.1,ssse3,vzeroupper,x87") }, + { str_lit("core_2_duo_ssse3"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("core_2nd_gen_avx"), str_lit("64bit,64bit-mode,avx,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_3rd_gen_avx"), str_lit("64bit,64bit-mode,avx,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fsgsbase,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_4th_gen_avx"), str_lit("64bit,64bit-mode,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_4th_gen_avx_tsx"), str_lit("64bit,64bit-mode,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_5th_gen_avx"), str_lit("64bit,64bit-mode,adx,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_5th_gen_avx_tsx"), str_lit("64bit,64bit-mode,adx,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_aes_pclmulqdq"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("core_i7_sse4_2"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("corei7"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("corei7-avx"), str_lit("64bit,64bit-mode,avx,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("emeraldrapids"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("generic"), str_lit("64bit,64bit-mode,cx8,fast-15bytenop,fast-scalar-fsqrt,idivq-to-divl,macrofusion,slow-3ops-lea,sse,sse2,vzeroupper,x87") }, + { str_lit("geode"), str_lit("3dnow,3dnowa,64bit-mode,cx8,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("goldmont"), str_lit("64bit,64bit-mode,aes,clflushopt,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("goldmont-plus"), str_lit("64bit,64bit-mode,aes,clflushopt,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("goldmont_plus"), str_lit("64bit,64bit-mode,aes,clflushopt,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("grandridge"), str_lit("64bit,64bit-mode,adx,aes,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,fast-movbe,fma,fsgsbase,fxsr,gfni,hreset,invpcid,kl,lzcnt,mmx,movbe,movdir64b,movdiri,no-bypass-delay,nopl,pclmul,pconfig,pku,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,uintr,use-glm-div-sqrt-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("graniterapids"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("graniterapids-d"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("graniterapids_d"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("haswell"), str_lit("64bit,64bit-mode,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("i386"), str_lit("64bit-mode,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("i486"), str_lit("64bit-mode,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("i586"), str_lit("64bit-mode,cx8,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("i686"), str_lit("64bit-mode,cmov,cx8,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("icelake-client"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("icelake-server"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("icelake_client"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("icelake_server"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("ivybridge"), str_lit("64bit,64bit-mode,avx,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fsgsbase,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("k6"), str_lit("64bit-mode,cx8,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("k6-2"), str_lit("3dnow,64bit-mode,cx8,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("k6-3"), str_lit("3dnow,64bit-mode,cx8,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("k8"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("k8-sse3"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("knl"), str_lit("64bit,64bit-mode,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, + { str_lit("knm"), str_lit("64bit,64bit-mode,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,avx512vpopcntdq,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, + { str_lit("lakemont"), str_lit("64bit-mode,cx8,slow-unaligned-mem-16,sse,sse2,vzeroupper") }, + { str_lit("meteorlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("mic_avx512"), str_lit("64bit,64bit-mode,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, + { str_lit("nehalem"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("nocona"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("opteron"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("opteron-sse3"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("penryn"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,sse4.1,ssse3,vzeroupper,x87") }, + { str_lit("pentium"), str_lit("64bit-mode,cx8,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium-m"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium-mmx"), str_lit("64bit-mode,cx8,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium2"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium3"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium3m"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium4"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium4m"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_4"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_4_sse3"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("pentium_ii"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_iii"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_iii_no_xmm_regs"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_m"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_mmx"), str_lit("64bit-mode,cx8,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_pro"), str_lit("64bit-mode,cmov,cx8,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentiumpro"), str_lit("64bit-mode,cmov,cx8,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("prescott"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("raptorlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("rocketlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("sandybridge"), str_lit("64bit,64bit-mode,avx,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("sapphirerapids"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("sierraforest"), str_lit("64bit,64bit-mode,adx,aes,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,fast-movbe,fma,fsgsbase,fxsr,gfni,hreset,invpcid,kl,lzcnt,mmx,movbe,movdir64b,movdiri,no-bypass-delay,nopl,pclmul,pconfig,pku,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,uintr,use-glm-div-sqrt-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("silvermont"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87") }, + { str_lit("skx"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("skylake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("skylake-avx512"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("skylake_avx512"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("slm"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87") }, + { str_lit("tigerlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vp2intersect,avx512vpopcntdq,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("tremont"), str_lit("64bit,64bit-mode,aes,clflushopt,clwb,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,gfni,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("westmere"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("winchip-c6"), str_lit("64bit-mode,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("winchip2"), str_lit("3dnow,64bit-mode,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("x86-64"), str_lit("64bit,64bit-mode,cmov,cx8,fxsr,idivq-to-divl,macrofusion,mmx,nopl,slow-3ops-lea,slow-incdec,sse,sse2,vzeroupper,x87") }, + { str_lit("x86-64-v2"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,nopl,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("x86-64-v3"), str_lit("64bit,64bit-mode,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fxsr,idivq-to-divl,lzcnt,macrofusion,mmx,movbe,nopl,popcnt,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave") }, + { str_lit("x86-64-v4"), str_lit("64bit,64bit-mode,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fxsr,idivq-to-divl,lzcnt,macrofusion,mmx,movbe,nopl,popcnt,prefer-256-bit,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave") }, + { str_lit("yonah"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("znver1"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,branchfusion,clflushopt,clzero,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,lzcnt,mmx,movbe,mwaitx,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("znver2"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,branchfusion,clflushopt,clwb,clzero,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,lzcnt,mmx,movbe,mwaitx,nopl,pclmul,popcnt,prfchw,rdpid,rdpru,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("znver3"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,branchfusion,clflushopt,clwb,clzero,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,invpcid,lzcnt,macrofusion,mmx,movbe,mwaitx,nopl,pclmul,pku,popcnt,prfchw,rdpid,rdpru,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("znver4"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,branchfusion,clflushopt,clwb,clzero,cmov,crc32,cx16,cx8,evex512,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,invpcid,lzcnt,macrofusion,mmx,movbe,mwaitx,nopl,pclmul,pku,popcnt,prfchw,rdpid,rdpru,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,shstk,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + // TargetArch_i386: + { str_lit("alderlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("amdfam10"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,lzcnt,mmx,nopl,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4a,vzeroupper,x87") }, + { str_lit("athlon"), str_lit("32bit-mode,3dnow,3dnowa,cmov,cx8,mmx,nopl,slow-shld,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("athlon-4"), str_lit("32bit-mode,3dnow,3dnowa,cmov,cx8,fxsr,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("athlon-fx"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon-mp"), str_lit("32bit-mode,3dnow,3dnowa,cmov,cx8,fxsr,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("athlon-tbird"), str_lit("32bit-mode,3dnow,3dnowa,cmov,cx8,mmx,nopl,slow-shld,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("athlon-xp"), str_lit("32bit-mode,3dnow,3dnowa,cmov,cx8,fxsr,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("athlon64"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("athlon64-sse3"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("atom"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,idivl-to-divb,idivq-to-divl,lea-sp,lea-uses-ag,mmx,movbe,no-bypass-delay,nopl,pad-short-functions,sahf,slow-two-mem-ops,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("atom_sse4_2"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87") }, + { str_lit("atom_sse4_2_movbe"), str_lit("32bit-mode,64bit,aes,clflushopt,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fsgsbase,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("barcelona"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,lzcnt,mmx,nopl,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4a,vzeroupper,x87") }, + { str_lit("bdver1"), str_lit("32bit-mode,64bit,aes,avx,branchfusion,cmov,crc32,cx16,cx8,fast-11bytenop,fast-scalar-shift-masks,fma4,fxsr,lwp,lzcnt,mmx,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vzeroupper,x87,xop,xsave") }, + { str_lit("bdver2"), str_lit("32bit-mode,64bit,aes,avx,bmi,branchfusion,cmov,crc32,cx16,cx8,f16c,fast-11bytenop,fast-bextr,fast-movbe,fast-scalar-shift-masks,fma,fma4,fxsr,lwp,lzcnt,mmx,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tbm,vzeroupper,x87,xop,xsave") }, + { str_lit("bdver3"), str_lit("32bit-mode,64bit,aes,avx,bmi,branchfusion,cmov,crc32,cx16,cx8,f16c,fast-11bytenop,fast-bextr,fast-movbe,fast-scalar-shift-masks,fma,fma4,fsgsbase,fxsr,lwp,lzcnt,mmx,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tbm,vzeroupper,x87,xop,xsave,xsaveopt") }, + { str_lit("bdver4"), str_lit("32bit-mode,64bit,aes,avx,avx2,bmi,bmi2,branchfusion,cmov,crc32,cx16,cx8,f16c,fast-11bytenop,fast-bextr,fast-movbe,fast-scalar-shift-masks,fma,fma4,fsgsbase,fxsr,lwp,lzcnt,mmx,movbe,mwaitx,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tbm,vzeroupper,x87,xop,xsave,xsaveopt") }, + { str_lit("bonnell"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,idivl-to-divb,idivq-to-divl,lea-sp,lea-uses-ag,mmx,movbe,no-bypass-delay,nopl,pad-short-functions,sahf,slow-two-mem-ops,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("broadwell"), str_lit("32bit-mode,64bit,adx,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("btver1"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fast-15bytenop,fast-scalar-shift-masks,fast-vector-shift-masks,fxsr,lzcnt,mmx,nopl,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4a,ssse3,vzeroupper,x87") }, + { str_lit("btver2"), str_lit("32bit-mode,64bit,aes,avx,bmi,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-hops,fast-lzcnt,fast-movbe,fast-scalar-shift-masks,fast-vector-shift-masks,fxsr,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,x87,xsave,xsaveopt") }, + { str_lit("c3"), str_lit("32bit-mode,3dnow,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("c3-2"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("cannonlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vl,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,sha,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("cascadelake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,avx512vnni,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("cooperlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bf16,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,avx512vnni,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("core-avx-i"), str_lit("32bit-mode,64bit,avx,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fsgsbase,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core-avx2"), str_lit("32bit-mode,64bit,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core2"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("core_2_duo_sse4_1"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,sse4.1,ssse3,vzeroupper,x87") }, + { str_lit("core_2_duo_ssse3"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,ssse3,vzeroupper,x87") }, + { str_lit("core_2nd_gen_avx"), str_lit("32bit-mode,64bit,avx,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_3rd_gen_avx"), str_lit("32bit-mode,64bit,avx,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fsgsbase,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_4th_gen_avx"), str_lit("32bit-mode,64bit,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_4th_gen_avx_tsx"), str_lit("32bit-mode,64bit,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_5th_gen_avx"), str_lit("32bit-mode,64bit,adx,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_5th_gen_avx_tsx"), str_lit("32bit-mode,64bit,adx,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("core_aes_pclmulqdq"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("core_i7_sse4_2"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("corei7"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("corei7-avx"), str_lit("32bit-mode,64bit,avx,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("emeraldrapids"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,amx-bf16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("generic"), str_lit("32bit-mode,64bit,cx8,fast-15bytenop,fast-scalar-fsqrt,idivq-to-divl,macrofusion,slow-3ops-lea,vzeroupper,x87") }, + { str_lit("geode"), str_lit("32bit-mode,3dnow,3dnowa,cx8,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("goldmont"), str_lit("32bit-mode,64bit,aes,clflushopt,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("goldmont-plus"), str_lit("32bit-mode,64bit,aes,clflushopt,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("goldmont_plus"), str_lit("32bit-mode,64bit,aes,clflushopt,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("grandridge"), str_lit("32bit-mode,64bit,adx,aes,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,fast-movbe,fma,fsgsbase,fxsr,gfni,hreset,invpcid,kl,lzcnt,mmx,movbe,movdir64b,movdiri,no-bypass-delay,nopl,pclmul,pconfig,pku,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,uintr,use-glm-div-sqrt-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("graniterapids"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,amx-bf16,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("graniterapids-d"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("graniterapids_d"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("haswell"), str_lit("32bit-mode,64bit,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("i386"), str_lit("32bit-mode,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("i486"), str_lit("32bit-mode,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("i586"), str_lit("32bit-mode,cx8,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("i686"), str_lit("32bit-mode,cmov,cx8,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("icelake-client"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("icelake-server"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("icelake_client"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("icelake_server"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("ivybridge"), str_lit("32bit-mode,64bit,avx,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fsgsbase,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("k6"), str_lit("32bit-mode,cx8,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("k6-2"), str_lit("32bit-mode,3dnow,cx8,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("k6-3"), str_lit("32bit-mode,3dnow,cx8,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("k8"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("k8-sse3"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("knl"), str_lit("32bit-mode,64bit,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, + { str_lit("knm"), str_lit("32bit-mode,64bit,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,avx512vpopcntdq,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, + { str_lit("lakemont"), str_lit("32bit-mode,cx8,slow-unaligned-mem-16,vzeroupper") }, + { str_lit("meteorlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("mic_avx512"), str_lit("32bit-mode,64bit,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, + { str_lit("nehalem"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("nocona"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("opteron"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("opteron-sse3"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("penryn"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,sse4.1,ssse3,vzeroupper,x87") }, + { str_lit("pentium"), str_lit("32bit-mode,cx8,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("pentium-m"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium-mmx"), str_lit("32bit-mode,cx8,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("pentium2"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("pentium3"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("pentium3m"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("pentium4"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium4m"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_4"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_4_sse3"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("pentium_ii"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("pentium_iii"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("pentium_iii_no_xmm_regs"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,vzeroupper,x87") }, + { str_lit("pentium_m"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, + { str_lit("pentium_mmx"), str_lit("32bit-mode,cx8,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("pentium_pro"), str_lit("32bit-mode,cmov,cx8,nopl,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("pentiumpro"), str_lit("32bit-mode,cmov,cx8,nopl,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("prescott"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("raptorlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("rocketlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("sandybridge"), str_lit("32bit-mode,64bit,avx,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, + { str_lit("sapphirerapids"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,amx-bf16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("sierraforest"), str_lit("32bit-mode,64bit,adx,aes,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,fast-movbe,fma,fsgsbase,fxsr,gfni,hreset,invpcid,kl,lzcnt,mmx,movbe,movdir64b,movdiri,no-bypass-delay,nopl,pclmul,pconfig,pku,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,uintr,use-glm-div-sqrt-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("silvermont"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87") }, + { str_lit("skx"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("skylake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("skylake-avx512"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("skylake_avx512"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("slm"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-7bytenop,fast-movbe,fxsr,idivq-to-divl,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,sahf,slow-incdec,slow-lea,slow-pmulld,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-slm-arith-costs,vzeroupper,x87") }, + { str_lit("tigerlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vp2intersect,avx512vpopcntdq,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdpid,rdrnd,rdseed,sahf,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("tremont"), str_lit("32bit-mode,64bit,aes,clflushopt,clwb,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,gfni,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("westmere"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("winchip-c6"), str_lit("32bit-mode,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("winchip2"), str_lit("32bit-mode,3dnow,mmx,slow-unaligned-mem-16,vzeroupper,x87") }, + { str_lit("x86-64"), str_lit("32bit-mode,64bit,cmov,cx8,fxsr,idivq-to-divl,macrofusion,mmx,nopl,slow-3ops-lea,slow-incdec,sse,sse2,vzeroupper,x87") }, + { str_lit("x86-64-v2"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fxsr,idivq-to-divl,macrofusion,mmx,nopl,popcnt,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, + { str_lit("x86-64-v3"), str_lit("32bit-mode,64bit,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fxsr,idivq-to-divl,lzcnt,macrofusion,mmx,movbe,nopl,popcnt,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave") }, + { str_lit("x86-64-v4"), str_lit("32bit-mode,64bit,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fxsr,idivq-to-divl,lzcnt,macrofusion,mmx,movbe,nopl,popcnt,prefer-256-bit,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave") }, + { str_lit("yonah"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("znver1"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,branchfusion,clflushopt,clzero,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,lzcnt,mmx,movbe,mwaitx,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("znver2"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,branchfusion,clflushopt,clwb,clzero,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,lzcnt,mmx,movbe,mwaitx,nopl,pclmul,popcnt,prfchw,rdpid,rdpru,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("znver3"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,bmi,bmi2,branchfusion,clflushopt,clwb,clzero,cmov,crc32,cx16,cx8,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,invpcid,lzcnt,macrofusion,mmx,movbe,mwaitx,nopl,pclmul,pku,popcnt,prfchw,rdpid,rdpru,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("znver4"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,bmi,bmi2,branchfusion,clflushopt,clwb,clzero,cmov,crc32,cx16,cx8,evex512,f16c,fast-15bytenop,fast-bextr,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,invpcid,lzcnt,macrofusion,mmx,movbe,mwaitx,nopl,pclmul,pku,popcnt,prfchw,rdpid,rdpru,rdrnd,rdseed,sahf,sbb-dep-breaking,sha,shstk,slow-shld,sse,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,vaes,vpclmulqdq,vzeroupper,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, + // TargetArch_arm32: + { str_lit("arm1020e"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm1020t"), str_lit("armv5t,v4t,v5t") }, + { str_lit("arm1022e"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm10e"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm10tdmi"), str_lit("armv5t,v4t,v5t") }, + { str_lit("arm1136j-s"), str_lit("armv6,dsp,v4t,v5t,v5te,v6") }, + { str_lit("arm1136jf-s"), str_lit("armv6,dsp,fp64,fpregs,fpregs64,slowfpvmlx,v4t,v5t,v5te,v6,vfp2,vfp2sp") }, + { str_lit("arm1156t2-s"), str_lit("armv6t2,dsp,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v8m") }, + { str_lit("arm1156t2f-s"), str_lit("armv6t2,dsp,fp64,fpregs,fpregs64,slowfpvmlx,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v8m,vfp2,vfp2sp") }, + { str_lit("arm1176jz-s"), str_lit("armv6kz,trustzone,v4t,v5t,v5te,v6,v6k") }, + { str_lit("arm1176jzf-s"), str_lit("armv6kz,fp64,fpregs,fpregs64,slowfpvmlx,trustzone,v4t,v5t,v5te,v6,v6k,vfp2,vfp2sp") }, + { str_lit("arm710t"), str_lit("armv4t,v4t") }, + { str_lit("arm720t"), str_lit("armv4t,v4t") }, + { str_lit("arm7tdmi"), str_lit("armv4t,v4t") }, + { str_lit("arm7tdmi-s"), str_lit("armv4t,v4t") }, + { str_lit("arm8"), str_lit("armv4") }, + { str_lit("arm810"), str_lit("armv4") }, + { str_lit("arm9"), str_lit("armv4t,v4t") }, + { str_lit("arm920"), str_lit("armv4t,v4t") }, + { str_lit("arm920t"), str_lit("armv4t,v4t") }, + { str_lit("arm922t"), str_lit("armv4t,v4t") }, + { str_lit("arm926ej-s"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm940t"), str_lit("armv4t,v4t") }, + { str_lit("arm946e-s"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm966e-s"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm968e-s"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm9e"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("arm9tdmi"), str_lit("armv4t,v4t") }, + { str_lit("cortex-a12"), str_lit("a12,aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ret-addr-stack,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vmlx-forwarding") }, + { str_lit("cortex-a15"), str_lit("a15,aclass,armv7-a,avoid-partial-cpsr,d32,db,dont-widen-vmovs,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,muxed-units,neon,perfmon,ret-addr-stack,splat-vfp-neon,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vldn-align") }, + { str_lit("cortex-a17"), str_lit("a17,aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ret-addr-stack,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vmlx-forwarding") }, + { str_lit("cortex-a32"), str_lit("aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a35"), str_lit("a35,aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a5"), str_lit("a5,aclass,armv7-a,d32,db,dsp,fp16,fp64,fpregs,fpregs64,mp,neon,perfmon,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,vmlx-forwarding") }, + { str_lit("cortex-a53"), str_lit("a53,aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpao,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a55"), str_lit("a55,aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a57"), str_lit("a57,aclass,acquire-release,aes,armv8-a,avoid-partial-cpsr,cheap-predicable-cpsr,crc,crypto,d32,db,dsp,fix-cortex-a57-aes-1742098,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpao,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a7"), str_lit("a7,aclass,armv7-a,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vmlx-forwarding,vmlx-hazards") }, + { str_lit("cortex-a710"), str_lit("aclass,acquire-release,armv9-a,bf16,cortex-a710,crc,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp16fml,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,i8mm,mp,neon,perfmon,ras,sb,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8m,v9a,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a72"), str_lit("a72,aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fix-cortex-a57-aes-1742098,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a73"), str_lit("a73,aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a75"), str_lit("a75,aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a76"), str_lit("a76,aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a76ae"), str_lit("a76,aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a77"), str_lit("a77,aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a78"), str_lit("aclass,acquire-release,aes,armv8.2-a,cortex-a78,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a78c"), str_lit("a78c,aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-a8"), str_lit("a8,aclass,armv7-a,d32,db,dsp,fp64,fpregs,fpregs64,neon,nonpipelined-vfp,perfmon,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vmlx-forwarding,vmlx-hazards") }, + { str_lit("cortex-a9"), str_lit("a9,aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,expand-fp-mlx,fp16,fp64,fpregs,fpregs64,mp,muxed-units,neon,neon-fpmovs,perfmon,prefer-vmovsr,ret-addr-stack,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vldn-align,vmlx-forwarding,vmlx-hazards") }, + { str_lit("cortex-m0"), str_lit("armv6-m,db,mclass,no-branch-predictor,noarm,strict-align,thumb-mode,v4t,v5t,v5te,v6,v6m") }, + { str_lit("cortex-m0plus"), str_lit("armv6-m,db,mclass,no-branch-predictor,noarm,strict-align,thumb-mode,v4t,v5t,v5te,v6,v6m") }, + { str_lit("cortex-m1"), str_lit("armv6-m,db,mclass,no-branch-predictor,noarm,strict-align,thumb-mode,v4t,v5t,v5te,v6,v6m") }, + { str_lit("cortex-m23"), str_lit("8msecext,acquire-release,armv8-m.base,db,hwdiv,mclass,no-branch-predictor,no-movt,noarm,strict-align,thumb-mode,v4t,v5t,v5te,v6,v6m,v7clrex,v8m") }, + { str_lit("cortex-m3"), str_lit("armv7-m,db,hwdiv,loop-align,m3,mclass,no-branch-predictor,noarm,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m") }, + { str_lit("cortex-m33"), str_lit("8msecext,acquire-release,armv8-m.main,db,dsp,fix-cmse-cve-2021-35465,fp-armv8d16sp,fp16,fpregs,hwdiv,loop-align,mclass,no-branch-predictor,noarm,slowfpvfmx,slowfpvmlx,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,v8m.main,vfp2sp,vfp3d16sp,vfp4d16sp") }, + { str_lit("cortex-m35p"), str_lit("8msecext,acquire-release,armv8-m.main,db,dsp,fix-cmse-cve-2021-35465,fp-armv8d16sp,fp16,fpregs,hwdiv,loop-align,mclass,no-branch-predictor,noarm,slowfpvfmx,slowfpvmlx,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,v8m.main,vfp2sp,vfp3d16sp,vfp4d16sp") }, + { str_lit("cortex-m4"), str_lit("armv7e-m,db,dsp,fp16,fpregs,hwdiv,loop-align,mclass,no-branch-predictor,noarm,slowfpvfmx,slowfpvmlx,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2sp,vfp3d16sp,vfp4d16sp") }, + { str_lit("cortex-m55"), str_lit("8msecext,acquire-release,armv8.1-m.main,db,dsp,fix-cmse-cve-2021-35465,fp-armv8d16,fp-armv8d16sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,lob,loop-align,mclass,mve,mve.fp,no-branch-predictor,noarm,ras,slowfpvmlx,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8.1m.main,v8m,v8m.main,vfp2,vfp2sp,vfp3d16,vfp3d16sp,vfp4d16,vfp4d16sp") }, + { str_lit("cortex-m7"), str_lit("armv7e-m,db,dsp,fp-armv8d16,fp-armv8d16sp,fp16,fp64,fpregs,fpregs64,hwdiv,m7,mclass,noarm,thumb-mode,thumb2,use-mipipeliner,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3d16,vfp3d16sp,vfp4d16,vfp4d16sp") }, + { str_lit("cortex-m85"), str_lit("8msecext,acquire-release,armv8.1-m.main,db,dsp,fp-armv8d16,fp-armv8d16sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,lob,mclass,mve,mve.fp,noarm,pacbti,ras,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8.1m.main,v8m,v8m.main,vfp2,vfp2sp,vfp3d16,vfp3d16sp,vfp4d16,vfp4d16sp") }, + { str_lit("cortex-r4"), str_lit("armv7-r,avoid-partial-cpsr,db,dsp,hwdiv,perfmon,r4,rclass,ret-addr-stack,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m") }, + { str_lit("cortex-r4f"), str_lit("armv7-r,avoid-partial-cpsr,db,dsp,fp64,fpregs,fpregs64,hwdiv,perfmon,r4,rclass,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3d16,vfp3d16sp") }, + { str_lit("cortex-r5"), str_lit("armv7-r,avoid-partial-cpsr,db,dsp,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,perfmon,r5,rclass,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3d16,vfp3d16sp") }, + { str_lit("cortex-r52"), str_lit("acquire-release,armv8-r,crc,d32,db,dfb,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpao,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,r52,rclass,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-r7"), str_lit("armv7-r,avoid-partial-cpsr,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,perfmon,r7,rclass,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3d16,vfp3d16sp") }, + { str_lit("cortex-r8"), str_lit("armv7-r,avoid-partial-cpsr,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,perfmon,rclass,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3d16,vfp3d16sp") }, + { str_lit("cortex-x1"), str_lit("aclass,acquire-release,aes,armv8.2-a,cortex-x1,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cortex-x1c"), str_lit("aclass,acquire-release,aes,armv8.2-a,cortex-x1c,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("cyclone"), str_lit("aclass,acquire-release,aes,armv8-a,avoid-movs-shop,avoid-partial-cpsr,crc,crypto,d32,db,disable-postra-scheduler,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,neonfp,perfmon,ret-addr-stack,sha2,slowfpvfmx,slowfpvmlx,swift,thumb2,trustzone,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,zcz") }, + { str_lit("ep9312"), str_lit("armv4t,v4t") }, + { str_lit("exynos-m3"), str_lit("aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dont-widen-vmovs,dsp,expand-fp-mlx,exynos,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,fuse-aes,fuse-literals,hwdiv,hwdiv-arm,mp,neon,perfmon,prof-unpr,ret-addr-stack,sha2,slow-fp-brcc,slow-vdup32,slow-vgetlni32,slowfpvfmx,slowfpvmlx,splat-vfp-neon,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,wide-stride-vfp,zcz") }, + { str_lit("exynos-m4"), str_lit("aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dont-widen-vmovs,dotprod,dsp,expand-fp-mlx,exynos,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,fuse-aes,fuse-literals,hwdiv,hwdiv-arm,mp,neon,perfmon,prof-unpr,ras,ret-addr-stack,sha2,slow-fp-brcc,slow-vdup32,slow-vgetlni32,slowfpvfmx,slowfpvmlx,splat-vfp-neon,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,wide-stride-vfp,zcz") }, + { str_lit("exynos-m5"), str_lit("aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dont-widen-vmovs,dotprod,dsp,expand-fp-mlx,exynos,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,fuse-aes,fuse-literals,hwdiv,hwdiv-arm,mp,neon,perfmon,prof-unpr,ras,ret-addr-stack,sha2,slow-fp-brcc,slow-vdup32,slow-vgetlni32,slowfpvfmx,slowfpvmlx,splat-vfp-neon,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,wide-stride-vfp,zcz") }, + { str_lit("generic"), str_lit("") }, + { str_lit("iwmmxt"), str_lit("armv5te,v4t,v5t,v5te") }, + { str_lit("krait"), str_lit("aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,krait,muxed-units,neon,perfmon,ret-addr-stack,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,vldn-align,vmlx-forwarding") }, + { str_lit("kryo"), str_lit("aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,kryo,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("mpcore"), str_lit("armv6k,fp64,fpregs,fpregs64,slowfpvmlx,v4t,v5t,v5te,v6,v6k,vfp2,vfp2sp") }, + { str_lit("mpcorenovfp"), str_lit("armv6k,v4t,v5t,v5te,v6,v6k") }, + { str_lit("neoverse-n1"), str_lit("aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("neoverse-n2"), str_lit("aclass,acquire-release,armv9-a,bf16,crc,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,i8mm,mp,neon,perfmon,ras,sb,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8m,v9a,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("neoverse-v1"), str_lit("aclass,acquire-release,aes,armv8.4-a,bf16,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,i8mm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8.3a,v8.4a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, + { str_lit("sc000"), str_lit("armv6-m,db,mclass,no-branch-predictor,noarm,strict-align,thumb-mode,v4t,v5t,v5te,v6,v6m") }, + { str_lit("sc300"), str_lit("armv7-m,db,hwdiv,m3,mclass,no-branch-predictor,noarm,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m") }, + { str_lit("strongarm"), str_lit("armv4") }, + { str_lit("strongarm110"), str_lit("armv4") }, + { str_lit("strongarm1100"), str_lit("armv4") }, + { str_lit("strongarm1110"), str_lit("armv4") }, + { str_lit("swift"), str_lit("aclass,armv7-a,avoid-movs-shop,avoid-partial-cpsr,d32,db,disable-postra-scheduler,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,neonfp,perfmon,prefer-ishst,prof-unpr,ret-addr-stack,slow-load-D-subreg,slow-odd-reg,slow-vdup32,slow-vgetlni32,slowfpvfmx,slowfpvmlx,swift,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,vmlx-hazards,wide-stride-vfp") }, + { str_lit("xscale"), str_lit("armv5te,v4t,v5t,v5te") }, + // TargetArch_arm64: + { str_lit("a64fx"), str_lit("CONTEXTIDREL2,a64fx,aggressive-fma,arith-bcc-fusion,ccpp,complxnum,crc,el2vmsa,el3,fp-armv8,fullfp16,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rdm,sha2,store-pair-suppress,sve,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("ampere1"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,aggressive-fma,altnzcv,alu-lsl-fast,am,ampere1,amvs,arith-bcc-fusion,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,crc,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fptoint,fuse-address,fuse-aes,fuse-literals,i8mm,jsconv,ldp-aligned-only,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,rand,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,stp-aligned-only,tlb-rmi,tracev8.4,uaops,use-postra-scheduler,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh") }, + { str_lit("ampere1a"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,aggressive-fma,altnzcv,alu-lsl-fast,am,ampere1a,amvs,arith-bcc-fusion,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,crc,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fptoint,fuse-address,fuse-aes,fuse-literals,i8mm,jsconv,ldp-aligned-only,lor,lse,lse2,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predres,rand,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,sm4,specrestrict,ssbs,store-pair-suppress,stp-aligned-only,tlb-rmi,tracev8.4,uaops,use-postra-scheduler,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh") }, + { str_lit("apple-a10"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,apple-a10,arith-bcc-fusion,arith-cbz-fusion,crc,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fuse-aes,fuse-crypto-eor,lor,neon,pan,perfmon,rdm,sha2,store-pair-suppress,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-a11"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,apple-a11,arith-bcc-fusion,arith-cbz-fusion,ccpp,crc,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fullfp16,fuse-aes,fuse-crypto-eor,lor,lse,neon,pan,pan-rwv,perfmon,ras,rdm,sha2,store-pair-suppress,uaops,v8.1a,v8.2a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-a12"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,apple-a12,arith-bcc-fusion,arith-cbz-fusion,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fullfp16,fuse-aes,fuse-crypto-eor,jsconv,lor,lse,neon,pan,pan-rwv,pauth,perfmon,ras,rcpc,rdm,sha2,store-pair-suppress,uaops,v8.1a,v8.2a,v8.3a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-a13"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,am,apple-a13,arith-bcc-fusion,arith-cbz-fusion,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,el2vmsa,el3,flagm,fp-armv8,fp16fml,fullfp16,fuse-aes,fuse-crypto-eor,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,ras,rcpc,rcpc-immo,rdm,sel2,sha2,sha3,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-a14"), str_lit("CONTEXTIDREL2,aes,aggressive-fma,alternate-sextload-cvt-f32-pattern,altnzcv,am,apple-a14,arith-bcc-fusion,arith-cbz-fusion,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,el2vmsa,el3,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-a15"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,altnzcv,am,amvs,apple-a15,arith-bcc-fusion,arith-cbz-fusion,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,i8mm,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-a16"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,altnzcv,am,amvs,apple-a16,arith-bcc-fusion,arith-cbz-fusion,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,hcx,i8mm,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-a7"), str_lit("aes,alternate-sextload-cvt-f32-pattern,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fuse-aes,fuse-crypto-eor,neon,perfmon,sha2,store-pair-suppress,v8a,zcm,zcz,zcz-fp-workaround,zcz-gp") }, + { str_lit("apple-a8"), str_lit("aes,alternate-sextload-cvt-f32-pattern,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fuse-aes,fuse-crypto-eor,neon,perfmon,sha2,store-pair-suppress,v8a,zcm,zcz,zcz-fp-workaround,zcz-gp") }, + { str_lit("apple-a9"), str_lit("aes,alternate-sextload-cvt-f32-pattern,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fuse-aes,fuse-crypto-eor,neon,perfmon,sha2,store-pair-suppress,v8a,zcm,zcz,zcz-fp-workaround,zcz-gp") }, + { str_lit("apple-latest"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,altnzcv,am,amvs,apple-a16,arith-bcc-fusion,arith-cbz-fusion,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,hcx,i8mm,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-m1"), str_lit("CONTEXTIDREL2,aes,aggressive-fma,alternate-sextload-cvt-f32-pattern,altnzcv,am,apple-a14,arith-bcc-fusion,arith-cbz-fusion,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,el2vmsa,el3,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-m2"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,altnzcv,am,amvs,apple-a15,arith-bcc-fusion,arith-cbz-fusion,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,i8mm,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-s4"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,apple-a12,arith-bcc-fusion,arith-cbz-fusion,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fullfp16,fuse-aes,fuse-crypto-eor,jsconv,lor,lse,neon,pan,pan-rwv,pauth,perfmon,ras,rcpc,rdm,sha2,store-pair-suppress,uaops,v8.1a,v8.2a,v8.3a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-s5"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,apple-a12,arith-bcc-fusion,arith-cbz-fusion,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fullfp16,fuse-aes,fuse-crypto-eor,jsconv,lor,lse,neon,pan,pan-rwv,pauth,perfmon,ras,rcpc,rdm,sha2,store-pair-suppress,uaops,v8.1a,v8.2a,v8.3a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("carmel"), str_lit("CONTEXTIDREL2,aes,carmel,ccpp,crc,crypto,el2vmsa,el3,fp-armv8,fullfp16,lor,lse,neon,pan,pan-rwv,ras,rdm,sha2,uaops,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a34"), str_lit("a35,aes,crc,crypto,el2vmsa,el3,fp-armv8,neon,perfmon,sha2,v8a") }, + { str_lit("cortex-a35"), str_lit("a35,aes,crc,crypto,el2vmsa,el3,fp-armv8,neon,perfmon,sha2,v8a") }, + { str_lit("cortex-a510"), str_lit("CONTEXTIDREL2,a510,altnzcv,am,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,dit,dotprod,el2vmsa,el3,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("cortex-a53"), str_lit("a53,aes,balance-fp-ops,crc,crypto,el2vmsa,el3,fp-armv8,fuse-adrp-add,fuse-aes,neon,perfmon,sha2,use-postra-scheduler,v8a") }, + { str_lit("cortex-a55"), str_lit("CONTEXTIDREL2,a55,aes,ccpp,crc,crypto,dotprod,el2vmsa,el3,fp-armv8,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,ras,rcpc,rdm,sha2,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a57"), str_lit("a57,aes,balance-fp-ops,crc,crypto,el2vmsa,el3,enable-select-opt,fp-armv8,fuse-adrp-add,fuse-aes,fuse-literals,neon,perfmon,predictable-select-expensive,sha2,use-postra-scheduler,v8a") }, + { str_lit("cortex-a65"), str_lit("CONTEXTIDREL2,a65,aes,ccpp,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-literals,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,ssbs,uaops,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a65ae"), str_lit("CONTEXTIDREL2,a65,aes,ccpp,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-literals,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,ssbs,uaops,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a710"), str_lit("CONTEXTIDREL2,a710,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("cortex-a715"), str_lit("CONTEXTIDREL2,a715,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,spe,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("cortex-a72"), str_lit("a72,aes,crc,crypto,el2vmsa,el3,enable-select-opt,fp-armv8,fuse-adrp-add,fuse-aes,fuse-literals,neon,perfmon,predictable-select-expensive,sha2,v8a") }, + { str_lit("cortex-a73"), str_lit("a73,aes,crc,crypto,el2vmsa,el3,enable-select-opt,fp-armv8,fuse-adrp-add,fuse-aes,neon,perfmon,predictable-select-expensive,sha2,v8a") }, + { str_lit("cortex-a75"), str_lit("CONTEXTIDREL2,a75,aes,ccpp,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,uaops,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a76"), str_lit("CONTEXTIDREL2,a76,addr-lsl-fast,aes,alu-lsl-fast,ccpp,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,ssbs,uaops,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a76ae"), str_lit("CONTEXTIDREL2,a76,addr-lsl-fast,aes,alu-lsl-fast,ccpp,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,ssbs,uaops,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a77"), str_lit("CONTEXTIDREL2,a77,addr-lsl-fast,aes,alu-lsl-fast,ccpp,cmp-bcc-fusion,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,ssbs,uaops,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a78"), str_lit("CONTEXTIDREL2,a78,addr-lsl-fast,aes,alu-lsl-fast,ccpp,cmp-bcc-fusion,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,spe,ssbs,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-a78c"), str_lit("CONTEXTIDREL2,a78c,addr-lsl-fast,aes,alu-lsl-fast,ccpp,cmp-bcc-fusion,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,flagm,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,spe,ssbs,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-r82"), str_lit("CONTEXTIDREL2,ccidx,ccpp,complxnum,cortex-r82,crc,dit,dotprod,flagm,fp-armv8,fp16fml,fullfp16,jsconv,lse,neon,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,specrestrict,ssbs,tlb-rmi,tracev8.4,uaops,use-postra-scheduler,v8r") }, + { str_lit("cortex-x1"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,ccpp,cmp-bcc-fusion,cortex-x1,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,spe,ssbs,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-x1c"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,ccpp,cmp-bcc-fusion,cortex-x1,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,flagm,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,lse2,neon,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,ras,rcpc,rcpc-immo,rdm,sha2,spe,ssbs,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("cortex-x2"), str_lit("CONTEXTIDREL2,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,cortex-x2,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("cortex-x3"), str_lit("CONTEXTIDREL2,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,complxnum,cortex-x3,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,spe,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("cyclone"), str_lit("aes,alternate-sextload-cvt-f32-pattern,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fuse-aes,fuse-crypto-eor,neon,perfmon,sha2,store-pair-suppress,v8a,zcm,zcz,zcz-fp-workaround,zcz-gp") }, + { str_lit("exynos-m3"), str_lit("addr-lsl-fast,aes,alu-lsl-fast,crc,crypto,el2vmsa,el3,exynos-cheap-as-move,exynosm3,force-32bit-jump-tables,fp-armv8,fuse-address,fuse-adrp-add,fuse-aes,fuse-csel,fuse-literals,neon,perfmon,predictable-select-expensive,sha2,store-pair-suppress,use-postra-scheduler,v8a") }, + { str_lit("exynos-m4"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,arith-bcc-fusion,arith-cbz-fusion,ccpp,crc,crypto,dotprod,el2vmsa,el3,exynos-cheap-as-move,exynosm4,force-32bit-jump-tables,fp-armv8,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-arith-logic,fuse-csel,fuse-literals,lor,lse,neon,pan,pan-rwv,perfmon,ras,rdm,sha2,store-pair-suppress,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh,zcz,zcz-gp") }, + { str_lit("exynos-m5"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,arith-bcc-fusion,arith-cbz-fusion,ccpp,crc,crypto,dotprod,el2vmsa,el3,exynos-cheap-as-move,exynosm4,force-32bit-jump-tables,fp-armv8,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-arith-logic,fuse-csel,fuse-literals,lor,lse,neon,pan,pan-rwv,perfmon,ras,rdm,sha2,store-pair-suppress,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh,zcz,zcz-gp") }, + { str_lit("falkor"), str_lit("addr-lsl-fast,aes,alu-lsl-fast,crc,crypto,el2vmsa,el3,falkor,fp-armv8,neon,perfmon,predictable-select-expensive,rdm,sha2,slow-strqro-store,store-pair-suppress,use-postra-scheduler,v8a,zcz,zcz-gp") }, + { str_lit("generic"), str_lit("enable-select-opt,ete,fp-armv8,fuse-adrp-add,fuse-aes,neon,trbe,use-postra-scheduler") }, + { str_lit("kryo"), str_lit("addr-lsl-fast,aes,alu-lsl-fast,crc,crypto,el2vmsa,el3,fp-armv8,kryo,neon,perfmon,predictable-select-expensive,sha2,store-pair-suppress,use-postra-scheduler,v8a,zcz,zcz-gp") }, + { str_lit("neoverse-512tvb"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,am,bf16,ccdp,ccidx,ccpp,complxnum,crc,crypto,dit,dotprod,el2vmsa,el3,enable-select-opt,flagm,fp-armv8,fp16fml,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mpam,neon,neoverse512tvb,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,rand,ras,rcpc,rcpc-immo,rdm,sel2,sha2,spe,ssbs,sve,tlb-rmi,tracev8.4,uaops,use-postra-scheduler,v8.1a,v8.2a,v8.3a,v8.4a,v8a,vh") }, + { str_lit("neoverse-e1"), str_lit("CONTEXTIDREL2,aes,ccpp,crc,crypto,dotprod,el2vmsa,el3,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,neoversee1,pan,pan-rwv,perfmon,ras,rcpc,rdm,sha2,ssbs,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("neoverse-n1"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,ccpp,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,neoversen1,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,spe,ssbs,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + { str_lit("neoverse-n2"), str_lit("CONTEXTIDREL2,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,neoversen2,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("neoverse-v1"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,am,bf16,ccdp,ccidx,ccpp,complxnum,crc,crypto,dit,dotprod,el2vmsa,el3,enable-select-opt,flagm,fp-armv8,fp16fml,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mpam,neon,neoversev1,no-sve-fp-ld1r,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,rand,ras,rcpc,rcpc-immo,rdm,sel2,sha2,spe,ssbs,sve,tlb-rmi,tracev8.4,uaops,use-postra-scheduler,v8.1a,v8.2a,v8.3a,v8.4a,v8a,vh") }, + { str_lit("neoverse-v2"), str_lit("CONTEXTIDREL2,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,neoversev2,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,rand,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,spe,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("saphira"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,am,ccidx,ccpp,complxnum,crc,crypto,dit,dotprod,el2vmsa,el3,flagm,fp-armv8,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,ras,rcpc,rcpc-immo,rdm,saphira,sel2,sha2,spe,store-pair-suppress,tlb-rmi,tracev8.4,uaops,use-postra-scheduler,v8.1a,v8.2a,v8.3a,v8.4a,v8a,vh,zcz,zcz-gp") }, + { str_lit("thunderx"), str_lit("aes,crc,crypto,el2vmsa,el3,fp-armv8,neon,perfmon,predictable-select-expensive,sha2,store-pair-suppress,thunderx,use-postra-scheduler,v8a") }, + { str_lit("thunderx2t99"), str_lit("CONTEXTIDREL2,aes,aggressive-fma,arith-bcc-fusion,crc,crypto,el2vmsa,el3,fp-armv8,lor,lse,neon,pan,predictable-select-expensive,rdm,sha2,store-pair-suppress,thunderx2t99,use-postra-scheduler,v8.1a,v8a,vh") }, + { str_lit("thunderx3t110"), str_lit("CONTEXTIDREL2,aes,aggressive-fma,arith-bcc-fusion,balance-fp-ops,ccidx,ccpp,complxnum,crc,crypto,el2vmsa,el3,fp-armv8,jsconv,lor,lse,neon,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,store-pair-suppress,strict-align,thunderx3t110,uaops,use-postra-scheduler,v8.1a,v8.2a,v8.3a,v8a,vh") }, + { str_lit("thunderxt81"), str_lit("aes,crc,crypto,el2vmsa,el3,fp-armv8,neon,perfmon,predictable-select-expensive,sha2,store-pair-suppress,thunderxt81,use-postra-scheduler,v8a") }, + { str_lit("thunderxt83"), str_lit("aes,crc,crypto,el2vmsa,el3,fp-armv8,neon,perfmon,predictable-select-expensive,sha2,store-pair-suppress,thunderxt83,use-postra-scheduler,v8a") }, + { str_lit("thunderxt88"), str_lit("aes,crc,crypto,el2vmsa,el3,fp-armv8,neon,perfmon,predictable-select-expensive,sha2,store-pair-suppress,thunderxt88,use-postra-scheduler,v8a") }, + { str_lit("tsv110"), str_lit("CONTEXTIDREL2,aes,ccpp,complxnum,crc,crypto,dotprod,el2vmsa,el3,fp-armv8,fp16fml,fullfp16,fuse-aes,jsconv,lor,lse,neon,pan,pan-rwv,perfmon,ras,rdm,sha2,spe,store-pair-suppress,tsv110,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, + // TargetArch_wasm32: + { str_lit("bleeding-edge"), str_lit("atomics,bulk-memory,mutable-globals,nontrapping-fptoint,sign-ext,simd128,tail-call") }, + { str_lit("generic"), str_lit("mutable-globals,sign-ext") }, + { str_lit("mvp"), str_lit("") }, + // TargetArch_wasm64p32: + { str_lit("bleeding-edge"), str_lit("atomics,bulk-memory,mutable-globals,nontrapping-fptoint,sign-ext,simd128,tail-call") }, + { str_lit("generic"), str_lit("mutable-globals,sign-ext") }, + { str_lit("mvp"), str_lit("") }, }; gb_global String target_endian_names[TargetEndian_COUNT] = { @@ -443,9 +894,9 @@ struct BuildContext { PtrMap defined_values; - BlockingMutex target_features_mutex; StringSet target_features_set; String target_features_string; + bool strict_target_features; String minimum_os_version_string; bool minimum_os_version_string_given; @@ -1596,48 +2047,53 @@ gb_internal void init_build_context(TargetMetrics *cross_target, Subtarget subta #include "microsoft_craziness.h" #endif +// NOTE: the target feature and microarch lists are all sorted, so if it turns out to be slow (I don't think it will) +// a binary search is possible. -gb_internal Array split_by_comma(String const &list) { - isize n = 1; - for (isize i = 0; i < list.len; i++) { - if (list.text[i] == ',') { - n++; +gb_internal bool check_single_target_feature_is_valid(String const &feature_list, String const &feature) { + String_Iterator it = {feature_list, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (str == feature) { + return true; } } - auto res = array_make(heap_allocator(), n); - String s = list; - for (isize i = 0; i < n; i++) { - isize m = string_index_byte(s, ','); - if (m < 0) { - res[i] = s; - break; + return false; +} + +gb_internal bool check_target_feature_is_valid(String const &feature, TargetArchKind arch, String *invalid) { + String feature_list = target_features_list[arch]; + String_Iterator it = {feature, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (!check_single_target_feature_is_valid(feature_list, str)) { + if (invalid) *invalid = str; + return false; } - res[i] = substring(s, 0, m); - s = substring(s, m+1, s.len); } - return res; -} -gb_internal bool check_target_feature_is_valid(TokenPos pos, String const &feature) { - // TODO(bill): check_target_feature_is_valid return true; } -gb_internal bool check_target_feature_is_enabled(TokenPos pos, String const &target_feature_list) { - BuildContext *bc = &build_context; - mutex_lock(&bc->target_features_mutex); - defer (mutex_unlock(&bc->target_features_mutex)); - - auto items = split_by_comma(target_feature_list); - array_free(&items); - for (String const &item : items) { - if (!check_target_feature_is_valid(pos, item)) { - error(pos, "Target feature '%.*s' is not valid", LIT(item)); - return false; +gb_internal bool check_target_feature_is_valid_globally(String const &feature, String *invalid) { + String_Iterator it = {feature, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + + bool valid = false; + for (int arch = TargetArch_Invalid; arch < TargetArch_COUNT; arch += 1) { + if (check_target_feature_is_valid(str, cast(TargetArchKind)arch, invalid)) { + valid = true; + break; + } } - if (!string_set_exists(&bc->target_features_set, item)) { - error(pos, "Target feature '%.*s' is not enabled", LIT(item)); + + if (!valid) { + if (invalid) *invalid = str; return false; } } @@ -1645,54 +2101,35 @@ gb_internal bool check_target_feature_is_enabled(TokenPos pos, String const &tar return true; } -gb_internal void enable_target_feature(TokenPos pos, String const &target_feature_list) { - BuildContext *bc = &build_context; - mutex_lock(&bc->target_features_mutex); - defer (mutex_unlock(&bc->target_features_mutex)); - - auto items = split_by_comma(target_feature_list); - for (String const &item : items) { - if (!check_target_feature_is_valid(pos, item)) { - error(pos, "Target feature '%.*s' is not valid", LIT(item)); - continue; - } - - string_set_add(&bc->target_features_set, item); - } - array_free(&items); +gb_internal bool check_target_feature_is_valid_for_target_arch(String const &feature, String *invalid) { + return check_target_feature_is_valid(feature, build_context.metrics.arch, invalid); } - -gb_internal char const *target_features_set_to_cstring(gbAllocator allocator, bool with_quotes, bool with_plus) { - isize len = 0; - isize i = 0; - for (String const &feature : build_context.target_features_set) { - if (i != 0) { - len += 1; +gb_internal bool check_target_feature_is_enabled(String const &feature, String *not_enabled) { + String_Iterator it = {feature, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (!string_set_exists(&build_context.target_features_set, str)) { + if (not_enabled) *not_enabled = str; + return false; } - len += feature.len; - if (with_quotes) len += 2; - if (with_plus) len += 1; - i += 1; } - char *features = gb_alloc_array(allocator, char, len+1); - len = 0; - i = 0; - for (String const &feature : build_context.target_features_set) { - if (i != 0) { - features[len++] = ','; - } - if (with_quotes) features[len++] = '"'; - if (with_plus) features[len++] = '+'; - gb_memmove(features + len, feature.text, feature.len); - len += feature.len; - if (with_quotes) features[len++] = '"'; - i += 1; - } - features[len++] = 0; + return true; +} - return features; +gb_internal bool check_target_feature_is_superset_of(String const &superset, String const &of, String *missing) { + String_Iterator it = {of, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (!check_single_target_feature_is_valid(superset, str)) { + if (missing) *missing = str; + return false; + } + } + return true; } // NOTE(Jeroen): Set/create the output and other paths and report an error as appropriate. @@ -1983,10 +2420,6 @@ gb_internal bool init_build_paths(String init_filename) { } } - if (bc->target_features_string.len != 0) { - enable_target_feature({}, bc->target_features_string); - } - return true; } diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index c3c217ec7..825fc6448 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -1719,6 +1719,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As case BuiltinProc_objc_register_selector: case BuiltinProc_objc_register_class: case BuiltinProc_atomic_type_is_lock_free: + case BuiltinProc_has_target_feature: // NOTE(bill): The first arg may be a Type, this will be checked case by case break; @@ -3663,6 +3664,41 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As break; } + case BuiltinProc_has_target_feature: { + String features = str_lit(""); + + check_expr_or_type(c, operand, ce->args[0]); + + if (is_type_string(operand->type) && operand->mode == Addressing_Constant) { + GB_ASSERT(operand->value.kind == ExactValue_String); + features = operand->value.value_string; + } else { + Type *pt = base_type(operand->type); + if (pt->kind == Type_Proc) { + if (pt->Proc.require_target_feature.len != 0) { + GB_ASSERT(pt->Proc.enable_target_feature.len == 0); + features = pt->Proc.require_target_feature; + } else if (pt->Proc.enable_target_feature.len != 0) { + features = pt->Proc.enable_target_feature; + } else { + error(ce->args[0], "Expected the procedure type given to '%.*s' to have @(require_target_feature=\"...\") or @(enable_target_feature=\"...\")", LIT(builtin_name)); + } + } else { + error(ce->args[0], "Expected a constant string or procedure type for '%.*s'", LIT(builtin_name)); + } + } + + String invalid; + if (!check_target_feature_is_valid_globally(features, &invalid)) { + error(ce->args[0], "Target feature '%.*s' is not a valid target feature", LIT(invalid)); + } + + operand->value = exact_value_bool(check_target_feature_is_enabled(features, nullptr)); + operand->mode = Addressing_Constant; + operand->type = t_untyped_bool; + break; + } + case BuiltinProc_soa_struct: { Operand x = {}; Operand y = {}; @@ -6014,7 +6050,10 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As return false; } - enable_target_feature({}, str_lit("atomics")); + if (!check_target_feature_is_enabled(str_lit("atomics"), nullptr)) { + error(call, "'%.*s' requires target feature 'atomics' to be enabled, enable it with -target-features:\"atomics\" or choose a different -microarch", LIT(builtin_name)); + return false; + } Operand ptr = {}; Operand expected = {}; @@ -6068,7 +6107,10 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As return false; } - enable_target_feature({}, str_lit("atomics")); + if (!check_target_feature_is_enabled(str_lit("atomics"), nullptr)) { + error(call, "'%.*s' requires target feature 'atomics' to be enabled, enable it with -target-features:\"atomics\" or choose a different -microarch", LIT(builtin_name)); + return false; + } Operand ptr = {}; Operand waiters = {}; diff --git a/src/check_decl.cpp b/src/check_decl.cpp index 952a877a4..5b9486873 100644 --- a/src/check_decl.cpp +++ b/src/check_decl.cpp @@ -886,17 +886,37 @@ gb_internal void check_proc_decl(CheckerContext *ctx, Entity *e, DeclInfo *d) { check_objc_methods(ctx, e, ac); - if (ac.require_target_feature.len != 0 && ac.enable_target_feature.len != 0) { - error(e->token, "Attributes @(require_target_feature=...) and @(enable_target_feature=...) cannot be used together"); - } else if (ac.require_target_feature.len != 0) { - if (check_target_feature_is_enabled(e->token.pos, ac.require_target_feature)) { - e->Procedure.target_feature = ac.require_target_feature; - } else { - e->Procedure.target_feature_disabled = true; + { + if (ac.require_target_feature.len != 0 && ac.enable_target_feature.len != 0) { + error(e->token, "A procedure cannot have both @(require_target_feature=\"...\") and @(enable_target_feature=\"...\")"); + } + + if (build_context.strict_target_features && ac.enable_target_feature.len != 0) { + ac.require_target_feature = ac.enable_target_feature; + ac.enable_target_feature.len = 0; + } + + if (ac.require_target_feature.len != 0) { + pt->require_target_feature = ac.require_target_feature; + String invalid; + if (!check_target_feature_is_valid_globally(ac.require_target_feature, &invalid)) { + error(e->token, "Required target feature '%.*s' is not a valid target feature", LIT(invalid)); + } else if (!check_target_feature_is_enabled(ac.require_target_feature, nullptr)) { + e->flags |= EntityFlag_Disabled; + } + } else if (ac.enable_target_feature.len != 0) { + + // NOTE: disallow wasm, features on that arch are always global to the module. + if (is_arch_wasm()) { + error(e->token, "@(enable_target_feature=\"...\") is not allowed on wasm, features for wasm must be declared globally"); + } + + pt->enable_target_feature = ac.enable_target_feature; + String invalid; + if (!check_target_feature_is_valid_globally(ac.enable_target_feature, &invalid)) { + error(e->token, "Procedure enabled target feature '%.*s' is not a valid target feature", LIT(invalid)); + } } - } else if (ac.enable_target_feature.len != 0) { - enable_target_feature(e->token.pos, ac.enable_target_feature); - e->Procedure.target_feature = ac.enable_target_feature; } switch (e->Procedure.optimization_mode) { @@ -1370,6 +1390,10 @@ gb_internal void check_proc_group_decl(CheckerContext *ctx, Entity *pg_entity, D continue; } + if (p->flags & EntityFlag_Disabled) { + continue; + } + String name = p->token.string; for (isize k = j+1; k < pge->entities.count; k++) { @@ -1387,6 +1411,10 @@ gb_internal void check_proc_group_decl(CheckerContext *ctx, Entity *pg_entity, D ERROR_BLOCK(); + if (q->flags & EntityFlag_Disabled) { + continue; + } + ProcTypeOverloadKind kind = are_proc_types_overload_safe(p->type, q->type); bool both_have_where_clauses = false; if (p->decl_info->proc_lit != nullptr && q->decl_info->proc_lit != nullptr) { @@ -1423,6 +1451,7 @@ gb_internal void check_proc_group_decl(CheckerContext *ctx, Entity *pg_entity, D break; case ProcOverload_ParamCount: case ProcOverload_ParamTypes: + case ProcOverload_TargetFeatures: // This is okay :) break; diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 06d0a8b12..490c9aae7 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -6526,12 +6526,17 @@ gb_internal CallArgumentData check_call_arguments_proc_group(CheckerContext *c, array_add(&proc_entities, proc); } + int max_matched_features = 0; gbString expr_name = expr_to_string(operand->expr); defer (gb_string_free(expr_name)); for_array(i, procs) { Entity *p = procs[i]; + if (p->flags & EntityFlag_Disabled) { + continue; + } + Type *pt = base_type(p->type); if (pt != nullptr && is_type_proc(pt)) { CallArgumentData data = {}; @@ -6562,11 +6567,24 @@ gb_internal CallArgumentData check_call_arguments_proc_group(CheckerContext *c, item.score += assign_score_function(1); } + max_matched_features = gb_max(max_matched_features, matched_target_features(&pt->Proc)); + item.index = index; array_add(&valids, item); } } + if (max_matched_features > 0) { + for_array(i, valids) { + Entity *p = procs[valids[i].index]; + Type *t = base_type(p->type); + GB_ASSERT(t->kind == Type_Proc); + + int matched = matched_target_features(&t->Proc); + valids[i].score += assign_score_function(max_matched_features-matched); + } + } + if (valids.count > 1) { array_sort(valids, valid_index_and_score_cmp); i64 best_score = valids[0].score; @@ -6708,7 +6726,11 @@ gb_internal CallArgumentData check_call_arguments_proc_group(CheckerContext *c, ERROR_BLOCK(); error(operand->expr, "Ambiguous procedure group call '%s' that match with the given arguments", expr_name); - print_argument_types(); + if (positional_operands.count == 0 && named_operands.count == 0) { + error_line("\tNo given arguments\n"); + } else { + print_argument_types(); + } for (auto const &valid : valids) { Entity *proc = proc_entities[valid.index]; @@ -7553,8 +7575,11 @@ gb_internal ExprKind check_call_expr(CheckerContext *c, Operand *operand, Ast *c } } + bool is_call_inlined = false; + switch (inlining) { case ProcInlining_inline: + is_call_inlined = true; if (proc != nullptr) { Entity *e = entity_from_expr(proc); if (e != nullptr && e->kind == Entity_Procedure) { @@ -7570,6 +7595,47 @@ gb_internal ExprKind check_call_expr(CheckerContext *c, Operand *operand, Ast *c break; case ProcInlining_no_inline: break; + case ProcInlining_none: + if (proc != nullptr) { + Entity *e = entity_from_expr(proc); + if (e != nullptr && e->kind == Entity_Procedure) { + DeclInfo *decl = e->decl_info; + if (decl->proc_lit) { + ast_node(pl, ProcLit, decl->proc_lit); + if (pl->inlining == ProcInlining_inline) { + is_call_inlined = true; + } + } + } + } + } + + { + String invalid; + if (pt->kind == Type_Proc && pt->Proc.require_target_feature.len != 0) { + if (!check_target_feature_is_valid_for_target_arch(pt->Proc.require_target_feature, &invalid)) { + error(call, "Called procedure requires target feature '%.*s' which is invalid for the build target", LIT(invalid)); + } else if (!check_target_feature_is_enabled(pt->Proc.require_target_feature, &invalid)) { + error(call, "Calling this procedure requires target feature '%.*s' to be enabled", LIT(invalid)); + } + } + + if (pt->kind == Type_Proc && pt->Proc.enable_target_feature.len != 0) { + if (!check_target_feature_is_valid_for_target_arch(pt->Proc.enable_target_feature, &invalid)) { + error(call, "Called procedure enables target feature '%.*s' which is invalid for the build target", LIT(invalid)); + } + + // NOTE: Due to restrictions in LLVM you can not inline calls with a superset of features. + if (is_call_inlined) { + GB_ASSERT(c->curr_proc_decl); + GB_ASSERT(c->curr_proc_decl->entity); + GB_ASSERT(c->curr_proc_decl->entity->type->kind == Type_Proc); + String scope_features = c->curr_proc_decl->entity->type->Proc.enable_target_feature; + if (!check_target_feature_is_superset_of(scope_features, pt->Proc.enable_target_feature, &invalid)) { + error(call, "Inlined procedure enables target feature '%.*s', this requires the calling procedure to at least enable the same feature", LIT(invalid)); + } + } + } } operand->expr = call; diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index c15ec7137..8419c6568 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -44,6 +44,8 @@ enum BuiltinProcId { // "Intrinsics" BuiltinProc_is_package_imported, + BuiltinProc_has_target_feature, + BuiltinProc_transpose, BuiltinProc_outer_product, BuiltinProc_hadamard_product, @@ -354,6 +356,8 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { // "Intrinsics" {STR_LIT("is_package_imported"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("has_target_feature"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("transpose"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("outer_product"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("hadamard_product"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, diff --git a/src/entity.cpp b/src/entity.cpp index a12e1d0a6..d76d5f441 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -252,10 +252,8 @@ struct Entity { bool is_foreign : 1; bool is_export : 1; bool generated_from_polymorphic : 1; - bool target_feature_disabled : 1; bool entry_point_only : 1; bool has_instrumentation : 1; - String target_feature; } Procedure; struct { Array entities; @@ -502,4 +500,4 @@ gb_internal bool is_entity_local_variable(Entity *e) { return ((e->scope->flags &~ ScopeFlag_ContextDefined) == 0) || (e->scope->flags & ScopeFlag_Proc) != 0; -} \ No newline at end of file +} diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index 4b94cf020..fad130b99 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -41,6 +41,37 @@ String get_default_microarchitecture() { return default_march; } +String get_final_microarchitecture() { + BuildContext *bc = &build_context; + + String microarch = bc->microarch; + if (microarch.len == 0) { + microarch = get_default_microarchitecture(); + } else if (microarch == str_lit("native")) { + microarch = make_string_c(LLVMGetHostCPUName()); + } + return microarch; +} + +gb_internal String get_default_features() { + BuildContext *bc = &build_context; + + int off = 0; + for (int i = 0; i < bc->metrics.arch; i += 1) { + off += target_microarch_counts[i]; + } + + String microarch = get_final_microarchitecture(); + for (int i = off; i < off+target_microarch_counts[bc->metrics.arch]; i += 1) { + if (microarch_features_list[i].microarch == microarch) { + return microarch_features_list[i].features; + } + } + + GB_PANIC("unknown microarch"); + return {}; +} + gb_internal void lb_add_foreign_library_path(lbModule *m, Entity *e) { if (e == nullptr) { return; @@ -2468,69 +2499,24 @@ gb_internal bool lb_generate_code(lbGenerator *gen) { code_mode = LLVMCodeModelKernel; } - String host_cpu_name = copy_string(permanent_allocator(), make_string_c(LLVMGetHostCPUName())); - String llvm_cpu = get_default_microarchitecture(); - char const *llvm_features = ""; - if (build_context.microarch.len != 0) { - if (build_context.microarch == "native") { - llvm_cpu = host_cpu_name; - } else { - llvm_cpu = copy_string(permanent_allocator(), build_context.microarch); - } - if (llvm_cpu == host_cpu_name) { - llvm_features = LLVMGetHostCPUFeatures(); + String llvm_cpu = get_final_microarchitecture(); + + gbString llvm_features = gb_string_make(temporary_allocator(), ""); + String_Iterator it = {build_context.target_features_string, 0}; + bool first = true; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (!first) { + llvm_features = gb_string_appendc(llvm_features, ","); } - } + first = false; - // NOTE(Jeroen): Uncomment to get the list of supported microarchitectures. - /* - if (build_context.microarch == "?") { - string_set_add(&build_context.target_features_set, str_lit("+cpuhelp")); + llvm_features = gb_string_appendc(llvm_features, "+"); + llvm_features = gb_string_append_length(llvm_features, str.text, str.len); } - */ - if (build_context.target_features_set.entries.count != 0) { - // Prefix all of the features with a `+`, because we are - // enabling additional features. - char const *additional_features = target_features_set_to_cstring(permanent_allocator(), false, true); - - String f_string = make_string_c(llvm_features); - String a_string = make_string_c(additional_features); - isize f_len = f_string.len; - - if (f_len == 0) { - // The common case is that llvm_features is empty, so - // the target_features_set additions can be used as is. - llvm_features = additional_features; - } else { - // The user probably specified `-microarch:native`, so - // llvm_features is populated by LLVM's idea of what - // the host CPU supports. - // - // As far as I can tell, (which is barely better than - // wild guessing), a bitset is formed by parsing the - // string left to right. - // - // So, llvm_features + ',' + additonal_features, will - // makes the target_features_set override llvm_features. - - char *tmp = gb_alloc_array(permanent_allocator(), char, f_len + 1 + a_string.len + 1); - isize len = 0; - - // tmp = f_string - gb_memmove(tmp, f_string.text, f_string.len); - len += f_string.len; - // tmp += ',' - tmp[len++] = ','; - // tmp += a_string - gb_memmove(tmp + len, a_string.text, a_string.len); - len += a_string.len; - // tmp += NUL - tmp[len++] = 0; - - llvm_features = tmp; - } - } + debugf("CPU: %.*s, Features: %s\n", LIT(llvm_cpu), llvm_features); // GB_ASSERT_MSG(LLVMTargetHasAsmBackend(target)); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index f73698d34..898c9ac31 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -177,17 +177,24 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i break; } - if (!entity->Procedure.target_feature_disabled && - entity->Procedure.target_feature.len != 0) { - auto features = split_by_comma(entity->Procedure.target_feature); - for_array(i, features) { - String feature = features[i]; - LLVMAttributeRef ref = LLVMCreateStringAttribute( - m->ctx, - cast(char const *)feature.text, cast(unsigned)feature.len, - "", 0); - LLVMAddAttributeAtIndex(p->value, LLVMAttributeIndex_FunctionIndex, ref); + if (pt->Proc.enable_target_feature.len != 0) { + gbString feature_str = gb_string_make(temporary_allocator(), ""); + + String_Iterator it = {pt->Proc.enable_target_feature, 0}; + bool first = true; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (!first) { + feature_str = gb_string_appendc(feature_str, ","); + } + first = false; + + feature_str = gb_string_appendc(feature_str, "+"); + feature_str = gb_string_append_length(feature_str, str.text, str.len); } + + lb_add_attribute_to_proc_with_string(m, p->value, make_string_c("target-features"), make_string_c(feature_str)); } if (entity->flags & EntityFlag_Cold) { diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index c01ab0692..db99ebc99 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1708,7 +1708,8 @@ gb_internal lbValue lb_emit_mul_add(lbProcedure *p, lbValue a, lbValue b, lbValu if (is_possible) { switch (build_context.metrics.arch) { case TargetArch_amd64: - if (type_size_of(t) == 2) { + // NOTE: using the intrinsic when not supported causes slow codegen (See #2928). + if (type_size_of(t) == 2 || !check_target_feature_is_enabled(str_lit("fma"), nullptr)) { is_possible = false; } break; diff --git a/src/main.cpp b/src/main.cpp index ee7de7f81..93685acb9 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -272,6 +272,7 @@ enum BuildFlagKind { BuildFlag_ExtraAssemblerFlags, BuildFlag_Microarch, BuildFlag_TargetFeatures, + BuildFlag_StrictTargetFeatures, BuildFlag_MinimumOSVersion, BuildFlag_NoThreadLocal, @@ -467,6 +468,7 @@ gb_internal bool parse_build_flags(Array args) { add_flag(&build_flags, BuildFlag_ExtraAssemblerFlags, str_lit("extra-assembler-flags"), BuildFlagParam_String, Command__does_build); add_flag(&build_flags, BuildFlag_Microarch, str_lit("microarch"), BuildFlagParam_String, Command__does_build); add_flag(&build_flags, BuildFlag_TargetFeatures, str_lit("target-features"), BuildFlagParam_String, Command__does_build); + add_flag(&build_flags, BuildFlag_StrictTargetFeatures, str_lit("strict-target-features"), BuildFlagParam_None, Command__does_build); add_flag(&build_flags, BuildFlag_MinimumOSVersion, str_lit("minimum-os-version"), BuildFlagParam_String, Command__does_build); add_flag(&build_flags, BuildFlag_RelocMode, str_lit("reloc-mode"), BuildFlagParam_String, Command__does_build); @@ -1083,6 +1085,9 @@ gb_internal bool parse_build_flags(Array args) { string_to_lower(&build_context.target_features_string); break; } + case BuildFlag_StrictTargetFeatures: + build_context.strict_target_features = true; + break; case BuildFlag_MinimumOSVersion: { GB_ASSERT(value.kind == ExactValue_String); build_context.minimum_os_version_string = value.value_string; @@ -1981,7 +1986,20 @@ gb_internal void print_show_help(String const arg0, String const &command) { print_usage_line(2, "Examples:"); print_usage_line(3, "-microarch:sandybridge"); print_usage_line(3, "-microarch:native"); - print_usage_line(3, "-microarch:? for a list"); + print_usage_line(3, "-microarch:\"?\" for a list"); + print_usage_line(0, ""); + + print_usage_line(1, "-target-features:"); + print_usage_line(2, "Specifies CPU features to enable on top of the enabled features implied by -microarch."); + print_usage_line(2, "Examples:"); + print_usage_line(3, "-target-features:atomics"); + print_usage_line(3, "-target-features:\"sse2,aes\""); + print_usage_line(3, "-target-features:\"?\" for a list"); + print_usage_line(0, ""); + + print_usage_line(1, "-strict-target-features"); + print_usage_line(2, "Makes @(enable_target_features=\"...\") behave the same way as @(require_target_features=\"...\")."); + print_usage_line(2, "This enforces that all generated code uses features supported by the combination of -target, -microarch, and -target-features."); print_usage_line(0, ""); print_usage_line(1, "-reloc-mode:"); @@ -2663,7 +2681,7 @@ int main(int arg_count, char const **arg_ptr) { // Check chosen microarchitecture. If not found or ?, print list. bool print_microarch_list = true; - if (build_context.microarch.len == 0) { + if (build_context.microarch.len == 0 || build_context.microarch == str_lit("native")) { // Autodetect, no need to print list. print_microarch_list = false; } else { @@ -2680,6 +2698,11 @@ int main(int arg_count, char const **arg_ptr) { } } + // Set and check build paths... + if (!init_build_paths(init_filename)) { + return 1; + } + String default_march = get_default_microarchitecture(); if (print_microarch_list) { if (build_context.microarch != "?") { @@ -2703,13 +2726,57 @@ int main(int arg_count, char const **arg_ptr) { return 0; } - // Set and check build paths... - if (!init_build_paths(init_filename)) { - return 1; + String march = get_final_microarchitecture(); + String default_features = get_default_features(); + { + String_Iterator it = {default_features, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + string_set_add(&build_context.target_features_set, str); + } + } + + if (build_context.target_features_string.len != 0) { + String_Iterator target_it = {build_context.target_features_string, 0}; + for (;;) { + String item = string_split_iterator(&target_it, ','); + if (item == "") break; + + String invalid; + if (!check_target_feature_is_valid_for_target_arch(item, &invalid) && item != str_lit("help")) { + if (item != str_lit("?")) { + gb_printf_err("Unkown target feature '%.*s'.\n", LIT(invalid)); + } + gb_printf("Possible -target-features for target %.*s are:\n", LIT(target_arch_names[build_context.metrics.arch])); + gb_printf("\n"); + + String feature_list = target_features_list[build_context.metrics.arch]; + String_Iterator it = {feature_list, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (check_single_target_feature_is_valid(default_features, str)) { + if (has_ansi_terminal_colours()) { + gb_printf("\t%.*s\x1b[38;5;244m (implied by target microarch %.*s)\x1b[0m\n", LIT(str), LIT(march)); + } else { + gb_printf("\t%.*s (implied by current microarch %.*s)\n", LIT(str), LIT(march)); + } + } else { + gb_printf("\t%.*s\n", LIT(str)); + } + } + + return 1; + } + + string_set_add(&build_context.target_features_set, item); + } } if (build_context.show_debug_messages) { - debugf("Selected microarch: %.*s\n", LIT(default_march)); + debugf("Selected microarch: %.*s\n", LIT(march)); + debugf("Default microarch features: %.*s\n", LIT(default_features)); for_array(i, build_context.build_paths) { String build_path = path_to_string(heap_allocator(), build_context.build_paths[i]); debugf("build_paths[%ld]: %.*s\n", i, LIT(build_path)); diff --git a/src/types.cpp b/src/types.cpp index 18cb12ea1..3ec05059f 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -184,6 +184,8 @@ struct TypeProc { isize specialization_count; ProcCallingConvention calling_convention; i32 variadic_index; + String require_target_feature; + String enable_target_feature; // TODO(bill): Make this a flag set rather than bools bool variadic; bool require_results; @@ -2991,7 +2993,22 @@ gb_internal Type *union_tag_type(Type *u) { return t_uint; } +gb_internal int matched_target_features(TypeProc *t) { + if (t->require_target_feature.len == 0) { + return 0; + } + int matches = 0; + String_Iterator it = {t->require_target_feature, 0}; + for (;;) { + String str = string_split_iterator(&it, ','); + if (str == "") break; + if (check_target_feature_is_valid_for_target_arch(str, nullptr)) { + matches += 1; + } + } + return matches; +} enum ProcTypeOverloadKind { ProcOverload_Identical, // The types are identical @@ -3003,6 +3020,7 @@ enum ProcTypeOverloadKind { ProcOverload_ResultCount, ProcOverload_ResultTypes, ProcOverload_Polymorphic, + ProcOverload_TargetFeatures, ProcOverload_NotProcedure, @@ -3060,6 +3078,10 @@ gb_internal ProcTypeOverloadKind are_proc_types_overload_safe(Type *x, Type *y) } } + if (matched_target_features(&px) != matched_target_features(&py)) { + return ProcOverload_TargetFeatures; + } + if (px.params != nullptr && py.params != nullptr) { Entity *ex = px.params->Tuple.variables[0]; Entity *ey = py.params->Tuple.variables[0]; -- cgit v1.2.3 From 2183140e7162b93066e93b0e65da86b220d45ad0 Mon Sep 17 00:00:00 2001 From: jasonkercher Date: Fri, 10 May 2024 13:24:43 -0400 Subject: arm32 now compiles and runs demo --- base/runtime/internal.odin | 6 +++++- src/bug_report.cpp | 2 +- src/build_settings.cpp | 12 +++++++++++- src/checker.cpp | 4 ++++ src/gb/gb.h | 7 ++++--- src/llvm_backend_proc.cpp | 4 ++-- 6 files changed, 27 insertions(+), 8 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/base/runtime/internal.odin b/base/runtime/internal.odin index 3e9c524bd..aaca1e703 100644 --- a/base/runtime/internal.odin +++ b/base/runtime/internal.odin @@ -801,6 +801,10 @@ truncsfhf2 :: proc "c" (value: f32) -> __float16 { } } +@(link_name="__aeabi_d2h", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE) +aeabi_d2h :: proc "c" (value: f64) -> __float16 { + return truncsfhf2(f32(value)) +} @(link_name="__truncdfhf2", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE) truncdfhf2 :: proc "c" (value: f64) -> __float16 { @@ -1055,4 +1059,4 @@ __read_bits :: proc "contextless" (dst, src: [^]byte, offset: uintptr, size: uin dst[j>>3] &~= 1<<(j&7) dst[j>>3] |= the_bit<<(j&7) } -} \ No newline at end of file +} diff --git a/src/bug_report.cpp b/src/bug_report.cpp index 88ab9492c..8c706f493 100644 --- a/src/bug_report.cpp +++ b/src/bug_report.cpp @@ -251,7 +251,7 @@ gb_internal void report_ram_info() { int result = sysinfo(&info); if (result == 0x0) { - gb_printf("%lu MiB\n", info.totalram * info.mem_unit / gb_megabytes(1)); + gb_printf("%lu MiB\n", (unsigned long)(info.totalram * info.mem_unit / gb_megabytes(1))); } else { gb_printf("Unknown.\n"); } diff --git a/src/build_settings.cpp b/src/build_settings.cpp index c6ef33af2..1857520c9 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -972,7 +972,7 @@ gb_global TargetMetrics target_linux_arm32 = { TargetOs_linux, TargetArch_arm32, 4, 4, 4, 8, - str_lit("arm-linux-gnu"), + str_lit("arm-unknown-linux-gnueabihf"), }; gb_global TargetMetrics target_darwin_amd64 = { @@ -1890,6 +1890,16 @@ gb_internal void init_build_context(TargetMetrics *cross_target, Subtarget subta #else metrics = &target_linux_amd64; #endif + #elif defined(GB_CPU_ARM) + #if defined(GB_SYSTEM_WINDOWS) + #error "Build Error: Unsupported architecture" + #elif defined(GB_SYSTEM_OSX) + #error "Build Error: Unsupported architecture" + #elif defined(GB_SYSTEM_FREEBSD) + #error "Build Error: Unsupported architecture" + #else + metrics = &target_linux_arm32; + #endif #else #if defined(GB_SYSTEM_WINDOWS) metrics = &target_windows_i386; diff --git a/src/checker.cpp b/src/checker.cpp index 70ca4fc47..24ed08ad7 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -2643,6 +2643,10 @@ gb_internal void generate_minimum_dependency_set(Checker *c, Entity *start) { str_lit("memmove"), ); + FORCE_ADD_RUNTIME_ENTITIES(build_context.metrics.arch == TargetArch_arm32, + str_lit("aeabi_d2h") + ); + FORCE_ADD_RUNTIME_ENTITIES(is_arch_wasm() && !build_context.tilde_backend, // // Extended data type internal procedures // str_lit("umodti3"), diff --git a/src/gb/gb.h b/src/gb/gb.h index c55ff8a9a..4cea51f18 100644 --- a/src/gb/gb.h +++ b/src/gb/gb.h @@ -6220,11 +6220,12 @@ gb_no_inline isize gb_snprintf_va(char *text, isize max_len, char const *fmt, va #elif defined(__aarch64__) gb_inline u64 gb_rdtsc(void) { int64_t virtual_timer_value; - asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); - return virtual_timer_value; + asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value)); + return virtual_timer_value; } #else -#error "gb_rdtsc not supported" +#warning "gb_rdtsc not supported" + gb_inline u64 gb_rdtsc(void) { return 0; } #endif #if defined(GB_SYSTEM_WINDOWS) diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 898c9ac31..3e0e19a18 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -2909,7 +2909,6 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu break; case TargetArch_arm32: { - // TODO(bill): Check this is correct GB_ASSERT(arg_count <= 7); char asm_string[] = "svc #0"; @@ -2917,13 +2916,14 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu for (unsigned i = 0; i < arg_count; i++) { constraints = gb_string_appendc(constraints, ",{"); static char const *regs[] = { - "r8", + "r7", "r0", "r1", "r2", "r3", "r4", "r5", + "r6", }; constraints = gb_string_appendc(constraints, regs[i]); constraints = gb_string_appendc(constraints, "}"); -- cgit v1.2.3 From 20752d904b58d59757412bfe6714107f4653222a Mon Sep 17 00:00:00 2001 From: jason Date: Fri, 10 May 2024 23:06:49 -0400 Subject: fix 6 argument syscall on i386 --- src/llvm_backend_proc.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 898c9ac31..736c54e52 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -2835,8 +2835,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu { GB_ASSERT(arg_count <= 7); - char asm_string_default[] = "int $$0x80"; - char *asm_string = asm_string_default; + char asm_string[] = "int $$0x80"; gbString constraints = gb_string_make(heap_allocator(), "={eax}"); for (unsigned i = 0; i < gb_min(arg_count, 6); i++) { @@ -2848,16 +2847,11 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu "edx", "esi", "edi", + "ebp", }; constraints = gb_string_appendc(constraints, regs[i]); constraints = gb_string_appendc(constraints, "}"); } - if (arg_count == 7) { - char asm_string7[] = "push %[arg6]\npush %%ebp\nmov 4(%%esp), %%ebp\nint $0x80\npop %%ebp\nadd $4, %%esp"; - asm_string = asm_string7; - - constraints = gb_string_appendc(constraints, ",rm"); - } inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); } -- cgit v1.2.3 From 1207d64c64033b0a1cee8eecb70d068a72eddb62 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 19 May 2024 23:55:45 +0100 Subject: Fix `intrinsics.ptr_sub` --- src/llvm_backend_proc.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 736c54e52..b4b5b591b 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -2384,9 +2384,10 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu lbValue ptr0 = lb_emit_conv(p, lb_build_expr(p, ce->args[0]), t_uintptr); lbValue ptr1 = lb_emit_conv(p, lb_build_expr(p, ce->args[1]), t_uintptr); + ptr0 = lb_emit_conv(p, ptr0, t_int); + ptr1 = lb_emit_conv(p, ptr1, t_int); - lbValue diff = lb_emit_arith(p, Token_Sub, ptr0, ptr1, t_uintptr); - diff = lb_emit_conv(p, diff, t_int); + lbValue diff = lb_emit_arith(p, Token_Sub, ptr0, ptr1, t_int); return lb_emit_arith(p, Token_Quo, diff, lb_const_int(p->module, t_int, type_size_of(elem)), t_int); } -- cgit v1.2.3 From 2a526058b3cbcd645dd35f3541ba1fc305dd02f9 Mon Sep 17 00:00:00 2001 From: laytan Date: Tue, 4 Jun 2024 20:15:47 +0200 Subject: fix passing pointer to constant in non-odin cc --- src/llvm_backend_proc.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 3b9b1be05..6cb1efab2 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1097,15 +1097,7 @@ gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array c ptr = lb_address_from_load_or_generate_local(p, x); } } else { - if (LLVMIsConstant(x.value)) { - // NOTE(bill): if the value is already constant, then just it as a global variable - // and pass it by pointer - lbAddr addr = lb_add_global_generated(p->module, original_type, x); - lb_make_global_private_const(addr); - ptr = addr.addr; - } else { - ptr = lb_copy_value_to_ptr(p, x, original_type, 16); - } + ptr = lb_copy_value_to_ptr(p, x, original_type, 16); } array_add(&processed_args, ptr); } -- cgit v1.2.3 From 6d862cc4e529b71e65356c73d44fc39c61cbbb77 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sun, 9 Jun 2024 04:43:19 +0200 Subject: fix unreachable hit when param and/or return have complex inits Fixes #3630 --- src/llvm_backend_proc.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 6cb1efab2..87f75fb1d 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -710,13 +710,12 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) { lb_set_debug_position_to_procedure_begin(p); if (p->debug_info != nullptr) { if (p->context_stack.count != 0) { + lbBlock *prev_block = p->curr_block; p->curr_block = p->decl_block; lb_add_debug_context_variable(p, lb_find_or_generate_context_ptr(p)); + p->curr_block = prev_block; } - } - - lb_start_block(p, p->entry_block); } gb_internal void lb_end_procedure_body(lbProcedure *p) { -- cgit v1.2.3 From 5b5402fb23d0902810f18f805a72120f5243bddd Mon Sep 17 00:00:00 2001 From: Feoramund <161657516+Feoramund@users.noreply.github.com> Date: Wed, 12 Jun 2024 13:07:13 -0400 Subject: Add `intrinsics.syscall_bsd` This is a BSD-style syscall that checks for a high Carry Flag as the error state. If the CF is high, the boolean return value is false, and if it is low (no errors) then the boolean return value is true. --- base/intrinsics/intrinsics.odin | 2 + core/sys/unix/sysctl_freebsd.odin | 7 +- src/check_builtin.cpp | 58 +++++++++++-- src/checker_builtin_procs.hpp | 4 +- src/llvm_backend_proc.cpp | 178 ++++++++++++++++++++++++++++---------- 5 files changed, 195 insertions(+), 54 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index 4f6fa2713..635c85e6f 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -73,6 +73,8 @@ expect :: proc(val, expected_val: T) -> T --- // Linux and Darwin Only syscall :: proc(id: uintptr, args: ..uintptr) -> uintptr --- +// FreeBSD, NetBSD, et cetera +syscall_bsd :: proc(id: uintptr, args: ..uintptr) -> (uintptr, bool) --- // Atomics diff --git a/core/sys/unix/sysctl_freebsd.odin b/core/sys/unix/sysctl_freebsd.odin index d1acbc2a1..35c5db02c 100644 --- a/core/sys/unix/sysctl_freebsd.odin +++ b/core/sys/unix/sysctl_freebsd.odin @@ -5,14 +5,15 @@ import "base:intrinsics" sysctl :: proc(mib: []i32, val: ^$T) -> (ok: bool) { mib := mib - result_size := i64(size_of(T)) + result_size := u64(size_of(T)) - res := intrinsics.syscall(SYS_sysctl, + res: uintptr + res, ok = intrinsics.syscall_bsd(SYS_sysctl, uintptr(raw_data(mib)), uintptr(len(mib)), uintptr(val), uintptr(&result_size), uintptr(0), uintptr(0), ) - return res == 0 + return } // See /usr/include/sys/sysctl.h for details diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 3aee804df..396a8bd80 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -5089,15 +5089,9 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As isize max_arg_count = 32; switch (build_context.metrics.os) { - case TargetOs_windows: - case TargetOs_freestanding: - error(call, "'%.*s' is not supported on this platform (%.*s)", LIT(builtin_name), LIT(target_os_names[build_context.metrics.os])); - break; case TargetOs_darwin: case TargetOs_linux: case TargetOs_essence: - case TargetOs_freebsd: - case TargetOs_openbsd: case TargetOs_haiku: switch (build_context.metrics.arch) { case TargetArch_i386: @@ -5107,6 +5101,9 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As break; } break; + default: + error(call, "'%.*s' is not supported on this platform (%.*s)", LIT(builtin_name), LIT(target_os_names[build_context.metrics.os])); + break; } if (ce->args.count > max_arg_count) { @@ -5120,6 +5117,55 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As return true; } break; + case BuiltinProc_syscall_bsd: + { + convert_to_typed(c, operand, t_uintptr); + if (!is_type_uintptr(operand->type)) { + gbString t = type_to_string(operand->type); + error(operand->expr, "Argument 0 must be of type 'uintptr', got %s", t); + gb_string_free(t); + } + for (isize i = 1; i < ce->args.count; i++) { + Operand x = {}; + check_expr(c, &x, ce->args[i]); + if (x.mode != Addressing_Invalid) { + convert_to_typed(c, &x, t_uintptr); + } + convert_to_typed(c, &x, t_uintptr); + if (!is_type_uintptr(x.type)) { + gbString t = type_to_string(x.type); + error(x.expr, "Argument %td must be of type 'uintptr', got %s", i, t); + gb_string_free(t); + } + } + + isize max_arg_count = 32; + + switch (build_context.metrics.os) { + case TargetOs_freebsd: + case TargetOs_netbsd: + case TargetOs_openbsd: + switch (build_context.metrics.arch) { + case TargetArch_amd64: + case TargetArch_arm64: + max_arg_count = 7; + break; + } + break; + default: + error(call, "'%.*s' is not supported on this platform (%.*s)", LIT(builtin_name), LIT(target_os_names[build_context.metrics.os])); + break; + } + + if (ce->args.count > max_arg_count) { + error(ast_end_token(call), "'%.*s' has a maximum of %td arguments on this platform (%.*s), got %td", LIT(builtin_name), max_arg_count, LIT(target_os_names[build_context.metrics.os]), ce->args.count); + } + + operand->mode = Addressing_Value; + operand->type = make_optional_ok_type(t_uintptr); + return true; + } + break; case BuiltinProc_type_base_type: diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index 35acad42f..a2b8a5361 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -192,6 +192,7 @@ BuiltinProc__simd_end, // Platform specific intrinsics BuiltinProc_syscall, + BuiltinProc_syscall_bsd, BuiltinProc_x86_cpuid, BuiltinProc_x86_xgetbv, @@ -512,7 +513,8 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT(""), 0, false, Expr_Stmt, BuiltinProcPkg_intrinsics}, - {STR_LIT("syscall"), 1, true, Expr_Expr, BuiltinProcPkg_intrinsics, false, true}, + {STR_LIT("syscall"), 1, true, Expr_Expr, BuiltinProcPkg_intrinsics, false, true}, + {STR_LIT("syscall_bsd"), 1, true, Expr_Expr, BuiltinProcPkg_intrinsics, false, true}, {STR_LIT("x86_cpuid"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("x86_xgetbv"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 87f75fb1d..958b4fd38 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -2747,26 +2747,10 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu { GB_ASSERT(arg_count <= 7); - // FreeBSD additionally clobbers r8, r9, r10, but they - // can also be used to pass in arguments, so this needs - // to be handled in two parts. - bool clobber_arg_regs[7] = { - false, false, false, false, false, false, false - }; - if (build_context.metrics.os == TargetOs_freebsd) { - clobber_arg_regs[4] = true; // r10 - clobber_arg_regs[5] = true; // r8 - clobber_arg_regs[6] = true; // r9 - } - char asm_string[] = "syscall"; gbString constraints = gb_string_make(heap_allocator(), "={rax}"); for (unsigned i = 0; i < arg_count; i++) { - if (!clobber_arg_regs[i]) { - constraints = gb_string_appendc(constraints, ",{"); - } else { - constraints = gb_string_appendc(constraints, ",+{"); - } + constraints = gb_string_appendc(constraints, ",{"); static char const *regs[] = { "rax", "rdi", @@ -2790,36 +2774,9 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu // Some but not all system calls will additionally // clobber memory. // - // As a fix for CVE-2019-5595, FreeBSD started - // clobbering R8, R9, and R10, instead of restoring - // them. Additionally unlike Linux, instead of - // returning negative errno, positive errno is - // returned and CF is set. - // // TODO: // * Figure out what Darwin does. - // * Add some extra handling to propagate CF back - // up to the caller on FreeBSD systems so that - // the caller knows that the return value is - // positive errno. constraints = gb_string_appendc(constraints, ",~{rcx},~{r11},~{memory}"); - if (build_context.metrics.os == TargetOs_freebsd) { - // Second half of dealing with FreeBSD's system - // call semantics. Explicitly clobber the registers - // that were not used to pass in arguments, and - // then clobber RFLAGS. - if (arg_count < 5) { - constraints = gb_string_appendc(constraints, ",~{r10}"); - } - if (arg_count < 6) { - constraints = gb_string_appendc(constraints, ",~{r8}"); - } - if (arg_count < 7) { - constraints = gb_string_appendc(constraints, ",~{r9}"); - } - constraints = gb_string_appendc(constraints, ",~{cc}"); - } - inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); } break; @@ -2927,6 +2884,139 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu res.type = t_uintptr; return res; } + case BuiltinProc_syscall_bsd: + { + // This is a BSD-style syscall where errors are indicated by a high + // Carry Flag and a positive return value, allowing the kernel to + // return any value that fits into a machine word. + // + // This is unlike Linux, where errors are indicated by a negative + // return value, limiting what can be expressed in one result. + unsigned arg_count = cast(unsigned)ce->args.count; + LLVMValueRef *args = gb_alloc_array(permanent_allocator(), LLVMValueRef, arg_count); + for_array(i, ce->args) { + lbValue arg = lb_build_expr(p, ce->args[i]); + arg = lb_emit_conv(p, arg, t_uintptr); + args[i] = arg.value; + } + + LLVMTypeRef llvm_uintptr = lb_type(p->module, t_uintptr); + LLVMTypeRef *llvm_arg_types = gb_alloc_array(permanent_allocator(), LLVMTypeRef, arg_count); + for (unsigned i = 0; i < arg_count; i++) { + llvm_arg_types[i] = llvm_uintptr; + } + + LLVMTypeRef *results = gb_alloc_array(permanent_allocator(), LLVMTypeRef, 2); + results[0] = lb_type(p->module, t_uintptr); + results[1] = lb_type(p->module, t_bool); + LLVMTypeRef llvm_results = LLVMStructTypeInContext(p->module->ctx, results, 2, false); + + LLVMTypeRef func_type = LLVMFunctionType(llvm_results, llvm_arg_types, arg_count, false); + + LLVMValueRef inline_asm = nullptr; + + switch (build_context.metrics.arch) { + case TargetArch_amd64: + { + GB_ASSERT(arg_count <= 7); + + char asm_string[] = "syscall; setnb %cl"; + + // Using CL as an output; RCX doesn't need to get clobbered later. + gbString constraints = gb_string_make(heap_allocator(), "={rax},={cl}"); + for (unsigned i = 0; i < arg_count; i++) { + constraints = gb_string_appendc(constraints, ",{"); + static char const *regs[] = { + "rax", + "rdi", + "rsi", + "rdx", + "r10", + "r8", + "r9", + }; + constraints = gb_string_appendc(constraints, regs[i]); + constraints = gb_string_appendc(constraints, "}"); + } + + // NOTE(Feoramund): If you're experiencing instability + // regarding syscalls during optimized builds, it is + // possible that the ABI has changed for your platform, + // or I've missed a register clobber. + // + // Documentation on this topic is sparse, but I was able to + // determine what registers were being clobbered by adding + // dummy values to them, setting a breakpoint after the + // syscall, and checking the state of the registers afterwards. + // + // Be advised that manually stepping through a debugger may + // cause the kernel to not return via sysret, which will + // preserve register state that normally would've been + // otherwise clobbered. + // + // It is also possible that some syscalls clobber different registers. + + if (build_context.metrics.os == TargetOs_freebsd) { + // As a fix for CVE-2019-5595, FreeBSD started + // clobbering R8, R9, and R10, instead of restoring + // them. + // + // More info here: + // + // https://www.freebsd.org/security/advisories/FreeBSD-SA-19:01.syscall.asc + // https://github.com/freebsd/freebsd-src/blob/098dbd7ff7f3da9dda03802cdb2d8755f816eada/sys/amd64/amd64/exception.S#L605 + // https://stackoverflow.com/q/66878250 + constraints = gb_string_appendc(constraints, ",~{r8},~{r9},~{r10}"); + } + + // Both FreeBSD and NetBSD might clobber RDX. + // + // For NetBSD, it was clobbered during a call to sysctl. + // + // For FreeBSD, it's listed as "return value 2" in their + // AMD64 assembly, so there's no guarantee that it will persist. + constraints = gb_string_appendc(constraints, ",~{rdx},~{r11},~{cc},~{memory}"); + + inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); + } + break; + case TargetArch_arm64: + { + GB_ASSERT(arg_count <= 7); + + char asm_string[] = "svc #0; cset x8, cc"; + gbString constraints = gb_string_make(heap_allocator(), "={x0},={x8}"); + for (unsigned i = 0; i < arg_count; i++) { + constraints = gb_string_appendc(constraints, ",{"); + static char const *regs[] = { + "x8", + "x0", + "x1", + "x2", + "x3", + "x4", + "x5", + }; + constraints = gb_string_appendc(constraints, regs[i]); + constraints = gb_string_appendc(constraints, "}"); + } + + // FreeBSD clobbered x1 on a call to sysctl. + constraints = gb_string_appendc(constraints, ",~{x1},~{cc},~{memory}"); + + inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); + } + break; + default: + GB_PANIC("Unsupported platform"); + } + + lbValue res = {}; + res.value = LLVMBuildCall2(p->builder, func_type, inline_asm, args, arg_count, ""); + res.type = make_optional_ok_type(t_uintptr, true); + + return res; + } case BuiltinProc_objc_send: return lb_handle_objc_send(p, expr); -- cgit v1.2.3 From b79d7e69177ccbc25b75f1aa7a90a62f64c6cee1 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Fri, 28 Jun 2024 01:16:26 +0200 Subject: disallow non-global foreign import of variables on wasm --- src/build_settings.cpp | 4 +++- src/check_decl.cpp | 8 ++++---- src/llvm_backend.cpp | 2 -- src/llvm_backend_proc.cpp | 2 +- src/llvm_backend_utility.cpp | 2 +- src/tilde.cpp | 1 - 6 files changed, 9 insertions(+), 10 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 0495cf3b5..1988e9cac 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -2053,10 +2053,12 @@ gb_internal void init_build_context(TargetMetrics *cross_target, Subtarget subta gbString link_flags = gb_string_make(heap_allocator(), " "); // link_flags = gb_string_appendc(link_flags, "--export-all "); // link_flags = gb_string_appendc(link_flags, "--export-table "); - link_flags = gb_string_appendc(link_flags, "--allow-undefined "); // if (bc->metrics.arch == TargetArch_wasm64) { // link_flags = gb_string_appendc(link_flags, "-mwasm64 "); // } + if (bc->metrics.os != TargetOs_orca) { + link_flags = gb_string_appendc(link_flags, "--allow-undefined "); + } if (bc->no_entry_point || bc->metrics.os == TargetOs_orca) { link_flags = gb_string_appendc(link_flags, "--no-entry "); } diff --git a/src/check_decl.cpp b/src/check_decl.cpp index b5307838c..883cfcba9 100644 --- a/src/check_decl.cpp +++ b/src/check_decl.cpp @@ -709,7 +709,7 @@ gb_internal Entity *init_entity_foreign_library(CheckerContext *ctx, Entity *e) } if (ident == nullptr) { - error(e->token, "foreign entiies must declare which library they are from"); + error(e->token, "foreign entities must declare which library they are from"); } else if (ident->kind != Ast_Ident) { error(ident, "foreign library names must be an identifier"); } else { @@ -1320,9 +1320,9 @@ gb_internal void check_global_variable_decl(CheckerContext *ctx, Entity *&e, Ast error(e->token, "A foreign variable declaration cannot have a default value"); } init_entity_foreign_library(ctx, e); - // if (is_arch_wasm()) { - // error(e->token, "A foreign variable declaration are not allowed for the '%.*s' architecture", LIT(target_arch_names[build_context.metrics.arch])); - // } + if (is_arch_wasm() && e->Variable.foreign_library != nullptr) { + error(e->token, "A foreign variable declaration can not be scoped to a module and must be declared in a 'foreign {' (without a library) block"); + } } if (ac.link_name.len > 0) { e->Variable.link_name = ac.link_name; diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index d119633ec..92c8138c5 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -3217,8 +3217,6 @@ gb_internal bool lb_generate_code(lbGenerator *gen) { LLVMSetDLLStorageClass(g.value, LLVMDLLImportStorageClass); LLVMSetExternallyInitialized(g.value, true); lb_add_foreign_library_path(m, e->Variable.foreign_library); - - // lb_set_wasm_import_attributes(g.value, e, name); } else { LLVMSetInitializer(g.value, LLVMConstNull(lb_type(m, e->type))); } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 958b4fd38..68ba4f74e 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -233,7 +233,7 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i if (p->is_foreign) { - lb_set_wasm_import_attributes(p->value, entity, p->name); + lb_set_wasm_procedure_import_attributes(p->value, entity, p->name); } diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 94153e233..98ed0c57e 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -2018,7 +2018,7 @@ gb_internal LLVMValueRef llvm_get_inline_asm(LLVMTypeRef func_type, String const } -gb_internal void lb_set_wasm_import_attributes(LLVMValueRef value, Entity *entity, String import_name) { +gb_internal void lb_set_wasm_procedure_import_attributes(LLVMValueRef value, Entity *entity, String import_name) { if (!is_arch_wasm()) { return; } diff --git a/src/tilde.cpp b/src/tilde.cpp index 4fc7d1c9b..f6fed0f9a 100644 --- a/src/tilde.cpp +++ b/src/tilde.cpp @@ -363,7 +363,6 @@ gb_internal bool cg_global_variables_create(cgModule *m, Array if (is_foreign) { linkage = TB_LINKAGE_PUBLIC; // lb_add_foreign_library_path(m, e->Variable.foreign_library); - // lb_set_wasm_import_attributes(g.value, e, name); } else if (is_export) { linkage = TB_LINKAGE_PUBLIC; } -- cgit v1.2.3 From 1a20b78633038614635da99b5e634015d4ce7d6e Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Mon, 8 Jul 2024 21:06:57 +0200 Subject: remove misleading `@(optimization_mode)` values and make "none" inhibit optimizations --- src/check_decl.cpp | 1 - src/checker.cpp | 12 ++++++------ src/entity.cpp | 4 +--- src/llvm_backend.cpp | 17 +++++------------ src/llvm_backend.hpp | 5 ----- src/llvm_backend_general.cpp | 6 ++++++ src/llvm_backend_proc.cpp | 8 +------- 7 files changed, 19 insertions(+), 34 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/check_decl.cpp b/src/check_decl.cpp index 84c3d9ecc..7d81d102d 100644 --- a/src/check_decl.cpp +++ b/src/check_decl.cpp @@ -1030,7 +1030,6 @@ gb_internal void check_proc_decl(CheckerContext *ctx, Entity *e, DeclInfo *d) { switch (e->Procedure.optimization_mode) { case ProcedureOptimizationMode_None: - case ProcedureOptimizationMode_Minimal: if (pl->inlining == ProcInlining_inline) { error(e->token, "#force_inline cannot be used in conjunction with the attribute 'optimization_mode' with neither \"none\" nor \"minimal\""); } diff --git a/src/checker.cpp b/src/checker.cpp index 18b9db6ef..8756cce1a 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -3544,19 +3544,19 @@ gb_internal DECL_ATTRIBUTE_PROC(proc_decl_attribute) { String mode = ev.value_string; if (mode == "none") { ac->optimization_mode = ProcedureOptimizationMode_None; + } else if (mode == "favor_size") { + ac->optimization_mode = ProcedureOptimizationMode_FavorSize; } else if (mode == "minimal") { - ac->optimization_mode = ProcedureOptimizationMode_Minimal; + error(elem, "Invalid optimization_mode 'minimal' for '%.*s', mode has been removed due to confusion, but 'none' has the same behaviour", LIT(name)); } else if (mode == "size") { - ac->optimization_mode = ProcedureOptimizationMode_Size; + error(elem, "Invalid optimization_mode 'size' for '%.*s', mode has been removed due to confusion, but 'favor_size' has the same behaviour", LIT(name)); } else if (mode == "speed") { - ac->optimization_mode = ProcedureOptimizationMode_Speed; + error(elem, "Invalid optimization_mode 'speed' for '%.*s', mode has been removed due to confusion, but 'favor_size' has the same behaviour", LIT(name)); } else { ERROR_BLOCK(); error(elem, "Invalid optimization_mode for '%.*s'. Valid modes:", LIT(name)); error_line("\tnone\n"); - error_line("\tminimal\n"); - error_line("\tsize\n"); - error_line("\tspeed\n"); + error_line("\tfavor_size\n"); } } else { error(elem, "Expected a string for '%.*s'", LIT(name)); diff --git a/src/entity.cpp b/src/entity.cpp index 8f55c1faf..62a190437 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -133,9 +133,7 @@ enum EntityConstantFlags : u32 { enum ProcedureOptimizationMode : u8 { ProcedureOptimizationMode_Default, ProcedureOptimizationMode_None, - ProcedureOptimizationMode_Minimal, - ProcedureOptimizationMode_Size, - ProcedureOptimizationMode_Speed, + ProcedureOptimizationMode_FavorSize, }; diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index 8c82d7117..cc9254269 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -1486,10 +1486,6 @@ gb_internal WORKER_TASK_PROC(lb_llvm_function_pass_per_module) { lb_populate_function_pass_manager(m, m->function_pass_managers[lbFunctionPassManager_default], false, build_context.optimization_level); lb_populate_function_pass_manager(m, m->function_pass_managers[lbFunctionPassManager_default_without_memcpy], true, build_context.optimization_level); lb_populate_function_pass_manager_specific(m, m->function_pass_managers[lbFunctionPassManager_none], -1); - lb_populate_function_pass_manager_specific(m, m->function_pass_managers[lbFunctionPassManager_minimal], 0); - lb_populate_function_pass_manager_specific(m, m->function_pass_managers[lbFunctionPassManager_size], 1); - lb_populate_function_pass_manager_specific(m, m->function_pass_managers[lbFunctionPassManager_speed], 2); - lb_populate_function_pass_manager_specific(m, m->function_pass_managers[lbFunctionPassManager_aggressive], 3); for (i32 i = 0; i < lbFunctionPassManager_COUNT; i++) { LLVMFinalizeFunctionPassManager(m->function_pass_managers[i]); @@ -1513,15 +1509,12 @@ gb_internal WORKER_TASK_PROC(lb_llvm_function_pass_per_module) { if (p->entity && p->entity->kind == Entity_Procedure) { switch (p->entity->Procedure.optimization_mode) { case ProcedureOptimizationMode_None: - case ProcedureOptimizationMode_Minimal: - pass_manager_kind = lbFunctionPassManager_minimal; + pass_manager_kind = lbFunctionPassManager_none; + GB_ASSERT(lb_proc_has_attribute(p->module, p->value, "optnone")); + GB_ASSERT(lb_proc_has_attribute(p->module, p->value, "noinline")); break; - case ProcedureOptimizationMode_Size: - pass_manager_kind = lbFunctionPassManager_size; - lb_add_attribute_to_proc(p->module, p->value, "optsize"); - break; - case ProcedureOptimizationMode_Speed: - pass_manager_kind = lbFunctionPassManager_speed; + case ProcedureOptimizationMode_FavorSize: + GB_ASSERT(lb_proc_has_attribute(p->module, p->value, "optsize")); break; } } diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 447e93d42..022f47857 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -134,11 +134,6 @@ enum lbFunctionPassManagerKind { lbFunctionPassManager_default, lbFunctionPassManager_default_without_memcpy, lbFunctionPassManager_none, - lbFunctionPassManager_minimal, - lbFunctionPassManager_size, - lbFunctionPassManager_speed, - lbFunctionPassManager_aggressive, - lbFunctionPassManager_COUNT }; diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index f65bc9ac7..f8a5e1ebf 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -2522,6 +2522,12 @@ gb_internal void lb_add_proc_attribute_at_index(lbProcedure *p, isize index, cha gb_internal void lb_add_attribute_to_proc(lbModule *m, LLVMValueRef proc_value, char const *name, u64 value=0) { LLVMAddAttributeAtIndex(proc_value, LLVMAttributeIndex_FunctionIndex, lb_create_enum_attribute(m->ctx, name, value)); } + +gb_internal bool lb_proc_has_attribute(lbModule *m, LLVMValueRef proc_value, char const *name) { + LLVMAttributeRef ref = LLVMGetEnumAttributeAtIndex(proc_value, LLVMAttributeIndex_FunctionIndex, LLVMGetEnumAttributeKindForName(name, gb_strlen(name))); + return ref != nullptr; +} + gb_internal void lb_add_attribute_to_proc_with_string(lbModule *m, LLVMValueRef proc_value, String const &name, String const &value) { LLVMAttributeRef attr = lb_create_string_attribute(m->ctx, name, value); LLVMAddAttributeAtIndex(proc_value, LLVMAttributeIndex_FunctionIndex, attr); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 68ba4f74e..610c34de2 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -163,16 +163,10 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i switch (entity->Procedure.optimization_mode) { case ProcedureOptimizationMode_None: - break; - case ProcedureOptimizationMode_Minimal: lb_add_attribute_to_proc(m, p->value, "optnone"); lb_add_attribute_to_proc(m, p->value, "noinline"); break; - case ProcedureOptimizationMode_Size: - lb_add_attribute_to_proc(m, p->value, "optsize"); - break; - case ProcedureOptimizationMode_Speed: - // TODO(bill): handle this correctly + case ProcedureOptimizationMode_FavorSize: lb_add_attribute_to_proc(m, p->value, "optsize"); break; } -- cgit v1.2.3 From edc793d7c123a38826860ef72684308902a7012c Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 14 Jul 2024 11:39:05 +0100 Subject: Add `#no_capture args: ..T` to reuse the backing array stack memory --- core/fmt/fmt.odin | 58 +++++++++++++++++++++++------------------------ core/fmt/fmt_js.odin | 24 ++++++++++---------- core/fmt/fmt_os.odin | 24 ++++++++++---------- src/check_expr.cpp | 17 ++++++++++++++ src/check_type.cpp | 17 ++++++++++++++ src/checker.cpp | 1 + src/checker.hpp | 7 ++++++ src/entity.cpp | 2 +- src/llvm_backend.hpp | 7 ++++++ src/llvm_backend_proc.cpp | 27 +++++++++++++++++++++- src/parser.cpp | 1 + src/parser.hpp | 8 +++++-- 12 files changed, 136 insertions(+), 57 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index 234f4afbd..e56211346 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -125,7 +125,7 @@ register_user_formatter :: proc(id: typeid, formatter: User_Formatter) -> Regist // Returns: A formatted string. // @(require_results) -aprint :: proc(args: ..any, sep := " ", allocator := context.allocator) -> string { +aprint :: proc(#no_capture args: ..any, sep := " ", allocator := context.allocator) -> string { str: strings.Builder strings.builder_init(&str, allocator) return sbprint(&str, ..args, sep=sep) @@ -141,7 +141,7 @@ aprint :: proc(args: ..any, sep := " ", allocator := context.allocator) -> strin // Returns: A formatted string with a newline character at the end. // @(require_results) -aprintln :: proc(args: ..any, sep := " ", allocator := context.allocator) -> string { +aprintln :: proc(#no_capture args: ..any, sep := " ", allocator := context.allocator) -> string { str: strings.Builder strings.builder_init(&str, allocator) return sbprintln(&str, ..args, sep=sep) @@ -158,7 +158,7 @@ aprintln :: proc(args: ..any, sep := " ", allocator := context.allocator) -> str // Returns: A formatted string. The returned string must be freed accordingly. // @(require_results) -aprintf :: proc(fmt: string, args: ..any, allocator := context.allocator, newline := false) -> string { +aprintf :: proc(fmt: string, #no_capture args: ..any, allocator := context.allocator, newline := false) -> string { str: strings.Builder strings.builder_init(&str, allocator) return sbprintf(&str, fmt, ..args, newline=newline) @@ -174,7 +174,7 @@ aprintf :: proc(fmt: string, args: ..any, allocator := context.allocator, newlin // Returns: A formatted string. The returned string must be freed accordingly. // @(require_results) -aprintfln :: proc(fmt: string, args: ..any, allocator := context.allocator) -> string { +aprintfln :: proc(fmt: string, #no_capture args: ..any, allocator := context.allocator) -> string { return aprintf(fmt, ..args, allocator=allocator, newline=true) } // Creates a formatted string @@ -188,7 +188,7 @@ aprintfln :: proc(fmt: string, args: ..any, allocator := context.allocator) -> s // Returns: A formatted string. // @(require_results) -tprint :: proc(args: ..any, sep := " ") -> string { +tprint :: proc(#no_capture args: ..any, sep := " ") -> string { str: strings.Builder strings.builder_init(&str, context.temp_allocator) return sbprint(&str, ..args, sep=sep) @@ -204,7 +204,7 @@ tprint :: proc(args: ..any, sep := " ") -> string { // Returns: A formatted string with a newline character at the end. // @(require_results) -tprintln :: proc(args: ..any, sep := " ") -> string { +tprintln :: proc(#no_capture args: ..any, sep := " ") -> string { str: strings.Builder strings.builder_init(&str, context.temp_allocator) return sbprintln(&str, ..args, sep=sep) @@ -221,7 +221,7 @@ tprintln :: proc(args: ..any, sep := " ") -> string { // Returns: A formatted string. // @(require_results) -tprintf :: proc(fmt: string, args: ..any, newline := false) -> string { +tprintf :: proc(fmt: string, #no_capture args: ..any, newline := false) -> string { str: strings.Builder strings.builder_init(&str, context.temp_allocator) return sbprintf(&str, fmt, ..args, newline=newline) @@ -237,7 +237,7 @@ tprintf :: proc(fmt: string, args: ..any, newline := false) -> string { // Returns: A formatted string. // @(require_results) -tprintfln :: proc(fmt: string, args: ..any) -> string { +tprintfln :: proc(fmt: string, #no_capture args: ..any) -> string { return tprintf(fmt, ..args, newline=true) } // Creates a formatted string using a supplied buffer as the backing array. Writes into the buffer. @@ -249,7 +249,7 @@ tprintfln :: proc(fmt: string, args: ..any) -> string { // // Returns: A formatted string // -bprint :: proc(buf: []byte, args: ..any, sep := " ") -> string { +bprint :: proc(buf: []byte, #no_capture args: ..any, sep := " ") -> string { sb := strings.builder_from_bytes(buf) return sbprint(&sb, ..args, sep=sep) } @@ -262,7 +262,7 @@ bprint :: proc(buf: []byte, args: ..any, sep := " ") -> string { // // Returns: A formatted string with a newline character at the end // -bprintln :: proc(buf: []byte, args: ..any, sep := " ") -> string { +bprintln :: proc(buf: []byte, #no_capture args: ..any, sep := " ") -> string { sb := strings.builder_from_bytes(buf) return sbprintln(&sb, ..args, sep=sep) } @@ -276,7 +276,7 @@ bprintln :: proc(buf: []byte, args: ..any, sep := " ") -> string { // // Returns: A formatted string // -bprintf :: proc(buf: []byte, fmt: string, args: ..any, newline := false) -> string { +bprintf :: proc(buf: []byte, fmt: string, #no_capture args: ..any, newline := false) -> string { sb := strings.builder_from_bytes(buf) return sbprintf(&sb, fmt, ..args, newline=newline) } @@ -289,7 +289,7 @@ bprintf :: proc(buf: []byte, fmt: string, args: ..any, newline := false) -> stri // // Returns: A formatted string // -bprintfln :: proc(buf: []byte, fmt: string, args: ..any) -> string { +bprintfln :: proc(buf: []byte, fmt: string, #no_capture args: ..any) -> string { return bprintf(buf, fmt, ..args, newline=true) } // Runtime assertion with a formatted message @@ -301,14 +301,14 @@ bprintfln :: proc(buf: []byte, fmt: string, args: ..any) -> string { // - loc: The location of the caller // @(disabled=ODIN_DISABLE_ASSERT) -assertf :: proc(condition: bool, fmt: string, args: ..any, loc := #caller_location) { +assertf :: proc(condition: bool, fmt: string, #no_capture args: ..any, loc := #caller_location) { if !condition { // NOTE(dragos): We are using the same trick as in builtin.assert // to improve performance to make the CPU not // execute speculatively, making it about an order of // magnitude faster @(cold) - internal :: proc(loc: runtime.Source_Code_Location, fmt: string, args: ..any) { + internal :: proc(loc: runtime.Source_Code_Location, fmt: string, #no_capture args: ..any) { p := context.assertion_failure_proc if p == nil { p = runtime.default_assertion_failure_proc @@ -326,7 +326,7 @@ assertf :: proc(condition: bool, fmt: string, args: ..any, loc := #caller_locati // - args: A variadic list of arguments to be formatted // - loc: The location of the caller // -panicf :: proc(fmt: string, args: ..any, loc := #caller_location) -> ! { +panicf :: proc(fmt: string, #no_capture args: ..any, loc := #caller_location) -> ! { p := context.assertion_failure_proc if p == nil { p = runtime.default_assertion_failure_proc @@ -346,7 +346,7 @@ panicf :: proc(fmt: string, args: ..any, loc := #caller_location) -> ! { // Returns: A formatted C string // @(require_results) -caprintf :: proc(format: string, args: ..any, newline := false) -> cstring { +caprintf :: proc(format: string, #no_capture args: ..any, newline := false) -> cstring { str: strings.Builder strings.builder_init(&str) sbprintf(&str, format, ..args, newline=newline) @@ -365,7 +365,7 @@ caprintf :: proc(format: string, args: ..any, newline := false) -> cstring { // Returns: A formatted C string // @(require_results) -caprintfln :: proc(format: string, args: ..any) -> cstring { +caprintfln :: proc(format: string, #no_capture args: ..any) -> cstring { return caprintf(format, ..args, newline=true) } // Creates a formatted C string @@ -379,7 +379,7 @@ caprintfln :: proc(format: string, args: ..any) -> cstring { // Returns: A formatted C string. // @(require_results) -ctprint :: proc(args: ..any, sep := " ") -> cstring { +ctprint :: proc(#no_capture args: ..any, sep := " ") -> cstring { str: strings.Builder strings.builder_init(&str, context.temp_allocator) sbprint(&str, ..args, sep=sep) @@ -399,7 +399,7 @@ ctprint :: proc(args: ..any, sep := " ") -> cstring { // Returns: A formatted C string // @(require_results) -ctprintf :: proc(format: string, args: ..any, newline := false) -> cstring { +ctprintf :: proc(format: string, #no_capture args: ..any, newline := false) -> cstring { str: strings.Builder strings.builder_init(&str, context.temp_allocator) sbprintf(&str, format, ..args, newline=newline) @@ -418,7 +418,7 @@ ctprintf :: proc(format: string, args: ..any, newline := false) -> cstring { // Returns: A formatted C string // @(require_results) -ctprintfln :: proc(format: string, args: ..any) -> cstring { +ctprintfln :: proc(format: string, #no_capture args: ..any) -> cstring { return ctprintf(format, ..args, newline=true) } // Formats using the default print settings and writes to the given strings.Builder @@ -430,7 +430,7 @@ ctprintfln :: proc(format: string, args: ..any) -> cstring { // // Returns: A formatted string // -sbprint :: proc(buf: ^strings.Builder, args: ..any, sep := " ") -> string { +sbprint :: proc(buf: ^strings.Builder, #no_capture args: ..any, sep := " ") -> string { wprint(strings.to_writer(buf), ..args, sep=sep, flush=true) return strings.to_string(buf^) } @@ -443,7 +443,7 @@ sbprint :: proc(buf: ^strings.Builder, args: ..any, sep := " ") -> string { // // Returns: The resulting formatted string // -sbprintln :: proc(buf: ^strings.Builder, args: ..any, sep := " ") -> string { +sbprintln :: proc(buf: ^strings.Builder, #no_capture args: ..any, sep := " ") -> string { wprintln(strings.to_writer(buf), ..args, sep=sep, flush=true) return strings.to_string(buf^) } @@ -457,7 +457,7 @@ sbprintln :: proc(buf: ^strings.Builder, args: ..any, sep := " ") -> string { // // Returns: The resulting formatted string // -sbprintf :: proc(buf: ^strings.Builder, fmt: string, args: ..any, newline := false) -> string { +sbprintf :: proc(buf: ^strings.Builder, fmt: string, #no_capture args: ..any, newline := false) -> string { wprintf(strings.to_writer(buf), fmt, ..args, flush=true, newline=newline) return strings.to_string(buf^) } @@ -469,7 +469,7 @@ sbprintf :: proc(buf: ^strings.Builder, fmt: string, args: ..any, newline := fal // // Returns: A formatted string // -sbprintfln :: proc(buf: ^strings.Builder, format: string, args: ..any) -> string { +sbprintfln :: proc(buf: ^strings.Builder, format: string, #no_capture args: ..any) -> string { return sbprintf(buf, format, ..args, newline=true) } // Formats and writes to an io.Writer using the default print settings @@ -481,7 +481,7 @@ sbprintfln :: proc(buf: ^strings.Builder, format: string, args: ..any) -> string // // Returns: The number of bytes written // -wprint :: proc(w: io.Writer, args: ..any, sep := " ", flush := true) -> int { +wprint :: proc(w: io.Writer, #no_capture args: ..any, sep := " ", flush := true) -> int { fi: Info fi.writer = w @@ -522,7 +522,7 @@ wprint :: proc(w: io.Writer, args: ..any, sep := " ", flush := true) -> int { // // Returns: The number of bytes written // -wprintln :: proc(w: io.Writer, args: ..any, sep := " ", flush := true) -> int { +wprintln :: proc(w: io.Writer, #no_capture args: ..any, sep := " ", flush := true) -> int { fi: Info fi.writer = w @@ -549,11 +549,11 @@ wprintln :: proc(w: io.Writer, args: ..any, sep := " ", flush := true) -> int { // // Returns: The number of bytes written // -wprintf :: proc(w: io.Writer, fmt: string, args: ..any, flush := true, newline := false) -> int { +wprintf :: proc(w: io.Writer, fmt: string, #no_capture args: ..any, flush := true, newline := false) -> int { MAX_CHECKED_ARGS :: 64 assert(len(args) <= MAX_CHECKED_ARGS, "number of args > 64 is unsupported") - parse_options :: proc(fi: ^Info, fmt: string, index, end: int, unused_args: ^bit_set[0 ..< MAX_CHECKED_ARGS], args: ..any) -> int { + parse_options :: proc(fi: ^Info, fmt: string, index, end: int, unused_args: ^bit_set[0 ..< MAX_CHECKED_ARGS], #no_capture args: ..any) -> int { i := index // Prefix @@ -809,7 +809,7 @@ wprintf :: proc(w: io.Writer, fmt: string, args: ..any, flush := true, newline : // // Returns: The number of bytes written. // -wprintfln :: proc(w: io.Writer, format: string, args: ..any, flush := true) -> int { +wprintfln :: proc(w: io.Writer, format: string, #no_capture args: ..any, flush := true) -> int { return wprintf(w, format, ..args, flush=flush, newline=true) } // Writes a ^runtime.Type_Info value to an io.Writer diff --git a/core/fmt/fmt_js.odin b/core/fmt/fmt_js.odin index acf218eb5..4389b8d87 100644 --- a/core/fmt/fmt_js.odin +++ b/core/fmt/fmt_js.odin @@ -43,7 +43,7 @@ fd_to_writer :: proc(fd: os.Handle, loc := #caller_location) -> io.Writer { } // fprint formats using the default print settings and writes to fd -fprint :: proc(fd: os.Handle, args: ..any, sep := " ", flush := true, loc := #caller_location) -> int { +fprint :: proc(fd: os.Handle, #no_capture args: ..any, sep := " ", flush := true, loc := #caller_location) -> int { buf: [1024]byte b: bufio.Writer defer bufio.writer_flush(&b) @@ -54,7 +54,7 @@ fprint :: proc(fd: os.Handle, args: ..any, sep := " ", flush := true, loc := #ca } // fprintln formats using the default print settings and writes to fd -fprintln :: proc(fd: os.Handle, args: ..any, sep := " ", flush := true, loc := #caller_location) -> int { +fprintln :: proc(fd: os.Handle, #no_capture args: ..any, sep := " ", flush := true, loc := #caller_location) -> int { buf: [1024]byte b: bufio.Writer defer bufio.writer_flush(&b) @@ -66,7 +66,7 @@ fprintln :: proc(fd: os.Handle, args: ..any, sep := " ", flush := true, loc := # } // fprintf formats according to the specified format string and writes to fd -fprintf :: proc(fd: os.Handle, fmt: string, args: ..any, flush := true, newline := false, loc := #caller_location) -> int { +fprintf :: proc(fd: os.Handle, fmt: string, #no_capture args: ..any, flush := true, newline := false, loc := #caller_location) -> int { buf: [1024]byte b: bufio.Writer defer bufio.writer_flush(&b) @@ -78,24 +78,24 @@ fprintf :: proc(fd: os.Handle, fmt: string, args: ..any, flush := true, newline } // fprintfln formats according to the specified format string and writes to fd, followed by a newline. -fprintfln :: proc(fd: os.Handle, fmt: string, args: ..any, flush := true, loc := #caller_location) -> int { +fprintfln :: proc(fd: os.Handle, fmt: string, #no_capture args: ..any, flush := true, loc := #caller_location) -> int { return fprintf(fd, fmt, ..args, flush=flush, newline=true, loc=loc) } // print formats using the default print settings and writes to stdout -print :: proc(args: ..any, sep := " ", flush := true) -> int { return wprint(w=stdout, args=args, sep=sep, flush=flush) } +print :: proc(#no_capture args: ..any, sep := " ", flush := true) -> int { return wprint(w=stdout, args=args, sep=sep, flush=flush) } // println formats using the default print settings and writes to stdout -println :: proc(args: ..any, sep := " ", flush := true) -> int { return wprintln(w=stdout, args=args, sep=sep, flush=flush) } +println :: proc(#no_capture args: ..any, sep := " ", flush := true) -> int { return wprintln(w=stdout, args=args, sep=sep, flush=flush) } // printf formats according to the specififed format string and writes to stdout -printf :: proc(fmt: string, args: ..any, flush := true) -> int { return wprintf(stdout, fmt, ..args, flush=flush) } +printf :: proc(fmt: string, #no_capture args: ..any, flush := true) -> int { return wprintf(stdout, fmt, ..args, flush=flush) } // printfln formats according to the specified format string and writes to stdout, followed by a newline. -printfln :: proc(fmt: string, args: ..any, flush := true) -> int { return wprintf(stdout, fmt, ..args, flush=flush, newline=true) } +printfln :: proc(fmt: string, #no_capture args: ..any, flush := true) -> int { return wprintf(stdout, fmt, ..args, flush=flush, newline=true) } // eprint formats using the default print settings and writes to stderr -eprint :: proc(args: ..any, sep := " ", flush := true) -> int { return wprint(w=stderr, args=args, sep=sep, flush=flush) } +eprint :: proc(#no_capture args: ..any, sep := " ", flush := true) -> int { return wprint(w=stderr, args=args, sep=sep, flush=flush) } // eprintln formats using the default print settings and writes to stderr -eprintln :: proc(args: ..any, sep := " ", flush := true) -> int { return wprintln(w=stderr, args=args, sep=sep, flush=flush) } +eprintln :: proc(#no_capture args: ..any, sep := " ", flush := true) -> int { return wprintln(w=stderr, args=args, sep=sep, flush=flush) } // eprintf formats according to the specififed format string and writes to stderr -eprintf :: proc(fmt: string, args: ..any, flush := true) -> int { return wprintf(stderr, fmt, ..args, flush=flush) } +eprintf :: proc(fmt: string, #no_capture args: ..any, flush := true) -> int { return wprintf(stderr, fmt, ..args, flush=flush) } // eprintfln formats according to the specified format string and writes to stderr, followed by a newline. -eprintfln :: proc(fmt: string, args: ..any, flush := true) -> int { return wprintf(stdout, fmt, ..args, flush=flush, newline=true) } +eprintfln :: proc(fmt: string, #no_capture args: ..any, flush := true) -> int { return wprintf(stdout, fmt, ..args, flush=flush, newline=true) } diff --git a/core/fmt/fmt_os.odin b/core/fmt/fmt_os.odin index 9de0d43be..538f7a08b 100644 --- a/core/fmt/fmt_os.odin +++ b/core/fmt/fmt_os.odin @@ -9,7 +9,7 @@ import "core:io" import "core:bufio" // fprint formats using the default print settings and writes to fd -fprint :: proc(fd: os.Handle, args: ..any, sep := " ", flush := true) -> int { +fprint :: proc(fd: os.Handle, #no_capture args: ..any, sep := " ", flush := true) -> int { buf: [1024]byte b: bufio.Writer defer bufio.writer_flush(&b) @@ -20,7 +20,7 @@ fprint :: proc(fd: os.Handle, args: ..any, sep := " ", flush := true) -> int { } // fprintln formats using the default print settings and writes to fd -fprintln :: proc(fd: os.Handle, args: ..any, sep := " ", flush := true) -> int { +fprintln :: proc(fd: os.Handle, #no_capture args: ..any, sep := " ", flush := true) -> int { buf: [1024]byte b: bufio.Writer defer bufio.writer_flush(&b) @@ -31,7 +31,7 @@ fprintln :: proc(fd: os.Handle, args: ..any, sep := " ", flush := true) -> int { return wprintln(w, ..args, sep=sep, flush=flush) } // fprintf formats according to the specified format string and writes to fd -fprintf :: proc(fd: os.Handle, fmt: string, args: ..any, flush := true, newline := false) -> int { +fprintf :: proc(fd: os.Handle, fmt: string, #no_capture args: ..any, flush := true, newline := false) -> int { buf: [1024]byte b: bufio.Writer defer bufio.writer_flush(&b) @@ -42,7 +42,7 @@ fprintf :: proc(fd: os.Handle, fmt: string, args: ..any, flush := true, newline return wprintf(w, fmt, ..args, flush=flush, newline=newline) } // fprintfln formats according to the specified format string and writes to fd, followed by a newline. -fprintfln :: proc(fd: os.Handle, fmt: string, args: ..any, flush := true) -> int { +fprintfln :: proc(fd: os.Handle, fmt: string, #no_capture args: ..any, flush := true) -> int { return fprintf(fd, fmt, ..args, flush=flush, newline=true) } fprint_type :: proc(fd: os.Handle, info: ^runtime.Type_Info, flush := true) -> (n: int, err: io.Error) { @@ -67,19 +67,19 @@ fprint_typeid :: proc(fd: os.Handle, id: typeid, flush := true) -> (n: int, err: } // print formats using the default print settings and writes to os.stdout -print :: proc(args: ..any, sep := " ", flush := true) -> int { return fprint(os.stdout, ..args, sep=sep, flush=flush) } +print :: proc(#no_capture args: ..any, sep := " ", flush := true) -> int { return fprint(os.stdout, ..args, sep=sep, flush=flush) } // println formats using the default print settings and writes to os.stdout -println :: proc(args: ..any, sep := " ", flush := true) -> int { return fprintln(os.stdout, ..args, sep=sep, flush=flush) } +println :: proc(#no_capture args: ..any, sep := " ", flush := true) -> int { return fprintln(os.stdout, ..args, sep=sep, flush=flush) } // printf formats according to the specified format string and writes to os.stdout -printf :: proc(fmt: string, args: ..any, flush := true) -> int { return fprintf(os.stdout, fmt, ..args, flush=flush) } +printf :: proc(fmt: string, #no_capture args: ..any, flush := true) -> int { return fprintf(os.stdout, fmt, ..args, flush=flush) } // printfln formats according to the specified format string and writes to os.stdout, followed by a newline. -printfln :: proc(fmt: string, args: ..any, flush := true) -> int { return fprintf(os.stdout, fmt, ..args, flush=flush, newline=true) } +printfln :: proc(fmt: string, #no_capture args: ..any, flush := true) -> int { return fprintf(os.stdout, fmt, ..args, flush=flush, newline=true) } // eprint formats using the default print settings and writes to os.stderr -eprint :: proc(args: ..any, sep := " ", flush := true) -> int { return fprint(os.stderr, ..args, sep=sep, flush=flush) } +eprint :: proc(#no_capture args: ..any, sep := " ", flush := true) -> int { return fprint(os.stderr, ..args, sep=sep, flush=flush) } // eprintln formats using the default print settings and writes to os.stderr -eprintln :: proc(args: ..any, sep := " ", flush := true) -> int { return fprintln(os.stderr, ..args, sep=sep, flush=flush) } +eprintln :: proc(#no_capture args: ..any, sep := " ", flush := true) -> int { return fprintln(os.stderr, ..args, sep=sep, flush=flush) } // eprintf formats according to the specified format string and writes to os.stderr -eprintf :: proc(fmt: string, args: ..any, flush := true) -> int { return fprintf(os.stderr, fmt, ..args, flush=flush) } +eprintf :: proc(fmt: string, #no_capture args: ..any, flush := true) -> int { return fprintf(os.stderr, fmt, ..args, flush=flush) } // eprintfln formats according to the specified format string and writes to os.stderr, followed by a newline. -eprintfln :: proc(fmt: string, args: ..any, flush := true) -> int { return fprintf(os.stderr, fmt, ..args, flush=flush, newline=true) } +eprintfln :: proc(fmt: string, #no_capture args: ..any, flush := true) -> int { return fprintf(os.stderr, fmt, ..args, flush=flush, newline=true) } diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 12acca0cb..645d8ac5a 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -6033,6 +6033,23 @@ gb_internal CallArgumentError check_call_arguments_internal(CheckerContext *c, A Entity *vt = pt->params->Tuple.variables[pt->variadic_index]; o.type = vt->type; + + // NOTE(bill, 2024-07-14): minimize the stack usage for variadic parameter that use `#no_capture` + // on the variadic parameter + if (c->decl && (vt->flags & EntityFlag_NoCapture)) { + bool found = false; + for (NoCaptureData &nc : c->decl->no_captures) { + if (are_types_identical(vt->type, nc.slice_type)) { + nc.max_count = gb_max(nc.max_count, variadic_operands.count); + found = true; + break; + } + } + if (!found) { + array_add(&c->decl->no_captures, NoCaptureData{vt->type, variadic_operands.count}); + } + } + } else { dummy_argument_count += 1; o.type = t_untyped_nil; diff --git a/src/check_type.cpp b/src/check_type.cpp index dd8559114..d1c9bb381 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -1953,6 +1953,10 @@ gb_internal Type *check_get_params(CheckerContext *ctx, Scope *scope, Ast *_para error(name, "'#by_ptr' can only be applied to variable fields"); p->flags &= ~FieldFlag_by_ptr; } + if (p->flags&FieldFlag_no_capture) { + error(name, "'#no_capture' can only be applied to variable variadic fields"); + p->flags &= ~FieldFlag_no_capture; + } param = alloc_entity_type_name(scope, name->Ident.token, type, EntityState_Resolved); param->TypeName.is_type_alias = true; @@ -2054,6 +2058,15 @@ gb_internal Type *check_get_params(CheckerContext *ctx, Scope *scope, Ast *_para p->flags &= ~FieldFlag_by_ptr; // Remove the flag } } + if (p->flags&FieldFlag_no_capture) { + if (!(is_variadic && variadic_index == variables.count)) { + error(name, "'#no_capture' can only be applied to a variadic parameter"); + p->flags &= ~FieldFlag_no_capture; + } else if (p->flags & FieldFlag_c_vararg) { + error(name, "'#no_capture' cannot be applied to a #c_vararg parameter"); + p->flags &= ~FieldFlag_no_capture; + } + } if (is_poly_name) { if (p->flags&FieldFlag_no_alias) { @@ -2115,6 +2128,10 @@ gb_internal Type *check_get_params(CheckerContext *ctx, Scope *scope, Ast *_para if (p->flags&FieldFlag_by_ptr) { param->flags |= EntityFlag_ByPtr; } + if (p->flags&FieldFlag_no_capture) { + param->flags |= EntityFlag_NoCapture; + } + param->state = EntityState_Resolved; // NOTE(bill): This should have be resolved whilst determining it add_entity(ctx, scope, name, param); diff --git a/src/checker.cpp b/src/checker.cpp index 8756cce1a..abacc13cb 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -184,6 +184,7 @@ gb_internal void init_decl_info(DeclInfo *d, Scope *scope, DeclInfo *parent) { ptr_set_init(&d->deps, 0); ptr_set_init(&d->type_info_deps, 0); d->labels.allocator = heap_allocator(); + d->no_captures.allocator = heap_allocator(); } gb_internal DeclInfo *make_decl_info(Scope *scope, DeclInfo *parent) { diff --git a/src/checker.hpp b/src/checker.hpp index 781737140..17722f6b6 100644 --- a/src/checker.hpp +++ b/src/checker.hpp @@ -181,6 +181,11 @@ char const *ProcCheckedState_strings[ProcCheckedState_COUNT] { "Checked", }; +struct NoCaptureData { + Type *slice_type; // ..elem_type + isize max_count; +}; + // DeclInfo is used to store information of certain declarations to allow for "any order" usage struct DeclInfo { DeclInfo * parent; // NOTE(bill): only used for procedure literals at the moment @@ -219,6 +224,8 @@ struct DeclInfo { Array labels; + Array no_captures; + // NOTE(bill): this is to prevent a race condition since these procedure literals can be created anywhere at any time struct lbModule *code_gen_module; }; diff --git a/src/entity.cpp b/src/entity.cpp index 41d84e0f7..db6ffdd52 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -45,7 +45,7 @@ enum EntityFlag : u64 { EntityFlag_Value = 1ull<<11, EntityFlag_BitFieldField = 1ull<<12, - + EntityFlag_NoCapture = 1ull<<13, // #no_capture EntityFlag_PolyConst = 1ull<<15, EntityFlag_NotExported = 1ull<<16, diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 005358734..dd1041702 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -296,6 +296,11 @@ enum lbProcedureFlag : u32 { lbProcedureFlag_DebugAllocaCopy = 1<<1, }; +struct lbNoCaptureData { + Type *slice_type; + lbAddr base_array; +}; + struct lbProcedure { u32 flags; u16 state_flags; @@ -336,6 +341,8 @@ struct lbProcedure { bool in_multi_assignment; Array raw_input_parameters; + Array no_captures; + LLVMValueRef temp_callee_return_struct_memory; Ast *curr_stmt; diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 610c34de2..ec244e185 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -517,6 +517,7 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) { lb_start_block(p, p->entry_block); map_init(&p->direct_parameters); + p->no_captures.allocator = heap_allocator(); GB_ASSERT(p->type != nullptr); @@ -3450,8 +3451,32 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { } isize slice_len = var_args.count; if (slice_len > 0) { + lbAddr base_array = {}; + if (e->flags & EntityFlag_NoCapture) { + for (lbNoCaptureData const &nc : p->no_captures) { + if (are_types_identical(nc.slice_type, slice_type)) { + base_array = nc.base_array; + break; + } + } + DeclInfo *d = decl_info_of_entity(p->entity); + if (d != nullptr && base_array.addr.value == nullptr) { + for (NoCaptureData const &nc : d->no_captures) { + if (are_types_identical(nc.slice_type, slice_type)) { + base_array = lb_add_local_generated(p, alloc_type_array(elem_type, nc.max_count), true); + array_add(&p->no_captures, lbNoCaptureData{slice_type, base_array}); + break; + } + } + } + } + + if (base_array.addr.value == nullptr) { + base_array = lb_add_local_generated(p, alloc_type_array(elem_type, slice_len), true); + } + GB_ASSERT(base_array.addr.value != nullptr); + lbAddr slice = lb_add_local_generated(p, slice_type, true); - lbAddr base_array = lb_add_local_generated(p, alloc_type_array(elem_type, slice_len), true); for (isize i = 0; i < var_args.count; i++) { lbValue addr = lb_emit_array_epi(p, base_array.addr, cast(i32)i); diff --git a/src/parser.cpp b/src/parser.cpp index 9ce3d563d..a6a146cfd 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -4014,6 +4014,7 @@ struct ParseFieldPrefixMapping { gb_global ParseFieldPrefixMapping const parse_field_prefix_mappings[] = { {str_lit("using"), Token_using, FieldFlag_using}, {str_lit("no_alias"), Token_Hash, FieldFlag_no_alias}, + {str_lit("no_capture"), Token_Hash, FieldFlag_no_capture}, {str_lit("c_vararg"), Token_Hash, FieldFlag_c_vararg}, {str_lit("const"), Token_Hash, FieldFlag_const}, {str_lit("any_int"), Token_Hash, FieldFlag_any_int}, diff --git a/src/parser.hpp b/src/parser.hpp index 86b3393af..15176f789 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -330,9 +330,10 @@ enum FieldFlag : u32 { FieldFlag_subtype = 1<<7, FieldFlag_by_ptr = 1<<8, FieldFlag_no_broadcast = 1<<9, // disallow array programming + FieldFlag_no_capture = 1<<10, // Internal use by the parser only - FieldFlag_Tags = 1<<10, + FieldFlag_Tags = 1<<11, FieldFlag_Results = 1<<16, @@ -340,7 +341,10 @@ enum FieldFlag : u32 { FieldFlag_Invalid = 1u<<31, // Parameter List Restrictions - FieldFlag_Signature = FieldFlag_ellipsis|FieldFlag_using|FieldFlag_no_alias|FieldFlag_c_vararg|FieldFlag_const|FieldFlag_any_int|FieldFlag_by_ptr|FieldFlag_no_broadcast, + FieldFlag_Signature = FieldFlag_ellipsis|FieldFlag_using|FieldFlag_no_alias|FieldFlag_c_vararg| + FieldFlag_const|FieldFlag_any_int|FieldFlag_by_ptr|FieldFlag_no_broadcast| + FieldFlag_no_capture, + FieldFlag_Struct = FieldFlag_using|FieldFlag_subtype|FieldFlag_Tags, }; -- cgit v1.2.3 From 3dff83f3dc2914cdfb9a8f19cf990682cda41b03 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 14 Jul 2024 12:39:30 +0100 Subject: Mock out `#no_capture` for future use --- src/check_expr.cpp | 13 ++++++------- src/check_type.cpp | 32 ++++++++++++++++++++++++-------- src/checker.cpp | 2 +- src/checker.hpp | 4 ++-- src/llvm_abi.cpp | 6 ++++++ src/llvm_backend.hpp | 4 ++-- src/llvm_backend_proc.cpp | 32 +++++++++++++------------------- 7 files changed, 54 insertions(+), 39 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 645d8ac5a..4edd34990 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -6034,19 +6034,18 @@ gb_internal CallArgumentError check_call_arguments_internal(CheckerContext *c, A Entity *vt = pt->params->Tuple.variables[pt->variadic_index]; o.type = vt->type; - // NOTE(bill, 2024-07-14): minimize the stack usage for variadic parameter that use `#no_capture` - // on the variadic parameter - if (c->decl && (vt->flags & EntityFlag_NoCapture)) { + // NOTE(bill, 2024-07-14): minimize the stack usage for variadic parameters with the backing array + if (c->decl) { bool found = false; - for (NoCaptureData &nc : c->decl->no_captures) { - if (are_types_identical(vt->type, nc.slice_type)) { - nc.max_count = gb_max(nc.max_count, variadic_operands.count); + for (auto &vr : c->decl->variadic_reuses) { + if (are_types_identical(vt->type, vr.slice_type)) { + vr.max_count = gb_max(vr.max_count, variadic_operands.count); found = true; break; } } if (!found) { - array_add(&c->decl->no_captures, NoCaptureData{vt->type, variadic_operands.count}); + array_add(&c->decl->variadic_reuses, VariadicReuseData{vt->type, variadic_operands.count}); } } diff --git a/src/check_type.cpp b/src/check_type.cpp index 466b9b3cd..7b75bf503 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -1954,7 +1954,7 @@ gb_internal Type *check_get_params(CheckerContext *ctx, Scope *scope, Ast *_para p->flags &= ~FieldFlag_by_ptr; } if (p->flags&FieldFlag_no_capture) { - error(name, "'#no_capture' can only be applied to variable variadic fields"); + error(name, "'#no_capture' can only be applied to variable fields"); p->flags &= ~FieldFlag_no_capture; } @@ -2059,16 +2059,32 @@ gb_internal Type *check_get_params(CheckerContext *ctx, Scope *scope, Ast *_para } } if (p->flags&FieldFlag_no_capture) { - if (!(is_variadic && variadic_index == variables.count)) { - error(name, "'#no_capture' can only be applied to a variadic parameter"); - p->flags &= ~FieldFlag_no_capture; - } else if (p->flags & FieldFlag_c_vararg) { - error(name, "'#no_capture' cannot be applied to a #c_vararg parameter"); - p->flags &= ~FieldFlag_no_capture; + if (is_variadic && variadic_index == variables.count) { + if (p->flags & FieldFlag_c_vararg) { + error(name, "'#no_capture' cannot be applied to a #c_vararg parameter"); + p->flags &= ~FieldFlag_no_capture; + } else { + error(name, "'#no_capture' is already implied on all variadic parameter"); + } + } else if (is_type_polymorphic(type)) { + // ignore } else { - error(name, "'#no_capture' is already implied on all variadic parameter"); + if (is_type_internally_pointer_like(type)) { + // okay + } else if (is_type_slice(type) || is_type_string(type)) { + // okay + } else if (is_type_dynamic_array(type)) { + // okay + } else { + ERROR_BLOCK(); + error(name, "'#no_capture' can only be applied to pointer-like types, slices, strings, and dynamic arrays"); + error_line("\t'#no_capture' does not currently do anything useful\n"); + p->flags &= ~FieldFlag_no_capture; + } } } + + if (is_poly_name) { if (p->flags&FieldFlag_no_alias) { error(name, "'#no_alias' can only be applied to non constant values"); diff --git a/src/checker.cpp b/src/checker.cpp index abacc13cb..9adf4ef3c 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -184,7 +184,7 @@ gb_internal void init_decl_info(DeclInfo *d, Scope *scope, DeclInfo *parent) { ptr_set_init(&d->deps, 0); ptr_set_init(&d->type_info_deps, 0); d->labels.allocator = heap_allocator(); - d->no_captures.allocator = heap_allocator(); + d->variadic_reuses.allocator = heap_allocator(); } gb_internal DeclInfo *make_decl_info(Scope *scope, DeclInfo *parent) { diff --git a/src/checker.hpp b/src/checker.hpp index 17722f6b6..2fadbe56a 100644 --- a/src/checker.hpp +++ b/src/checker.hpp @@ -181,7 +181,7 @@ char const *ProcCheckedState_strings[ProcCheckedState_COUNT] { "Checked", }; -struct NoCaptureData { +struct VariadicReuseData { Type *slice_type; // ..elem_type isize max_count; }; @@ -224,7 +224,7 @@ struct DeclInfo { Array labels; - Array no_captures; + Array variadic_reuses; // NOTE(bill): this is to prevent a race condition since these procedure literals can be created anywhere at any time struct lbModule *code_gen_module; diff --git a/src/llvm_abi.cpp b/src/llvm_abi.cpp index b2e485d01..9a3479b34 100644 --- a/src/llvm_abi.cpp +++ b/src/llvm_abi.cpp @@ -15,6 +15,7 @@ struct lbArgType { LLVMAttributeRef align_attribute; // Optional i64 byval_alignment; bool is_byval; + bool no_capture; }; @@ -159,6 +160,11 @@ gb_internal void lb_add_function_type_attributes(LLVMValueRef fn, lbFunctionType LLVMAddAttributeAtIndex(fn, arg_index+1, arg->align_attribute); } + if (arg->no_capture) { + LLVMAddAttributeAtIndex(fn, arg_index+1, nocapture_attr); + } + + if (ft->multiple_return_original_type) { if (ft->original_arg_count <= i) { LLVMAddAttributeAtIndex(fn, arg_index+1, noalias_attr); diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 71fa1dbd0..24494e2af 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -296,7 +296,7 @@ enum lbProcedureFlag : u32 { lbProcedureFlag_DebugAllocaCopy = 1<<1, }; -struct lbNoCaptureData { +struct lbVariadicReuseData { Type *slice_type; lbAddr base_array; }; @@ -341,7 +341,7 @@ struct lbProcedure { bool in_multi_assignment; Array raw_input_parameters; - Array no_captures; + Array variadic_reuses; LLVMValueRef temp_callee_return_struct_memory; Ast *curr_stmt; diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index ec244e185..1585df865 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -517,7 +517,7 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) { lb_start_block(p, p->entry_block); map_init(&p->direct_parameters); - p->no_captures.allocator = heap_allocator(); + p->variadic_reuses.allocator = heap_allocator(); GB_ASSERT(p->type != nullptr); @@ -3452,28 +3452,22 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { isize slice_len = var_args.count; if (slice_len > 0) { lbAddr base_array = {}; - if (e->flags & EntityFlag_NoCapture) { - for (lbNoCaptureData const &nc : p->no_captures) { - if (are_types_identical(nc.slice_type, slice_type)) { - base_array = nc.base_array; - break; - } + for (auto const &vr : p->variadic_reuses) { + if (are_types_identical(vr.slice_type, slice_type)) { + base_array = vr.base_array; + break; } - DeclInfo *d = decl_info_of_entity(p->entity); - if (d != nullptr && base_array.addr.value == nullptr) { - for (NoCaptureData const &nc : d->no_captures) { - if (are_types_identical(nc.slice_type, slice_type)) { - base_array = lb_add_local_generated(p, alloc_type_array(elem_type, nc.max_count), true); - array_add(&p->no_captures, lbNoCaptureData{slice_type, base_array}); - break; - } + } + DeclInfo *d = decl_info_of_entity(p->entity); + if (d != nullptr && base_array.addr.value == nullptr) { + for (auto const &vr : d->variadic_reuses) { + if (are_types_identical(vr.slice_type, slice_type)) { + base_array = lb_add_local_generated(p, alloc_type_array(elem_type, vr.max_count), true); + array_add(&p->variadic_reuses, lbVariadicReuseData{slice_type, base_array}); + break; } } } - - if (base_array.addr.value == nullptr) { - base_array = lb_add_local_generated(p, alloc_type_array(elem_type, slice_len), true); - } GB_ASSERT(base_array.addr.value != nullptr); lbAddr slice = lb_add_local_generated(p, slice_type, true); -- cgit v1.2.3 From 5ce6676914f5daadf42613574d4700c2750275de Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 14 Jul 2024 12:41:16 +0100 Subject: Make `#no_capture` map to `nocapture` --- src/llvm_backend_proc.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 1585df865..825434c31 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -253,6 +253,9 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i if (e->flags&EntityFlag_NoAlias) { lb_add_proc_attribute_at_index(p, offset+parameter_index, "noalias"); } + if (e->flags&EntityFlag_NoCapture) { + lb_add_proc_attribute_at_index(p, offset+parameter_index, "nocapture"); + } parameter_index += 1; } } -- cgit v1.2.3 From 3533094cc2fc8cae8229b9887efae4541ccd278b Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 14 Jul 2024 12:44:13 +0100 Subject: Restrict `#no_capture` to pointer-like types only --- src/check_type.cpp | 6 +----- src/llvm_backend_proc.cpp | 4 +++- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/check_type.cpp b/src/check_type.cpp index 7b75bf503..d0dddb62b 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -2071,13 +2071,9 @@ gb_internal Type *check_get_params(CheckerContext *ctx, Scope *scope, Ast *_para } else { if (is_type_internally_pointer_like(type)) { // okay - } else if (is_type_slice(type) || is_type_string(type)) { - // okay - } else if (is_type_dynamic_array(type)) { - // okay } else { ERROR_BLOCK(); - error(name, "'#no_capture' can only be applied to pointer-like types, slices, strings, and dynamic arrays"); + error(name, "'#no_capture' can only be applied to pointer-like types"); error_line("\t'#no_capture' does not currently do anything useful\n"); p->flags &= ~FieldFlag_no_capture; } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 825434c31..272ffb474 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -254,7 +254,9 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i lb_add_proc_attribute_at_index(p, offset+parameter_index, "noalias"); } if (e->flags&EntityFlag_NoCapture) { - lb_add_proc_attribute_at_index(p, offset+parameter_index, "nocapture"); + if (is_type_internally_pointer_like(e->type)) { + lb_add_proc_attribute_at_index(p, offset+parameter_index, "nocapture"); + } } parameter_index += 1; } -- cgit v1.2.3 From 5027c7081e02cc208a354cbad2fbeb90ac297b03 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 14 Jul 2024 12:50:33 +0100 Subject: Reuse slice variable for variadic parameters --- src/llvm_backend.hpp | 1 + src/llvm_backend_proc.cpp | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 24494e2af..100748038 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -299,6 +299,7 @@ enum lbProcedureFlag : u32 { struct lbVariadicReuseData { Type *slice_type; lbAddr base_array; + lbAddr slice_addr; }; struct lbProcedure { diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 272ffb474..bc85b14c2 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -3457,9 +3457,12 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { isize slice_len = var_args.count; if (slice_len > 0) { lbAddr base_array = {}; + lbAddr slice = {}; + for (auto const &vr : p->variadic_reuses) { if (are_types_identical(vr.slice_type, slice_type)) { base_array = vr.base_array; + slice = vr.slice_addr; break; } } @@ -3468,14 +3471,15 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { for (auto const &vr : d->variadic_reuses) { if (are_types_identical(vr.slice_type, slice_type)) { base_array = lb_add_local_generated(p, alloc_type_array(elem_type, vr.max_count), true); - array_add(&p->variadic_reuses, lbVariadicReuseData{slice_type, base_array}); + slice = lb_add_local_generated(p, slice_type, true); + array_add(&p->variadic_reuses, lbVariadicReuseData{slice_type, base_array, slice}); break; } } } GB_ASSERT(base_array.addr.value != nullptr); + GB_ASSERT(slice.addr.value != nullptr); - lbAddr slice = lb_add_local_generated(p, slice_type, true); for (isize i = 0; i < var_args.count; i++) { lbValue addr = lb_emit_array_epi(p, base_array.addr, cast(i32)i); -- cgit v1.2.3 From 6959554040d85597300ab2ce6c25852d18e61923 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 14 Jul 2024 13:44:47 +0100 Subject: Calculate size and alignment, and reuse memory for all variadic calls within a procedure body --- src/check_decl.cpp | 9 +++++++++ src/checker.cpp | 2 ++ src/checker.hpp | 4 +++- src/llvm_backend.hpp | 6 +++--- src/llvm_backend_proc.cpp | 25 +++++++++++++++++-------- 5 files changed, 34 insertions(+), 12 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/check_decl.cpp b/src/check_decl.cpp index 7d81d102d..6828774e4 100644 --- a/src/check_decl.cpp +++ b/src/check_decl.cpp @@ -1869,5 +1869,14 @@ gb_internal bool check_proc_body(CheckerContext *ctx_, Token token, DeclInfo *de add_deps_from_child_to_parent(decl); + for (VariadicReuseData const &vr : decl->variadic_reuses) { + GB_ASSERT(vr.slice_type->kind == Type_Slice); + Type *elem = vr.slice_type->Slice.elem; + i64 size = type_size_of(elem); + i64 align = type_align_of(elem); + decl->variadic_reuse_max_bytes = gb_max(decl->variadic_reuse_max_bytes, size*vr.max_count); + decl->variadic_reuse_max_align = gb_max(decl->variadic_reuse_max_align, align); + } + return true; } diff --git a/src/checker.cpp b/src/checker.cpp index 9adf4ef3c..336440d32 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -185,6 +185,8 @@ gb_internal void init_decl_info(DeclInfo *d, Scope *scope, DeclInfo *parent) { ptr_set_init(&d->type_info_deps, 0); d->labels.allocator = heap_allocator(); d->variadic_reuses.allocator = heap_allocator(); + d->variadic_reuse_max_bytes = 0; + d->variadic_reuse_max_align = 1; } gb_internal DeclInfo *make_decl_info(Scope *scope, DeclInfo *parent) { diff --git a/src/checker.hpp b/src/checker.hpp index 2fadbe56a..d76e4c7d0 100644 --- a/src/checker.hpp +++ b/src/checker.hpp @@ -183,7 +183,7 @@ char const *ProcCheckedState_strings[ProcCheckedState_COUNT] { struct VariadicReuseData { Type *slice_type; // ..elem_type - isize max_count; + i64 max_count; }; // DeclInfo is used to store information of certain declarations to allow for "any order" usage @@ -225,6 +225,8 @@ struct DeclInfo { Array labels; Array variadic_reuses; + i64 variadic_reuse_max_bytes; + i64 variadic_reuse_max_align; // NOTE(bill): this is to prevent a race condition since these procedure literals can be created anywhere at any time struct lbModule *code_gen_module; diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 100748038..deb05528f 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -296,9 +296,8 @@ enum lbProcedureFlag : u32 { lbProcedureFlag_DebugAllocaCopy = 1<<1, }; -struct lbVariadicReuseData { +struct lbVariadicReuseSlices { Type *slice_type; - lbAddr base_array; lbAddr slice_addr; }; @@ -342,7 +341,8 @@ struct lbProcedure { bool in_multi_assignment; Array raw_input_parameters; - Array variadic_reuses; + Array variadic_reuses; + lbAddr variadic_reuse_base_array_ptr; LLVMValueRef temp_callee_return_struct_memory; Ast *curr_stmt; diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index bc85b14c2..7a895fbdd 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -3456,39 +3456,48 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { } isize slice_len = var_args.count; if (slice_len > 0) { - lbAddr base_array = {}; lbAddr slice = {}; for (auto const &vr : p->variadic_reuses) { if (are_types_identical(vr.slice_type, slice_type)) { - base_array = vr.base_array; slice = vr.slice_addr; break; } } + DeclInfo *d = decl_info_of_entity(p->entity); - if (d != nullptr && base_array.addr.value == nullptr) { + if (d != nullptr && slice.addr.value == nullptr) { for (auto const &vr : d->variadic_reuses) { if (are_types_identical(vr.slice_type, slice_type)) { - base_array = lb_add_local_generated(p, alloc_type_array(elem_type, vr.max_count), true); slice = lb_add_local_generated(p, slice_type, true); - array_add(&p->variadic_reuses, lbVariadicReuseData{slice_type, base_array, slice}); + array_add(&p->variadic_reuses, lbVariadicReuseSlices{slice_type, slice}); break; } } } - GB_ASSERT(base_array.addr.value != nullptr); + + lbValue base_array_ptr = p->variadic_reuse_base_array_ptr.addr; + if (d != nullptr && base_array_ptr.value == nullptr) { + i64 max_bytes = d->variadic_reuse_max_bytes; + i64 max_align = gb_max(d->variadic_reuse_max_align, 16); + p->variadic_reuse_base_array_ptr = lb_add_local_generated(p, alloc_type_array(t_u8, max_bytes), true); + lb_try_update_alignment(p->variadic_reuse_base_array_ptr.addr, cast(unsigned)max_align); + base_array_ptr = p->variadic_reuse_base_array_ptr.addr; + } + + GB_ASSERT(base_array_ptr.value != nullptr); GB_ASSERT(slice.addr.value != nullptr); + base_array_ptr = lb_emit_conv(p, base_array_ptr, alloc_type_pointer(alloc_type_array(elem_type, slice_len))); for (isize i = 0; i < var_args.count; i++) { - lbValue addr = lb_emit_array_epi(p, base_array.addr, cast(i32)i); + lbValue addr = lb_emit_array_epi(p, base_array_ptr, cast(i32)i); lbValue var_arg = var_args[i]; var_arg = lb_emit_conv(p, var_arg, elem_type); lb_emit_store(p, addr, var_arg); } - lbValue base_elem = lb_emit_array_epi(p, base_array.addr, 0); + lbValue base_elem = lb_emit_array_epi(p, base_array_ptr, 0); lbValue len = lb_const_int(p->module, t_int, slice_len); lb_fill_slice(p, slice, base_elem, len); -- cgit v1.2.3 From 2e0c5fefdedbeb1d9008a6f05a5c73c9fca7602f Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 14 Jul 2024 14:07:36 +0100 Subject: Reuse the slice value too for variadic parameters (LLVM >= 13) --- src/llvm_backend_proc.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 7a895fbdd..5270d6c30 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -3469,7 +3469,18 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { if (d != nullptr && slice.addr.value == nullptr) { for (auto const &vr : d->variadic_reuses) { if (are_types_identical(vr.slice_type, slice_type)) { + #if LLVM_VERSION_MAJOR >= 13 + // NOTE(bill): No point wasting even more memory, just reuse this stack variable too + if (p->variadic_reuses.count > 0) { + slice = p->variadic_reuses[0].slice_addr; + } else { + slice = lb_add_local_generated(p, slice_type, true); + } + // NOTE(bill): Change the underlying type to match the specific type + slice.addr.type = alloc_type_pointer(slice_type); + #else slice = lb_add_local_generated(p, slice_type, true); + #endif array_add(&p->variadic_reuses, lbVariadicReuseSlices{slice_type, slice}); break; } -- cgit v1.2.3 From f657055f12c546767bfd74de6b6d2b2329dd8ea7 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 14 Jul 2024 23:36:54 +0100 Subject: Add `slice` variable if not exists --- src/llvm_backend_proc.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 5270d6c30..001a6f27c 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -3496,6 +3496,10 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { base_array_ptr = p->variadic_reuse_base_array_ptr.addr; } + if (slice.addr.value == nullptr) { + slice = lb_add_local_generated(p, slice_type, true); + } + GB_ASSERT(base_array_ptr.value != nullptr); GB_ASSERT(slice.addr.value != nullptr); -- cgit v1.2.3 From 432388ac7fed8e295fab14d6a7c22f8bf888d2df Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 15 Jul 2024 02:42:28 +0100 Subject: Generate backing array in the case where there is no `DeclInfo` for the procedure body --- src/llvm_backend_proc.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 001a6f27c..eefe1c422 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -3488,12 +3488,16 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { } lbValue base_array_ptr = p->variadic_reuse_base_array_ptr.addr; - if (d != nullptr && base_array_ptr.value == nullptr) { - i64 max_bytes = d->variadic_reuse_max_bytes; - i64 max_align = gb_max(d->variadic_reuse_max_align, 16); - p->variadic_reuse_base_array_ptr = lb_add_local_generated(p, alloc_type_array(t_u8, max_bytes), true); - lb_try_update_alignment(p->variadic_reuse_base_array_ptr.addr, cast(unsigned)max_align); - base_array_ptr = p->variadic_reuse_base_array_ptr.addr; + if (base_array_ptr.value == nullptr) { + if (d != nullptr) { + i64 max_bytes = d->variadic_reuse_max_bytes; + i64 max_align = gb_max(d->variadic_reuse_max_align, 16); + p->variadic_reuse_base_array_ptr = lb_add_local_generated(p, alloc_type_array(t_u8, max_bytes), true); + lb_try_update_alignment(p->variadic_reuse_base_array_ptr.addr, cast(unsigned)max_align); + base_array_ptr = p->variadic_reuse_base_array_ptr.addr; + } else { + base_array_ptr = lb_add_local_generated(p, alloc_type_array(elem_type, slice_len), true).addr; + } } if (slice.addr.value == nullptr) { -- cgit v1.2.3 From 1e37eaf54daf885636ea3ad9606a2b54e01721f9 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 15 Jul 2024 14:49:20 +0100 Subject: Begin work for `bit_set[...; [N]T]` (not working) --- src/build_settings.cpp | 2 + src/check_expr.cpp | 6 ++- src/check_type.cpp | 21 ++------ src/llvm_backend_const.cpp | 2 + src/llvm_backend_expr.cpp | 120 ++++++++++++++++++++++++++++++++++--------- src/llvm_backend_general.cpp | 2 + src/llvm_backend_proc.cpp | 5 ++ src/main.cpp | 5 ++ src/types.cpp | 21 ++++++++ 9 files changed, 141 insertions(+), 43 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/build_settings.cpp b/src/build_settings.cpp index e0e7810e6..49bb83b22 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -440,6 +440,8 @@ struct BuildContext { bool cached; BuildCacheData build_cache_data; + bool internal_no_inline; + bool no_threaded_checker; bool show_debug_messages; diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 3fcfe29f5..01ff9da5b 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -9947,10 +9947,14 @@ gb_internal ExprKind check_compound_literal(CheckerContext *c, Operand *o, Ast * } Type *et = base_type(t->BitSet.elem); isize field_count = 0; - if (et->kind == Type_Enum) { + if (et != nullptr && et->kind == Type_Enum) { field_count = et->Enum.fields.count; } + if (is_type_array(bit_set_to_int(t))) { + is_constant = false; + } + if (cl->elems[0]->kind == Ast_FieldValue) { error(cl->elems[0], "'field = value' in a bit_set a literal is not allowed"); is_constant = false; diff --git a/src/check_type.cpp b/src/check_type.cpp index fea937e4e..e3609970a 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -939,22 +939,6 @@ gb_internal void check_enum_type(CheckerContext *ctx, Type *enum_type, Type *nam enum_type->Enum.max_value_index = max_value_index; } -gb_internal bool is_valid_bit_field_backing_type(Type *type) { - if (type == nullptr) { - return false; - } - type = base_type(type); - if (is_type_untyped(type)) { - return false; - } - if (is_type_integer(type)) { - return true; - } - if (type->kind == Type_Array) { - return is_type_integer(type->Array.elem); - } - return false; -} gb_internal void check_bit_field_type(CheckerContext *ctx, Type *bit_field_type, Type *named_type, Ast *node) { ast_node(bf, BitFieldType, node); @@ -1268,11 +1252,14 @@ gb_internal void check_bit_set_type(CheckerContext *c, Type *type, Type *named_t Type *t = default_type(lhs.type); if (bs->underlying != nullptr) { Type *u = check_type(c, bs->underlying); + // if (!is_valid_bit_field_backing_type(u)) { if (!is_type_integer(u)) { gbString ts = type_to_string(u); error(bs->underlying, "Expected an underlying integer for the bit set, got %s", ts); gb_string_free(ts); - return; + if (!is_valid_bit_field_backing_type(u)) { + return; + } } type->BitSet.underlying = u; } diff --git a/src/llvm_backend_const.cpp b/src/llvm_backend_const.cpp index 5d9caeba1..12bcc4e1f 100644 --- a/src/llvm_backend_const.cpp +++ b/src/llvm_backend_const.cpp @@ -434,6 +434,8 @@ gb_internal LLVMValueRef lb_big_int_to_llvm(lbModule *m, Type *original_type, Bi } } + GB_ASSERT(!is_type_array(original_type)); + LLVMValueRef value = LLVMConstIntOfArbitraryPrecision(lb_type(m, original_type), cast(unsigned)((sz+7)/8), cast(u64 *)rop); if (big_int_is_neg(a)) { value = LLVMConstNeg(value); diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index bcacc0537..dfb7e162e 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -296,12 +296,6 @@ gb_internal bool lb_try_direct_vector_arith(lbProcedure *p, TokenKind op, lbValu GB_ASSERT(vector_type0 == vector_type1); LLVMTypeRef vector_type = vector_type0; - LLVMValueRef lhs_vp = LLVMBuildPointerCast(p->builder, lhs_ptr.value, LLVMPointerType(vector_type, 0), ""); - LLVMValueRef rhs_vp = LLVMBuildPointerCast(p->builder, rhs_ptr.value, LLVMPointerType(vector_type, 0), ""); - LLVMValueRef x = LLVMBuildLoad2(p->builder, vector_type, lhs_vp, ""); - LLVMValueRef y = LLVMBuildLoad2(p->builder, vector_type, rhs_vp, ""); - LLVMValueRef z = nullptr; - Type *integral_type = base_type(elem_type); if (is_type_simd_vector(integral_type)) { integral_type = core_array_type(integral_type); @@ -311,8 +305,18 @@ gb_internal bool lb_try_direct_vector_arith(lbProcedure *p, TokenKind op, lbValu case Token_Add: op = Token_Or; break; case Token_Sub: op = Token_AndNot; break; } + Type *u = bit_set_to_int(type); + if (is_type_array(u)) { + return false; + } } + LLVMValueRef lhs_vp = LLVMBuildPointerCast(p->builder, lhs_ptr.value, LLVMPointerType(vector_type, 0), ""); + LLVMValueRef rhs_vp = LLVMBuildPointerCast(p->builder, rhs_ptr.value, LLVMPointerType(vector_type, 0), ""); + LLVMValueRef x = LLVMBuildLoad2(p->builder, vector_type, lhs_vp, ""); + LLVMValueRef y = LLVMBuildLoad2(p->builder, vector_type, rhs_vp, ""); + LLVMValueRef z = nullptr; + if (is_type_float(integral_type)) { switch (op) { case Token_Add: @@ -1286,6 +1290,14 @@ handle_op:; case Token_Add: op = Token_Or; break; case Token_Sub: op = Token_AndNot; break; } + Type *u = bit_set_to_int(type); + if (is_type_array(u)) { + lhs.type = u; + rhs.type = u; + res = lb_emit_arith(p, op, lhs, rhs, u); + res.type = type; + return res; + } } Type *integral_type = type; @@ -1441,6 +1453,7 @@ gb_internal lbValue lb_build_binary_in(lbProcedure *p, lbValue left, lbValue rig GB_ASSERT(are_types_identical(left.type, key_type)); Type *it = bit_set_to_int(rt); + left = lb_emit_conv(p, left, it); if (is_type_different_to_arch_endianness(it)) { left = lb_emit_byte_swap(p, left, integer_endian_type_to_platform_type(it)); @@ -2054,6 +2067,26 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { } } + // bit_set <-> backing type + if (is_type_bit_set(src)) { + Type *backing = bit_set_to_int(src); + if (are_types_identical(backing, dst)) { + lbValue res = {}; + res.type = t; + res.value = value.value; + return res; + } + } + if (is_type_bit_set(dst)) { + Type *backing = bit_set_to_int(dst); + if (are_types_identical(src, backing)) { + lbValue res = {}; + res.type = t; + res.value = value.value; + return res; + } + } + // Pointer <-> uintptr if (is_type_pointer(src) && is_type_uintptr(dst)) { @@ -2951,13 +2984,32 @@ gb_internal lbValue lb_emit_comp_against_nil(lbProcedure *p, TokenKind op_kind, case Type_Pointer: case Type_MultiPointer: case Type_Proc: - case Type_BitSet: if (op_kind == Token_CmpEq) { res.value = LLVMBuildIsNull(p->builder, x.value, ""); } else if (op_kind == Token_NotEq) { res.value = LLVMBuildIsNotNull(p->builder, x.value, ""); } return res; + case Type_BitSet: + { + Type *u = bit_set_to_int(bt); + if (is_type_array(u)) { + auto args = array_make(permanent_allocator(), 2); + lbValue lhs = lb_address_from_load_or_generate_local(p, x); + args[0] = lb_emit_conv(p, lhs, t_rawptr); + args[1] = lb_const_int(p->module, t_int, type_size_of(t)); + lbValue val = lb_emit_runtime_call(p, "memory_compare_zero", args); + lbValue res = lb_emit_comp(p, op_kind, val, lb_const_int(p->module, t_int, 0)); + return res; + } else { + if (op_kind == Token_CmpEq) { + res.value = LLVMBuildIsNull(p->builder, x.value, ""); + } else if (op_kind == Token_NotEq) { + res.value = LLVMBuildIsNotNull(p->builder, x.value, ""); + } + } + return res; + } case Type_Slice: { @@ -4878,29 +4930,47 @@ gb_internal lbAddr lb_build_addr_compound_lit(lbProcedure *p, Ast *expr) { case Type_BitSet: { i64 sz = type_size_of(type); if (cl->elems.count > 0 && sz > 0) { - lb_addr_store(p, v, lb_const_value(p->module, type, exact_value_compound(expr))); - lbValue lower = lb_const_value(p->module, t_int, exact_value_i64(bt->BitSet.lower)); - for (Ast *elem : cl->elems) { - GB_ASSERT(elem->kind != Ast_FieldValue); - if (lb_is_elem_const(elem, et)) { - continue; + Type *backing = bit_set_to_int(type); + if (is_type_array(backing)) { + GB_PANIC("TODO: bit_set [N]T"); + Type *base_it = core_array_type(backing); + i64 bits_per_elem = 8*type_size_of(base_it); + gb_unused(bits_per_elem); + lbValue one = lb_const_value(p->module, t_i64, exact_value_i64(1)); + for (Ast *elem : cl->elems) { + GB_ASSERT(elem->kind != Ast_FieldValue); + lbValue expr = lb_build_expr(p, elem); + GB_ASSERT(expr.type->kind != Type_Tuple); + + lbValue e = lb_emit_conv(p, expr, t_i64); + e = lb_emit_arith(p, Token_Sub, e, lower, t_i64); + // lbValue idx = lb_emit_arith(p, Token_Div, e, bits_per_elem, t_i64); + // lbValue val = lb_emit_arith(p, Token_Div, e, bits_per_elem, t_i64); } - - lbValue expr = lb_build_expr(p, elem); - GB_ASSERT(expr.type->kind != Type_Tuple); - + } else { Type *it = bit_set_to_int(bt); lbValue one = lb_const_value(p->module, it, exact_value_i64(1)); - lbValue e = lb_emit_conv(p, expr, it); - e = lb_emit_arith(p, Token_Sub, e, lower, it); - e = lb_emit_arith(p, Token_Shl, one, e, it); - - lbValue old_value = lb_emit_transmute(p, lb_addr_load(p, v), it); - lbValue new_value = lb_emit_arith(p, Token_Or, old_value, e, it); - new_value = lb_emit_transmute(p, new_value, type); - lb_addr_store(p, v, new_value); + for (Ast *elem : cl->elems) { + GB_ASSERT(elem->kind != Ast_FieldValue); + + if (lb_is_elem_const(elem, et)) { + continue; + } + + lbValue expr = lb_build_expr(p, elem); + GB_ASSERT(expr.type->kind != Type_Tuple); + + lbValue e = lb_emit_conv(p, expr, it); + e = lb_emit_arith(p, Token_Sub, e, lower, it); + e = lb_emit_arith(p, Token_Shl, one, e, it); + + lbValue old_value = lb_emit_transmute(p, lb_addr_load(p, v), it); + lbValue new_value = lb_emit_arith(p, Token_Or, old_value, e, it); + new_value = lb_emit_transmute(p, new_value, type); + lb_addr_store(p, v, new_value); + } } } break; diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index bb04fc746..a91c1d1fe 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -1023,6 +1023,8 @@ gb_internal void lb_emit_store(lbProcedure *p, lbValue ptr, lbValue value) { LLVMTypeRef rawptr_type = lb_type(p->module, t_rawptr); LLVMTypeRef rawptr_ptr_type = LLVMPointerType(rawptr_type, 0); LLVMBuildStore(p->builder, LLVMConstNull(rawptr_type), LLVMBuildBitCast(p->builder, ptr.value, rawptr_ptr_type, "")); + } else if (is_type_bit_set(a)) { + lb_mem_zero_ptr(p, ptr.value, a, 1); } else if (lb_sizeof(src_t) <= lb_max_zero_init_size()) { LLVMBuildStore(p->builder, LLVMConstNull(src_t), ptr.value); } else { diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index eefe1c422..4ee4fb769 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -159,6 +159,11 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i case ProcInlining_no_inline: lb_add_attribute_to_proc(m, p->value, "noinline"); break; + default: + if (build_context.internal_no_inline) { + lb_add_attribute_to_proc(m, p->value, "noinline"); + break; + } } switch (entity->Procedure.optimization_mode) { diff --git a/src/main.cpp b/src/main.cpp index 0c3ef1399..e7f4ccc0a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -394,6 +394,7 @@ enum BuildFlagKind { BuildFlag_InternalIgnorePanic, BuildFlag_InternalModulePerFile, BuildFlag_InternalCached, + BuildFlag_InternalNoInline, BuildFlag_Tilde, @@ -598,6 +599,7 @@ gb_internal bool parse_build_flags(Array args) { add_flag(&build_flags, BuildFlag_InternalIgnorePanic, str_lit("internal-ignore-panic"), BuildFlagParam_None, Command_all); add_flag(&build_flags, BuildFlag_InternalModulePerFile, str_lit("internal-module-per-file"), BuildFlagParam_None, Command_all); add_flag(&build_flags, BuildFlag_InternalCached, str_lit("internal-cached"), BuildFlagParam_None, Command_all); + add_flag(&build_flags, BuildFlag_InternalNoInline, str_lit("internal-no-inline"), BuildFlagParam_None, Command_all); #if ALLOW_TILDE add_flag(&build_flags, BuildFlag_Tilde, str_lit("tilde"), BuildFlagParam_None, Command__does_build); @@ -1422,6 +1424,9 @@ gb_internal bool parse_build_flags(Array args) { build_context.cached = true; build_context.use_separate_modules = true; break; + case BuildFlag_InternalNoInline: + build_context.internal_no_inline = true; + break; case BuildFlag_Tilde: build_context.tilde_backend = true; diff --git a/src/types.cpp b/src/types.cpp index fdc174d81..3f86d4c50 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -2011,6 +2011,24 @@ gb_internal bool is_type_valid_bit_set_elem(Type *t) { return false; } + +gb_internal bool is_valid_bit_field_backing_type(Type *type) { + if (type == nullptr) { + return false; + } + type = base_type(type); + if (is_type_untyped(type)) { + return false; + } + if (is_type_integer(type)) { + return true; + } + if (type->kind == Type_Array) { + return is_type_integer(type->Array.elem); + } + return false; +} + gb_internal Type *bit_set_to_int(Type *t) { GB_ASSERT(is_type_bit_set(t)); Type *bt = base_type(t); @@ -2018,6 +2036,9 @@ gb_internal Type *bit_set_to_int(Type *t) { if (underlying != nullptr && is_type_integer(underlying)) { return underlying; } + if (underlying != nullptr && is_valid_bit_field_backing_type(underlying)) { + return underlying; + } i64 sz = type_size_of(t); switch (sz) { -- cgit v1.2.3 From 23ca27f40b956b03499e2095a04b252722e34bb6 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 16 Jul 2024 00:48:17 +0100 Subject: Add intrinsics `add_sat` and `sub_sat` --- src/check_builtin.cpp | 2 ++ src/checker_builtin_procs.hpp | 6 ++++++ src/llvm_backend_proc.cpp | 6 ++++++ 3 files changed, 14 insertions(+) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index db62b2bdb..26de3a112 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -4261,6 +4261,8 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As case BuiltinProc_overflow_add: case BuiltinProc_overflow_sub: case BuiltinProc_overflow_mul: + case BuiltinProc_add_sat: + case BuiltinProc_sub_sat: { Operand x = {}; Operand y = {}; diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index a90c52e61..3a2e1ce22 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -70,6 +70,9 @@ enum BuiltinProcId { BuiltinProc_overflow_sub, BuiltinProc_overflow_mul, + BuiltinProc_add_sat, + BuiltinProc_sub_sat, + BuiltinProc_sqrt, BuiltinProc_fused_mul_add, @@ -393,6 +396,9 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("overflow_sub"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("overflow_mul"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("add_sat"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("sub_sat"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("sqrt"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("fused_mul_add"), 3, false, Expr_Expr, BuiltinProcPkg_intrinsics}, diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 4ee4fb769..bdc381bc4 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -2236,6 +2236,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu case BuiltinProc_overflow_add: case BuiltinProc_overflow_sub: case BuiltinProc_overflow_mul: + case BuiltinProc_add_sat: + case BuiltinProc_sub_sat: { Type *main_type = tv.type; Type *type = main_type; @@ -2254,12 +2256,16 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu case BuiltinProc_overflow_add: name = "llvm.uadd.with.overflow"; break; case BuiltinProc_overflow_sub: name = "llvm.usub.with.overflow"; break; case BuiltinProc_overflow_mul: name = "llvm.umul.with.overflow"; break; + case BuiltinProc_add_sat: name = "llvm.uadd.sat"; break; + case BuiltinProc_sub_sat: name = "llvm.usub.sat"; break; } } else { switch (id) { case BuiltinProc_overflow_add: name = "llvm.sadd.with.overflow"; break; case BuiltinProc_overflow_sub: name = "llvm.ssub.with.overflow"; break; case BuiltinProc_overflow_mul: name = "llvm.smul.with.overflow"; break; + case BuiltinProc_add_sat: name = "llvm.sadd.sat"; break; + case BuiltinProc_sub_sat: name = "llvm.ssub.sat"; break; } } LLVMTypeRef types[1] = {lb_type(p->module, type)}; -- cgit v1.2.3 From 853487e86cd1bede0c5179ed1ba796aad2200f39 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Tue, 16 Jul 2024 22:07:49 +0200 Subject: fix `add_sat` and `sub_sat` intrinsics --- base/intrinsics/intrinsics.odin | 9 ++++++--- src/check_builtin.cpp | 43 ++++++++++++++++++++++++++++++++++++++++- src/llvm_backend_proc.cpp | 42 ++++++++++++++++++++++++++++++++++------ 3 files changed, 84 insertions(+), 10 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index 8a16ca40e..047373cda 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -38,9 +38,12 @@ count_leading_zeros :: proc(x: $T) -> T where type_is_integer(T) || type_is_sim reverse_bits :: proc(x: $T) -> T where type_is_integer(T) || type_is_simd_vector(T) --- byte_swap :: proc(x: $T) -> T where type_is_integer(T) || type_is_float(T) --- -overflow_add :: proc(lhs, rhs: $T) -> (T, bool) --- -overflow_sub :: proc(lhs, rhs: $T) -> (T, bool) --- -overflow_mul :: proc(lhs, rhs: $T) -> (T, bool) --- +overflow_add :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) --- +overflow_sub :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) --- +overflow_mul :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) --- + +add_sat :: proc(lhs, rhs: $T) -> T where type_is_integer(T) --- +sub_sat :: proc(lhs, rhs: $T) -> T where type_is_integer(T) --- sqrt :: proc(x: $T) -> T where type_is_float(T) || (type_is_simd_vector(T) && type_is_float(type_elem_type(T))) --- diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 26de3a112..b6b1f9874 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -4261,6 +4261,47 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As case BuiltinProc_overflow_add: case BuiltinProc_overflow_sub: case BuiltinProc_overflow_mul: + { + Operand x = {}; + Operand y = {}; + check_expr(c, &x, ce->args[0]); + check_expr(c, &y, ce->args[1]); + if (x.mode == Addressing_Invalid) { + return false; + } + if (y.mode == Addressing_Invalid) { + return false; + } + convert_to_typed(c, &y, x.type); if (y.mode == Addressing_Invalid) return false; + convert_to_typed(c, &x, y.type); + if (is_type_untyped(x.type)) { + gbString xts = type_to_string(x.type); + error(x.expr, "Expected a typed integer for '%.*s', got %s", LIT(builtin_name), xts); + gb_string_free(xts); + return false; + } + if (!is_type_integer(x.type)) { + gbString xts = type_to_string(x.type); + error(x.expr, "Expected an integer for '%.*s', got %s", LIT(builtin_name), xts); + gb_string_free(xts); + return false; + } + Type *ct = core_type(x.type); + if (is_type_different_to_arch_endianness(ct)) { + GB_ASSERT(ct->kind == Type_Basic); + if (ct->Basic.flags & (BasicFlag_EndianLittle|BasicFlag_EndianBig)) { + gbString xts = type_to_string(x.type); + error(x.expr, "Expected an integer which does not specify the explicit endianness for '%.*s', got %s", LIT(builtin_name), xts); + gb_string_free(xts); + return false; + } + } + + operand->mode = Addressing_Value; + operand->type = make_optional_ok_type(default_type(x.type)); + } + break; + case BuiltinProc_add_sat: case BuiltinProc_sub_sat: { @@ -4300,7 +4341,7 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As } operand->mode = Addressing_Value; - operand->type = make_optional_ok_type(default_type(x.type)); + operand->type = default_type(x.type); } break; diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index bdc381bc4..e26b2e50f 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -2236,8 +2236,6 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu case BuiltinProc_overflow_add: case BuiltinProc_overflow_sub: case BuiltinProc_overflow_mul: - case BuiltinProc_add_sat: - case BuiltinProc_sub_sat: { Type *main_type = tv.type; Type *type = main_type; @@ -2256,16 +2254,12 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu case BuiltinProc_overflow_add: name = "llvm.uadd.with.overflow"; break; case BuiltinProc_overflow_sub: name = "llvm.usub.with.overflow"; break; case BuiltinProc_overflow_mul: name = "llvm.umul.with.overflow"; break; - case BuiltinProc_add_sat: name = "llvm.uadd.sat"; break; - case BuiltinProc_sub_sat: name = "llvm.usub.sat"; break; } } else { switch (id) { case BuiltinProc_overflow_add: name = "llvm.sadd.with.overflow"; break; case BuiltinProc_overflow_sub: name = "llvm.ssub.with.overflow"; break; case BuiltinProc_overflow_mul: name = "llvm.smul.with.overflow"; break; - case BuiltinProc_add_sat: name = "llvm.sadd.sat"; break; - case BuiltinProc_sub_sat: name = "llvm.ssub.sat"; break; } } LLVMTypeRef types[1] = {lb_type(p->module, type)}; @@ -2291,6 +2285,42 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu return res; } + case BuiltinProc_add_sat: + case BuiltinProc_sub_sat: + { + Type *main_type = tv.type; + Type *type = main_type; + if (is_type_tuple(main_type)) { + type = main_type->Tuple.variables[0]->type; + } + + lbValue x = lb_build_expr(p, ce->args[0]); + lbValue y = lb_build_expr(p, ce->args[1]); + x = lb_emit_conv(p, x, type); + y = lb_emit_conv(p, y, type); + + char const *name = nullptr; + if (is_type_unsigned(type)) { + switch (id) { + case BuiltinProc_add_sat: name = "llvm.uadd.sat"; break; + case BuiltinProc_sub_sat: name = "llvm.usub.sat"; break; + } + } else { + switch (id) { + case BuiltinProc_add_sat: name = "llvm.sadd.sat"; break; + case BuiltinProc_sub_sat: name = "llvm.ssub.sat"; break; + } + } + LLVMTypeRef types[1] = {lb_type(p->module, type)}; + + LLVMValueRef args[2] = { x.value, y.value }; + + lbValue res = {}; + res.value = lb_call_intrinsic(p, name, args, gb_count_of(args), types, gb_count_of(types)); + res.type = type; + return res; + } + case BuiltinProc_sqrt: { Type *type = tv.type; -- cgit v1.2.3 From 47f14dd9ea2a9ce0df263f8a3fe2e6ccf5b53751 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Tue, 16 Jul 2024 22:11:54 +0200 Subject: type is never a tuple here --- src/llvm_backend_proc.cpp | 3 --- 1 file changed, 3 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index e26b2e50f..2f736ff6c 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -2290,9 +2290,6 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu { Type *main_type = tv.type; Type *type = main_type; - if (is_type_tuple(main_type)) { - type = main_type->Tuple.variables[0]->type; - } lbValue x = lb_build_expr(p, ce->args[0]); lbValue y = lb_build_expr(p, ce->args[1]); -- cgit v1.2.3 From 9a01a13914e9b1f577399fed7ed09132306946b1 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 5 Aug 2024 13:13:19 +0100 Subject: Add `simd_reduce_any` and `simd_reduce_all` --- base/intrinsics/intrinsics.odin | 18 +++++++++++------- core/simd/simd.odin | 3 +++ src/check_builtin.cpp | 23 +++++++++++++++++++++++ src/checker_builtin_procs.hpp | 7 +++++++ src/llvm_backend_proc.cpp | 17 +++++++++++++++++ 5 files changed, 61 insertions(+), 7 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index 37a42b904..277eafdf1 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -268,13 +268,17 @@ simd_lanes_ge :: proc(a, b: #simd[N]T) -> #simd[N]Integer --- simd_extract :: proc(a: #simd[N]T, idx: uint) -> T --- simd_replace :: proc(a: #simd[N]T, idx: uint, elem: T) -> #simd[N]T --- -simd_reduce_add_ordered :: proc(a: #simd[N]T) -> T --- -simd_reduce_mul_ordered :: proc(a: #simd[N]T) -> T --- -simd_reduce_min :: proc(a: #simd[N]T) -> T --- -simd_reduce_max :: proc(a: #simd[N]T) -> T --- -simd_reduce_and :: proc(a: #simd[N]T) -> T --- -simd_reduce_or :: proc(a: #simd[N]T) -> T --- -simd_reduce_xor :: proc(a: #simd[N]T) -> T --- +simd_reduce_add_ordered :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)--- +simd_reduce_mul_ordered :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)--- +simd_reduce_min :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)--- +simd_reduce_max :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)--- +simd_reduce_and :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)--- +simd_reduce_or :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)--- +simd_reduce_xor :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)--- + +simd_reduce_any :: proc(a: #simd[N]T) -> T where type_is_boolean(T) --- +simd_reduce_all :: proc(a: #simd[N]T) -> T where type_is_boolean(T) --- + simd_shuffle :: proc(a, b: #simd[N]T, indices: ..int) -> #simd[len(indices)]T --- simd_select :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T --- diff --git a/core/simd/simd.odin b/core/simd/simd.odin index c5a594df6..6166f703b 100644 --- a/core/simd/simd.odin +++ b/core/simd/simd.odin @@ -115,6 +115,9 @@ reduce_and :: intrinsics.simd_reduce_and reduce_or :: intrinsics.simd_reduce_or reduce_xor :: intrinsics.simd_reduce_xor +reduce_any :: intrinsics.simd_reduce_any +reduce_all :: intrinsics.simd_reduce_all + // swizzle :: proc(a: #simd[N]T, indices: ..int) -> #simd[len(indices)]T swizzle :: builtin.swizzle diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index b96337326..73a90ed62 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -775,6 +775,29 @@ gb_internal bool check_builtin_simd_operation(CheckerContext *c, Operand *operan return true; } + case BuiltinProc_simd_reduce_any: + case BuiltinProc_simd_reduce_all: + { + Operand x = {}; + check_expr(c, &x, ce->args[0]); if (x.mode == Addressing_Invalid) return false; + + if (!is_type_simd_vector(x.type)) { + error(x.expr, "'%.*s' expected a simd vector type", LIT(builtin_name)); + return false; + } + Type *elem = base_array_type(x.type); + if (!is_type_boolean(elem)) { + gbString xs = type_to_string(x.type); + error(x.expr, "'%.*s' expected a #simd type with a boolean element, got '%s'", LIT(builtin_name), xs); + gb_string_free(xs); + return false; + } + + operand->mode = Addressing_Value; + operand->type = t_untyped_bool; + return true; + } + case BuiltinProc_simd_shuffle: { diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index 3a2e1ce22..b6eb326d2 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -174,6 +174,9 @@ BuiltinProc__simd_begin, BuiltinProc_simd_reduce_or, BuiltinProc_simd_reduce_xor, + BuiltinProc_simd_reduce_any, + BuiltinProc_simd_reduce_all, + BuiltinProc_simd_shuffle, BuiltinProc_simd_select, @@ -501,6 +504,10 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("simd_reduce_or"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("simd_reduce_xor"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_reduce_any"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_reduce_all"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + + {STR_LIT("simd_shuffle"), 2, true, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("simd_select"), 3, false, Expr_Expr, BuiltinProcPkg_intrinsics}, diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 2f736ff6c..ee121d6f2 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1527,6 +1527,23 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn return res; } + case BuiltinProc_simd_reduce_any: + case BuiltinProc_simd_reduce_all: + { + char const *name = nullptr; + switch (builtin_id) { + case BuiltinProc_simd_reduce_any: name = "llvm.vector.reduce.and"; break; + case BuiltinProc_simd_reduce_all: name = "llvm.vector.reduce.or"; break; + } + + LLVMTypeRef types[1] = { lb_type(p->module, arg0.type) }; + LLVMValueRef args[1] = { arg0.value }; + + res.value = lb_call_intrinsic(p, name, args, gb_count_of(args), types, gb_count_of(types)); + return res; + } + + case BuiltinProc_simd_shuffle: { Type *vt = arg0.type; -- cgit v1.2.3 From 90fc52c2ee2ac3e5c01744f57d1e02a30e19b55a Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 5 Aug 2024 13:19:01 +0100 Subject: Rename `add_sat` -> `saturating_add` --- base/intrinsics/intrinsics.odin | 7 +++++-- core/simd/simd.odin | 4 ++-- src/check_builtin.cpp | 12 ++++++------ src/checker_builtin_procs.hpp | 16 ++++++++-------- src/llvm_backend_proc.cpp | 20 ++++++++++---------- 5 files changed, 31 insertions(+), 28 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index 277eafdf1..fadbda3fb 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -42,8 +42,8 @@ overflow_add :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #option overflow_sub :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #optional_ok --- overflow_mul :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #optional_ok --- -add_sat :: proc(lhs, rhs: $T) -> T where type_is_integer(T) --- -sub_sat :: proc(lhs, rhs: $T) -> T where type_is_integer(T) --- +saturating_add :: proc(lhs, rhs: $T) -> T where type_is_integer(T) --- +saturating_sub :: proc(lhs, rhs: $T) -> T where type_is_integer(T) --- sqrt :: proc(x: $T) -> T where type_is_float(T) || (type_is_simd_vector(T) && type_is_float(type_elem_type(T))) --- @@ -227,6 +227,9 @@ simd_sub :: proc(a, b: #simd[N]T) -> #simd[N]T --- simd_mul :: proc(a, b: #simd[N]T) -> #simd[N]T --- simd_div :: proc(a, b: #simd[N]T) -> #simd[N]T where type_is_float(T) --- +simd_saturating_add :: proc(a, b: #simd[N]T) -> #simd[N]T where type_is_integer(T) --- +simd_saturating_sub :: proc(a, b: #simd[N]T) -> #simd[N]T where type_is_integer(T) --- + // Keeps Odin's Behaviour // (x << y) if y <= mask else 0 simd_shl :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T --- diff --git a/core/simd/simd.odin b/core/simd/simd.odin index 6166f703b..46f6a0112 100644 --- a/core/simd/simd.odin +++ b/core/simd/simd.odin @@ -74,8 +74,8 @@ shl_masked :: intrinsics.simd_shl_masked shr_masked :: intrinsics.simd_shr_masked // Saturation Arithmetic -add_sat :: intrinsics.simd_add_sat -sub_sat :: intrinsics.simd_sub_sat +saturating_add :: intrinsics.simd_saturating_add +saturating_sub :: intrinsics.simd_saturating_sub bit_and :: intrinsics.simd_bit_and bit_or :: intrinsics.simd_bit_or diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 73a90ed62..b4967a56a 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -470,8 +470,8 @@ gb_internal bool check_builtin_simd_operation(CheckerContext *c, Operand *operan } // Integer only - case BuiltinProc_simd_add_sat: - case BuiltinProc_simd_sub_sat: + case BuiltinProc_simd_saturating_add: + case BuiltinProc_simd_saturating_sub: case BuiltinProc_simd_bit_and: case BuiltinProc_simd_bit_or: case BuiltinProc_simd_bit_xor: @@ -501,8 +501,8 @@ gb_internal bool check_builtin_simd_operation(CheckerContext *c, Operand *operan Type *elem = base_array_type(x.type); switch (id) { - case BuiltinProc_simd_add_sat: - case BuiltinProc_simd_sub_sat: + case BuiltinProc_simd_saturating_add: + case BuiltinProc_simd_saturating_sub: if (!is_type_integer(elem)) { gbString xs = type_to_string(x.type); error(x.expr, "'%.*s' expected a #simd type with an integer element, got '%s'", LIT(builtin_name), xs); @@ -4325,8 +4325,8 @@ gb_internal bool check_builtin_procedure(CheckerContext *c, Operand *operand, As } break; - case BuiltinProc_add_sat: - case BuiltinProc_sub_sat: + case BuiltinProc_saturating_add: + case BuiltinProc_saturating_sub: { Operand x = {}; Operand y = {}; diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index b6eb326d2..7fd71c36a 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -70,8 +70,8 @@ enum BuiltinProcId { BuiltinProc_overflow_sub, BuiltinProc_overflow_mul, - BuiltinProc_add_sat, - BuiltinProc_sub_sat, + BuiltinProc_saturating_add, + BuiltinProc_saturating_sub, BuiltinProc_sqrt, BuiltinProc_fused_mul_add, @@ -141,8 +141,8 @@ BuiltinProc__simd_begin, BuiltinProc_simd_shl_masked, // C logic BuiltinProc_simd_shr_masked, // C logic - BuiltinProc_simd_add_sat, // saturation arithmetic - BuiltinProc_simd_sub_sat, // saturation arithmetic + BuiltinProc_simd_saturating_add, // saturation arithmetic + BuiltinProc_simd_saturating_sub, // saturation arithmetic BuiltinProc_simd_bit_and, BuiltinProc_simd_bit_or, @@ -399,8 +399,8 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("overflow_sub"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("overflow_mul"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, - {STR_LIT("add_sat"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, - {STR_LIT("sub_sat"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("saturating_add"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("saturating_sub"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("sqrt"), 1, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("fused_mul_add"), 3, false, Expr_Expr, BuiltinProcPkg_intrinsics}, @@ -470,8 +470,8 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("simd_shl_masked"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("simd_shr_masked"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, - {STR_LIT("simd_add_sat"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, - {STR_LIT("simd_sub_sat"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_saturating_add"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_saturating_sub"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("simd_bit_and"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("simd_bit_or"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index ee121d6f2..64db2ad36 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1646,13 +1646,13 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn } - case BuiltinProc_simd_add_sat: - case BuiltinProc_simd_sub_sat: + case BuiltinProc_simd_saturating_add: + case BuiltinProc_simd_saturating_sub: { char const *name = nullptr; switch (builtin_id) { - case BuiltinProc_simd_add_sat: name = is_signed ? "llvm.sadd.sat" : "llvm.uadd.sat"; break; - case BuiltinProc_simd_sub_sat: name = is_signed ? "llvm.ssub.sat" : "llvm.usub.sat"; break; + case BuiltinProc_simd_saturating_add: name = is_signed ? "llvm.sadd.sat" : "llvm.uadd.sat"; break; + case BuiltinProc_simd_saturating_sub: name = is_signed ? "llvm.ssub.sat" : "llvm.usub.sat"; break; } LLVMTypeRef types[1] = {lb_type(p->module, arg0.type)}; @@ -2302,8 +2302,8 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu return res; } - case BuiltinProc_add_sat: - case BuiltinProc_sub_sat: + case BuiltinProc_saturating_add: + case BuiltinProc_saturating_sub: { Type *main_type = tv.type; Type *type = main_type; @@ -2316,13 +2316,13 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu char const *name = nullptr; if (is_type_unsigned(type)) { switch (id) { - case BuiltinProc_add_sat: name = "llvm.uadd.sat"; break; - case BuiltinProc_sub_sat: name = "llvm.usub.sat"; break; + case BuiltinProc_saturating_add: name = "llvm.uadd.sat"; break; + case BuiltinProc_saturating_sub: name = "llvm.usub.sat"; break; } } else { switch (id) { - case BuiltinProc_add_sat: name = "llvm.sadd.sat"; break; - case BuiltinProc_sub_sat: name = "llvm.ssub.sat"; break; + case BuiltinProc_saturating_add: name = "llvm.sadd.sat"; break; + case BuiltinProc_saturating_sub: name = "llvm.ssub.sat"; break; } } LLVMTypeRef types[1] = {lb_type(p->module, type)}; -- cgit v1.2.3 From 7e701d1677fbe594e0970824062d8b3564d33d26 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 5 Aug 2024 13:46:24 +0100 Subject: Add `intrinsics.simd_gather` and ``intrinsics.simd_scatter` --- base/intrinsics/intrinsics.odin | 4 +++ core/simd/simd.odin | 5 ++++ src/check_builtin.cpp | 54 +++++++++++++++++++++++++++++++++++++++++ src/checker_builtin_procs.hpp | 6 +++++ src/llvm_backend_proc.cpp | 46 +++++++++++++++++++++++++++++++++++ 5 files changed, 115 insertions(+) (limited to 'src/llvm_backend_proc.cpp') diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index fadbda3fb..5566c8c6c 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -283,6 +283,10 @@ simd_reduce_any :: proc(a: #simd[N]T) -> T where type_is_boolean(T) --- simd_reduce_all :: proc(a: #simd[N]T) -> T where type_is_boolean(T) --- +simd_gather :: proc(ptr: #simd[N]rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) --- +simd_scatter :: proc(ptr: #simd[N]rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) --- + + simd_shuffle :: proc(a, b: #simd[N]T, indices: ..int) -> #simd[len(indices)]T --- simd_select :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T --- diff --git a/core/simd/simd.odin b/core/simd/simd.odin index 46f6a0112..2fc0bc2c0 100644 --- a/core/simd/simd.odin +++ b/core/simd/simd.odin @@ -102,6 +102,11 @@ lanes_le :: intrinsics.simd_lanes_le lanes_gt :: intrinsics.simd_lanes_gt lanes_ge :: intrinsics.simd_lanes_ge + +// Gather and Scatter intrinsics +gather :: intrinsics.simd_gather +scatter :: intrinsics.simd_scatter + // extract :: proc(a: #simd[N]T, idx: uint) -> T extract :: intrinsics.simd_extract // replace :: proc(a: #simd[N]T, idx: uint, elem: T) -> #simd[N]T diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index b4967a56a..99a989b4f 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -663,6 +663,60 @@ gb_internal bool check_builtin_simd_operation(CheckerContext *c, Operand *operan return true; } + case BuiltinProc_simd_gather: + case BuiltinProc_simd_scatter: + { + // gather (ptr: #simd[N]rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) -> #simd[N]T + // scatter(ptr: #simd[N]rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) + + Operand ptr = {}; + Operand values = {}; + Operand mask = {}; + check_expr(c, &ptr, ce->args[0]); if (ptr.mode == Addressing_Invalid) return false; + check_expr(c, &values, ce->args[1]); if (values.mode == Addressing_Invalid) return false; + check_expr(c, &mask, ce->args[2]); if (mask.mode == Addressing_Invalid) return false; + if (!is_type_simd_vector(ptr.type)) { error(ptr.expr, "'%.*s' expected a simd vector type", LIT(builtin_name)); return false; } + if (!is_type_simd_vector(values.type)) { error(values.expr, "'%.*s' expected a simd vector type", LIT(builtin_name)); return false; } + if (!is_type_simd_vector(mask.type)) { error(mask.expr, "'%.*s' expected a simd vector type", LIT(builtin_name)); return false; } + + Type *ptr_elem = base_array_type(ptr.type); + if (!is_type_rawptr(ptr_elem)) { + gbString s = type_to_string(ptr.type); + error(ptr.expr, "Expected a simd vector of 'rawptr' for the addresses, got %s", s); + gb_string_free(s); + return false; + } + Type *mask_elem = base_array_type(mask.type); + + if (!is_type_integer(mask_elem) && !is_type_boolean(mask_elem)) { + gbString s = type_to_string(mask.type); + error(mask.expr, "Expected a simd vector of integers or booleans for the mask, got %s", s); + gb_string_free(s); + return false; + } + + i64 ptr_count = get_array_type_count(ptr.type); + i64 values_count = get_array_type_count(values.type); + i64 mask_count = get_array_type_count(mask.type); + if (ptr_count != values_count || + values_count != mask_count || + mask_count != ptr_count) { + gbString s = type_to_string(mask.type); + error(mask.expr, "All simd vectors must be of the same length, got %lld vs %lld vs %lld", cast(long long)ptr_count, cast(long long)values_count, cast(long long)mask_count); + gb_string_free(s); + return false; + } + + if (id == BuiltinProc_simd_gather) { + operand->mode = Addressing_Value; + operand->type = values.type; + } else { + operand->mode = Addressing_NoValue; + operand->type = nullptr; + } + return true; + } + case BuiltinProc_simd_extract: { Operand x = {}; diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index 7fd71c36a..826c10e10 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -191,6 +191,9 @@ BuiltinProc__simd_begin, BuiltinProc_simd_lanes_rotate_left, BuiltinProc_simd_lanes_rotate_right, + BuiltinProc_simd_gather, + BuiltinProc_simd_scatter, + // Platform specific SIMD intrinsics BuiltinProc_simd_x86__MM_SHUFFLE, @@ -522,6 +525,9 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("simd_lanes_rotate_left"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("simd_lanes_rotate_right"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_gather"), 3, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_scatter"), 3, false, Expr_Stmt, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_x86__MM_SHUFFLE"), 4, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT(""), 0, false, Expr_Stmt, BuiltinProcPkg_intrinsics}, diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 64db2ad36..5ccbd3399 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1688,6 +1688,52 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn return res; } + + case BuiltinProc_simd_gather: + case BuiltinProc_simd_scatter: + { + LLVMValueRef ptr = arg0.value; + LLVMValueRef val = arg1.value; + LLVMValueRef mask = arg2.value; + + unsigned count = cast(unsigned)get_array_type_count(arg0.type); + + LLVMTypeRef mask_type = LLVMVectorType(LLVMInt1TypeInContext(p->module->ctx), count); + mask = LLVMBuildTrunc(p->builder, mask, mask_type, ""); + + char const *name = nullptr; + switch (builtin_id) { + case BuiltinProc_simd_gather: name = "llvm.masked.gather"; break; + case BuiltinProc_simd_scatter: name = "llvm.masked.scatter"; break; + } + LLVMTypeRef types[2] = { + lb_type(p->module, arg1.type), + lb_type(p->module, arg0.type) + }; + + auto alignment = cast(unsigned long long)type_align_of(base_array_type(arg1.type)); + LLVMValueRef align = LLVMConstInt(LLVMInt32TypeInContext(p->module->ctx), alignment, false); + + LLVMValueRef args[4] = {}; + switch (builtin_id) { + case BuiltinProc_simd_gather: + args[0] = ptr; + args[1] = align; + args[2] = mask; + args[3] = val; + break; + case BuiltinProc_simd_scatter: + args[0] = val; + args[1] = ptr; + args[2] = align; + args[3] = mask; + break; + } + + res.value = lb_call_intrinsic(p, name, args, gb_count_of(args), types, gb_count_of(types)); + return res; + + } } GB_PANIC("Unhandled simd intrinsic: '%.*s'", LIT(builtin_procs[builtin_id].name)); -- cgit v1.2.3 From 84ac56f77881c38762ad1a3cf66d4340c8d847d8 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 5 Aug 2024 14:08:41 +0100 Subject: Add `intrinsics.simd_masked_load` and `intrinsics.simd_masked_store` --- base/intrinsics/intrinsics.odin | 7 +++-- core/simd/simd.odin | 3 ++ src/check_builtin.cpp | 62 +++++++++++++++++++++++++++++------------ src/checker.cpp | 4 +-- src/checker_builtin_procs.hpp | 9 ++++-- src/llvm_backend_proc.cpp | 18 ++++++++++-- 6 files changed, 75 insertions(+), 28 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index 5566c8c6c..7aa56a9e9 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -283,8 +283,11 @@ simd_reduce_any :: proc(a: #simd[N]T) -> T where type_is_boolean(T) --- simd_reduce_all :: proc(a: #simd[N]T) -> T where type_is_boolean(T) --- -simd_gather :: proc(ptr: #simd[N]rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) --- -simd_scatter :: proc(ptr: #simd[N]rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) --- +simd_gather :: proc(ptr: #simd[N]rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) --- +simd_scatter :: proc(ptr: #simd[N]rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) --- + +simd_masked_load :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) --- +simd_masked_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) --- simd_shuffle :: proc(a, b: #simd[N]T, indices: ..int) -> #simd[len(indices)]T --- diff --git a/core/simd/simd.odin b/core/simd/simd.odin index 2fc0bc2c0..f8924e5de 100644 --- a/core/simd/simd.odin +++ b/core/simd/simd.odin @@ -106,6 +106,9 @@ lanes_ge :: intrinsics.simd_lanes_ge // Gather and Scatter intrinsics gather :: intrinsics.simd_gather scatter :: intrinsics.simd_scatter +masked_load :: intrinsics.simd_gather +masked_store :: intrinsics.simd_scatter + // extract :: proc(a: #simd[N]T, idx: uint) -> T extract :: intrinsics.simd_extract diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 99a989b4f..b5851bc01 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -665,26 +665,40 @@ gb_internal bool check_builtin_simd_operation(CheckerContext *c, Operand *operan case BuiltinProc_simd_gather: case BuiltinProc_simd_scatter: + case BuiltinProc_simd_masked_load: + case BuiltinProc_simd_masked_store: { // gather (ptr: #simd[N]rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) -> #simd[N]T // scatter(ptr: #simd[N]rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) + // masked_load (ptr: rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) -> #simd[N]T + // masked_store(ptr: rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) + Operand ptr = {}; Operand values = {}; Operand mask = {}; check_expr(c, &ptr, ce->args[0]); if (ptr.mode == Addressing_Invalid) return false; check_expr(c, &values, ce->args[1]); if (values.mode == Addressing_Invalid) return false; check_expr(c, &mask, ce->args[2]); if (mask.mode == Addressing_Invalid) return false; - if (!is_type_simd_vector(ptr.type)) { error(ptr.expr, "'%.*s' expected a simd vector type", LIT(builtin_name)); return false; } if (!is_type_simd_vector(values.type)) { error(values.expr, "'%.*s' expected a simd vector type", LIT(builtin_name)); return false; } if (!is_type_simd_vector(mask.type)) { error(mask.expr, "'%.*s' expected a simd vector type", LIT(builtin_name)); return false; } - Type *ptr_elem = base_array_type(ptr.type); - if (!is_type_rawptr(ptr_elem)) { - gbString s = type_to_string(ptr.type); - error(ptr.expr, "Expected a simd vector of 'rawptr' for the addresses, got %s", s); - gb_string_free(s); - return false; + if (id == BuiltinProc_simd_gather || id == BuiltinProc_simd_scatter) { + if (!is_type_simd_vector(ptr.type)) { error(ptr.expr, "'%.*s' expected a simd vector type", LIT(builtin_name)); return false; } + Type *ptr_elem = base_array_type(ptr.type); + if (!is_type_rawptr(ptr_elem)) { + gbString s = type_to_string(ptr.type); + error(ptr.expr, "Expected a simd vector of 'rawptr' for the addresses, got %s", s); + gb_string_free(s); + return false; + } + } else { + if (!is_type_pointer(ptr.type)) { + gbString s = type_to_string(ptr.type); + error(ptr.expr, "Expected a pointer type for the address, got %s", s); + gb_string_free(s); + return false; + } } Type *mask_elem = base_array_type(mask.type); @@ -695,19 +709,31 @@ gb_internal bool check_builtin_simd_operation(CheckerContext *c, Operand *operan return false; } - i64 ptr_count = get_array_type_count(ptr.type); - i64 values_count = get_array_type_count(values.type); - i64 mask_count = get_array_type_count(mask.type); - if (ptr_count != values_count || - values_count != mask_count || - mask_count != ptr_count) { - gbString s = type_to_string(mask.type); - error(mask.expr, "All simd vectors must be of the same length, got %lld vs %lld vs %lld", cast(long long)ptr_count, cast(long long)values_count, cast(long long)mask_count); - gb_string_free(s); - return false; + if (id == BuiltinProc_simd_gather || id == BuiltinProc_simd_scatter) { + i64 ptr_count = get_array_type_count(ptr.type); + i64 values_count = get_array_type_count(values.type); + i64 mask_count = get_array_type_count(mask.type); + if (ptr_count != values_count || + values_count != mask_count || + mask_count != ptr_count) { + gbString s = type_to_string(mask.type); + error(mask.expr, "All simd vectors must be of the same length, got %lld vs %lld vs %lld", cast(long long)ptr_count, cast(long long)values_count, cast(long long)mask_count); + gb_string_free(s); + return false; + } + } else { + i64 values_count = get_array_type_count(values.type); + i64 mask_count = get_array_type_count(mask.type); + if (values_count != mask_count) { + gbString s = type_to_string(mask.type); + error(mask.expr, "All simd vectors must be of the same length, got %lld vs %lld", cast(long long)values_count, cast(long long)mask_count); + gb_string_free(s); + return false; + } } - if (id == BuiltinProc_simd_gather) { + if (id == BuiltinProc_simd_gather || + id == BuiltinProc_simd_masked_load) { operand->mode = Addressing_Value; operand->type = values.type; } else { diff --git a/src/checker.cpp b/src/checker.cpp index 3eae271a0..60000ec29 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -1651,9 +1651,9 @@ gb_internal void add_type_and_value(CheckerContext *ctx, Ast *expr, AddressingMo if (mode == Addressing_Constant || mode == Addressing_Invalid) { expr->tav.value = value; - } else if (mode == Addressing_Value && is_type_typeid(type)) { + } else if (mode == Addressing_Value && type != nullptr && is_type_typeid(type)) { expr->tav.value = value; - } else if (mode == Addressing_Value && is_type_proc(type)) { + } else if (mode == Addressing_Value && type != nullptr && is_type_proc(type)) { expr->tav.value = value; } diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index 826c10e10..a5f688cd8 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -193,7 +193,8 @@ BuiltinProc__simd_begin, BuiltinProc_simd_gather, BuiltinProc_simd_scatter, - + BuiltinProc_simd_masked_load, + BuiltinProc_simd_masked_store, // Platform specific SIMD intrinsics BuiltinProc_simd_x86__MM_SHUFFLE, @@ -525,8 +526,10 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("simd_lanes_rotate_left"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("simd_lanes_rotate_right"), 2, false, Expr_Expr, BuiltinProcPkg_intrinsics}, - {STR_LIT("simd_gather"), 3, false, Expr_Expr, BuiltinProcPkg_intrinsics}, - {STR_LIT("simd_scatter"), 3, false, Expr_Stmt, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_gather"), 3, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_scatter"), 3, false, Expr_Stmt, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_masked_load"), 3, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_masked_store"), 3, false, Expr_Stmt, BuiltinProcPkg_intrinsics}, {STR_LIT("simd_x86__MM_SHUFFLE"), 4, false, Expr_Expr, BuiltinProcPkg_intrinsics}, diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 5ccbd3399..bfdac7c96 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1691,20 +1691,24 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn case BuiltinProc_simd_gather: case BuiltinProc_simd_scatter: + case BuiltinProc_simd_masked_load: + case BuiltinProc_simd_masked_store: { LLVMValueRef ptr = arg0.value; LLVMValueRef val = arg1.value; LLVMValueRef mask = arg2.value; - unsigned count = cast(unsigned)get_array_type_count(arg0.type); + unsigned count = cast(unsigned)get_array_type_count(arg1.type); LLVMTypeRef mask_type = LLVMVectorType(LLVMInt1TypeInContext(p->module->ctx), count); mask = LLVMBuildTrunc(p->builder, mask, mask_type, ""); char const *name = nullptr; switch (builtin_id) { - case BuiltinProc_simd_gather: name = "llvm.masked.gather"; break; - case BuiltinProc_simd_scatter: name = "llvm.masked.scatter"; break; + case BuiltinProc_simd_gather: name = "llvm.masked.gather"; break; + case BuiltinProc_simd_scatter: name = "llvm.masked.scatter"; break; + case BuiltinProc_simd_masked_load: name = "llvm.masked.load"; break; + case BuiltinProc_simd_masked_store: name = "llvm.masked.store"; break; } LLVMTypeRef types[2] = { lb_type(p->module, arg1.type), @@ -1716,12 +1720,20 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn LLVMValueRef args[4] = {}; switch (builtin_id) { + case BuiltinProc_simd_masked_load: + types[1] = lb_type(p->module, t_rawptr); + /*fallthrough*/ case BuiltinProc_simd_gather: args[0] = ptr; args[1] = align; args[2] = mask; args[3] = val; + // res.type = arg1.type; break; + + case BuiltinProc_simd_masked_store: + types[1] = lb_type(p->module, t_rawptr); + /*fallthrough*/ case BuiltinProc_simd_scatter: args[0] = val; args[1] = ptr; -- cgit v1.2.3 From 80ea4e0aeb0de21fe0855c468669c16ec48b8c40 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 5 Aug 2024 14:25:33 +0100 Subject: Remove dead code --- base/intrinsics/intrinsics.odin | 3 --- src/llvm_backend_proc.cpp | 1 - 2 files changed, 4 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index 7aa56a9e9..e78a41719 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -240,9 +240,6 @@ simd_shr :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T --- simd_shl_masked :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T --- simd_shr_masked :: proc(a: #simd[N]T, b: #simd[N]Unsigned_Integer) -> #simd[N]T --- -simd_add_sat :: proc(a, b: #simd[N]T) -> #simd[N]T --- -simd_sub_sat :: proc(a, b: #simd[N]T) -> #simd[N]T --- - simd_bit_and :: proc(a, b: #simd[N]T) -> #simd[N]T --- simd_bit_or :: proc(a, b: #simd[N]T) -> #simd[N]T --- simd_bit_xor :: proc(a, b: #simd[N]T) -> #simd[N]T --- diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index bfdac7c96..ce1cc8586 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1728,7 +1728,6 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn args[1] = align; args[2] = mask; args[3] = val; - // res.type = arg1.type; break; case BuiltinProc_simd_masked_store: -- cgit v1.2.3 From f56abf37804240c508f0ca7f249176208d333c72 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 5 Aug 2024 14:54:09 +0100 Subject: Add `intrinsics.masked_expand_load` and `intrinsics.masked_compress_store` --- base/intrinsics/intrinsics.odin | 4 ++++ core/simd/simd.odin | 3 ++- src/check_builtin.cpp | 7 ++++++- src/checker_builtin_procs.hpp | 4 ++++ src/llvm_backend_proc.cpp | 34 ++++++++++++++++++++++++++++------ 5 files changed, 44 insertions(+), 8 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index e78a41719..c78559f3f 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -286,6 +286,10 @@ simd_scatter :: proc(ptr: #simd[N]rawptr, val: #simd[N]T, mask: #simd[N]U) simd_masked_load :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) --- simd_masked_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) --- +simd_masked_expand_load :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) --- +simd_masked_compress_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) --- + + simd_shuffle :: proc(a, b: #simd[N]T, indices: ..int) -> #simd[len(indices)]T --- simd_select :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T --- diff --git a/core/simd/simd.odin b/core/simd/simd.odin index e93c94687..1f3c67b72 100644 --- a/core/simd/simd.odin +++ b/core/simd/simd.odin @@ -108,7 +108,8 @@ gather :: intrinsics.simd_gather scatter :: intrinsics.simd_scatter masked_load :: intrinsics.simd_masked_load masked_store :: intrinsics.simd_masked_store - +masked_expand_load :: intrinsics.simd_masked_expand_load +masked_compress_store :: intrinsics.simd_masked_compress_store // extract :: proc(a: #simd[N]T, idx: uint) -> T extract :: intrinsics.simd_extract diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index b5851bc01..bde102a8d 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -667,12 +667,16 @@ gb_internal bool check_builtin_simd_operation(CheckerContext *c, Operand *operan case BuiltinProc_simd_scatter: case BuiltinProc_simd_masked_load: case BuiltinProc_simd_masked_store: + case BuiltinProc_simd_masked_expand_load: + case BuiltinProc_simd_masked_compress_store: { // gather (ptr: #simd[N]rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) -> #simd[N]T // scatter(ptr: #simd[N]rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) // masked_load (ptr: rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) -> #simd[N]T // masked_store(ptr: rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) + // masked_expand_load (ptr: rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) -> #simd[N]T + // masked_compress_store(ptr: rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) Operand ptr = {}; Operand values = {}; @@ -733,7 +737,8 @@ gb_internal bool check_builtin_simd_operation(CheckerContext *c, Operand *operan } if (id == BuiltinProc_simd_gather || - id == BuiltinProc_simd_masked_load) { + id == BuiltinProc_simd_masked_load || + id == BuiltinProc_simd_masked_expand_load) { operand->mode = Addressing_Value; operand->type = values.type; } else { diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index a5f688cd8..6245dadaf 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -195,6 +195,8 @@ BuiltinProc__simd_begin, BuiltinProc_simd_scatter, BuiltinProc_simd_masked_load, BuiltinProc_simd_masked_store, + BuiltinProc_simd_masked_expand_load, + BuiltinProc_simd_masked_compress_store, // Platform specific SIMD intrinsics BuiltinProc_simd_x86__MM_SHUFFLE, @@ -530,6 +532,8 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("simd_scatter"), 3, false, Expr_Stmt, BuiltinProcPkg_intrinsics}, {STR_LIT("simd_masked_load"), 3, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("simd_masked_store"), 3, false, Expr_Stmt, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_masked_expand_load"), 3, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_masked_compress_store"), 3, false, Expr_Stmt, BuiltinProcPkg_intrinsics}, {STR_LIT("simd_x86__MM_SHUFFLE"), 4, false, Expr_Expr, BuiltinProcPkg_intrinsics}, diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index ce1cc8586..ceaeb1aca 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1693,6 +1693,8 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn case BuiltinProc_simd_scatter: case BuiltinProc_simd_masked_load: case BuiltinProc_simd_masked_store: + case BuiltinProc_simd_masked_expand_load: + case BuiltinProc_simd_masked_compress_store: { LLVMValueRef ptr = arg0.value; LLVMValueRef val = arg1.value; @@ -1705,11 +1707,14 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn char const *name = nullptr; switch (builtin_id) { - case BuiltinProc_simd_gather: name = "llvm.masked.gather"; break; - case BuiltinProc_simd_scatter: name = "llvm.masked.scatter"; break; - case BuiltinProc_simd_masked_load: name = "llvm.masked.load"; break; - case BuiltinProc_simd_masked_store: name = "llvm.masked.store"; break; - } + case BuiltinProc_simd_gather: name = "llvm.masked.gather"; break; + case BuiltinProc_simd_scatter: name = "llvm.masked.scatter"; break; + case BuiltinProc_simd_masked_load: name = "llvm.masked.load"; break; + case BuiltinProc_simd_masked_store: name = "llvm.masked.store"; break; + case BuiltinProc_simd_masked_expand_load: name = "llvm.masked.expandload"; break; + case BuiltinProc_simd_masked_compress_store: name = "llvm.masked.compressstore"; break; + } + unsigned type_count = 2; LLVMTypeRef types[2] = { lb_type(p->module, arg1.type), lb_type(p->module, arg0.type) @@ -1718,6 +1723,7 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn auto alignment = cast(unsigned long long)type_align_of(base_array_type(arg1.type)); LLVMValueRef align = LLVMConstInt(LLVMInt32TypeInContext(p->module->ctx), alignment, false); + unsigned arg_count = 4; LLVMValueRef args[4] = {}; switch (builtin_id) { case BuiltinProc_simd_masked_load: @@ -1739,9 +1745,25 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn args[2] = align; args[3] = mask; break; + + case BuiltinProc_simd_masked_expand_load: + arg_count = 3; + type_count = 1; + args[0] = ptr; + args[1] = mask; + args[2] = val; + break; + + case BuiltinProc_simd_masked_compress_store: + arg_count = 3; + type_count = 1; + args[0] = val; + args[1] = ptr; + args[2] = mask; + break; } - res.value = lb_call_intrinsic(p, name, args, gb_count_of(args), types, gb_count_of(types)); + res.value = lb_call_intrinsic(p, name, args, arg_count, types, type_count); return res; } -- cgit v1.2.3 From e27a424f4d88a0409e6492ab167dbe5a82b9e3ac Mon Sep 17 00:00:00 2001 From: Feoramund <161657516+Feoramund@users.noreply.github.com> Date: Tue, 6 Aug 2024 14:50:34 -0400 Subject: Swap `reduce_any` and `reduce_all` `llvm.vector.reduce.or` will return true if any lane is true. `llvm.vector.reduce.and` will return true if all lanes are true. --- src/llvm_backend_proc.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index ceaeb1aca..ee3ed1995 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1532,8 +1532,8 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn { char const *name = nullptr; switch (builtin_id) { - case BuiltinProc_simd_reduce_any: name = "llvm.vector.reduce.and"; break; - case BuiltinProc_simd_reduce_all: name = "llvm.vector.reduce.or"; break; + case BuiltinProc_simd_reduce_any: name = "llvm.vector.reduce.or"; break; + case BuiltinProc_simd_reduce_all: name = "llvm.vector.reduce.and"; break; } LLVMTypeRef types[1] = { lb_type(p->module, arg0.type) }; -- cgit v1.2.3 From e3f375afd8e1c08c82818d7132296c277d871436 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sun, 11 Aug 2024 17:00:49 +0200 Subject: fix c_vararg bit_set Fixes #4051 --- src/llvm_backend_expr.cpp | 11 +++++++++++ src/llvm_backend_proc.cpp | 10 +++++----- src/types.cpp | 5 +---- 3 files changed, 17 insertions(+), 9 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index f843dfa92..962082cae 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -2469,6 +2469,17 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { return {}; } +gb_internal lbValue lb_emit_c_vararg(lbProcedure *p, lbValue arg, Type *type) { + Type *core = core_type(type); + if (core->kind == Type_BitSet) { + core = core_type(bit_set_to_int(core)); + arg = lb_emit_transmute(p, arg, core); + } + + Type *promoted = c_vararg_promote_type(core); + return lb_emit_conv(p, arg, promoted); +} + gb_internal lbValue lb_compare_records(lbProcedure *p, TokenKind op_kind, lbValue left, lbValue right, Type *type) { GB_ASSERT((is_type_struct(type) || is_type_union(type)) && is_type_comparable(type)); lbValue left_ptr = lb_address_from_load_or_generate_local(p, left); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index ee3ed1995..3326b4041 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -3569,9 +3569,9 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { if (is_type_untyped_nil(arg.type)) { arg = lb_const_nil(p->module, t_rawptr); } - array_add(&args, lb_emit_conv(p, arg, c_vararg_promote_type(default_type(arg.type)))); + array_add(&args, lb_emit_c_vararg(p, arg, arg.type)); } else { - array_add(&args, lb_emit_conv(p, arg, c_vararg_promote_type(elem_type))); + array_add(&args, lb_emit_c_vararg(p, arg, elem_type)); } } break; @@ -3697,15 +3697,15 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { if (is_type_untyped_nil(arg.type)) { arg = lb_const_nil(p->module, t_rawptr); } - array_add(&args, lb_emit_conv(p, arg, c_vararg_promote_type(default_type(arg.type)))); + array_add(&args, lb_emit_c_vararg(p, arg, arg.type)); } else { - array_add(&args, lb_emit_conv(p, arg, c_vararg_promote_type(elem_type))); + array_add(&args, lb_emit_c_vararg(p, arg, elem_type)); } } } else { lbValue value = lb_build_expr(p, fv->value); GB_ASSERT(!is_type_tuple(value.type)); - array_add(&args, lb_emit_conv(p, value, c_vararg_promote_type(value.type))); + array_add(&args, lb_emit_c_vararg(p, value, value.type)); } } else { lbValue value = lb_build_expr(p, fv->value); diff --git a/src/types.cpp b/src/types.cpp index da5099d22..21dd6ad39 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -2960,10 +2960,7 @@ gb_internal Type *c_vararg_promote_type(Type *type) { GB_ASSERT(type != nullptr); Type *core = core_type(type); - - if (core->kind == Type_BitSet) { - core = core_type(bit_set_to_int(core)); - } + GB_ASSERT(core->kind != Type_BitSet); if (core->kind == Type_Basic) { switch (core->Basic.kind) { -- cgit v1.2.3 From ca6ef95b038f3eb443971240de73924a721485cc Mon Sep 17 00:00:00 2001 From: Laytan Date: Thu, 15 Aug 2024 20:39:35 +0200 Subject: add support for linux_riscv64 and freestanding_riscv64 --- .github/workflows/ci.yml | 50 +++ base/runtime/entry_unix.odin | 3 + base/runtime/entry_unix_no_crt_riscv64.asm | 10 + base/runtime/os_specific_linux.odin | 2 + .../crypto/_chacha20/simd128/chacha20_simd128.odin | 2 +- core/net/socket_linux.odin | 4 +- core/os/os_linux.odin | 19 ++ core/sys/info/cpu_linux_riscv64.odin | 46 +++ core/sys/info/cpu_riscv64.odin | 16 + core/sys/info/sysinfo.odin | 2 +- core/sys/linux/bits.odin | 14 +- core/sys/linux/sys.odin | 54 +-- core/sys/linux/syscall_riscv64.odin | 334 ++++++++++++++++++ core/sys/linux/types.odin | 48 ++- core/sys/unix/syscalls_linux.odin | 380 +++++++++++++++++++-- misc/featuregen/README.md | 4 +- misc/featuregen/build_featuregen.sh | 5 + misc/featuregen/featuregen.py | 19 +- src/build_settings.cpp | 20 +- src/build_settings_microarch.cpp | 102 ++++-- src/check_builtin.cpp | 5 + src/checker.cpp | 1 + src/linker.cpp | 29 +- src/llvm_abi.cpp | 269 ++++++++++++++- src/llvm_backend.cpp | 31 +- src/llvm_backend_general.cpp | 8 +- src/llvm_backend_proc.cpp | 25 ++ src/main.cpp | 9 + 28 files changed, 1395 insertions(+), 116 deletions(-) create mode 100644 base/runtime/entry_unix_no_crt_riscv64.asm create mode 100644 core/sys/info/cpu_linux_riscv64.odin create mode 100644 core/sys/info/cpu_riscv64.odin create mode 100644 core/sys/linux/syscall_riscv64.odin create mode 100755 misc/featuregen/build_featuregen.sh (limited to 'src/llvm_backend_proc.cpp') diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bb5ad0d27..455a451e7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -220,3 +220,53 @@ jobs: run: | call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat odin check examples/all -strict-style -target:windows_i386 + + build_linux_riscv64: + runs-on: ubuntu-latest + name: Linux riscv64 (emulated) Build, Check and Test + timeout-minutes: 15 + steps: + - uses: actions/checkout@v4 + + - name: Download LLVM (Linux) + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 18 + echo "/usr/lib/llvm-18/bin" >> $GITHUB_PATH + + - name: Build Odin + run: ./build_odin.sh release + + - name: Odin version + run: ./odin version + + - name: Odin report + run: ./odin report + + - name: Compile needed Vendor + run: | + make -C vendor/stb/src + make -C vendor/cgltf/src + make -C vendor/miniaudio/src + + - name: Odin check + run: ./odin check examples/all -target:linux_riscv64 -vet -strict-style -disallow-do + + - name: Install riscv64 toolchain and qemu + run: sudo apt-get install -y qemu-user qemu-user-static gcc-12-riscv64-linux-gnu libc6-riscv64-cross + + - name: Odin run + run: ./odin run examples/demo -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" + + - name: Odin run -debug + run: ./odin run examples/demo -debug -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" + + - name: Normal Core library tests + run: ./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" + + - name: Optimized Core library tests + run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" + + - name: Internals tests + run: ./odin test tests/internal -all-packages -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" diff --git a/base/runtime/entry_unix.odin b/base/runtime/entry_unix.odin index 7d7252625..5dfd37f99 100644 --- a/base/runtime/entry_unix.odin +++ b/base/runtime/entry_unix.odin @@ -34,6 +34,9 @@ when ODIN_BUILD_MODE == .Dynamic { } else when ODIN_OS == .Darwin && ODIN_ARCH == .arm64 { @require foreign import entry "entry_unix_no_crt_darwin_arm64.asm" SYS_exit :: 1 + } else when ODIN_ARCH == .riscv64 { + @require foreign import entry "entry_unix_no_crt_riscv64.asm" + SYS_exit :: 93 } @(link_name="_start_odin", linkage="strong", require) _start_odin :: proc "c" (argc: i32, argv: [^]cstring) -> ! { diff --git a/base/runtime/entry_unix_no_crt_riscv64.asm b/base/runtime/entry_unix_no_crt_riscv64.asm new file mode 100644 index 000000000..756515b72 --- /dev/null +++ b/base/runtime/entry_unix_no_crt_riscv64.asm @@ -0,0 +1,10 @@ +.text + +.globl _start + +_start: + ld a0, 0(sp) + addi a1, sp, 8 + addi sp, sp, ~15 + call _start_odin + ebreak diff --git a/base/runtime/os_specific_linux.odin b/base/runtime/os_specific_linux.odin index a944ba309..146e647fb 100644 --- a/base/runtime/os_specific_linux.odin +++ b/base/runtime/os_specific_linux.odin @@ -12,6 +12,8 @@ _stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) { SYS_write :: uintptr(4) } else when ODIN_ARCH == .arm32 { SYS_write :: uintptr(4) + } else when ODIN_ARCH == .riscv64 { + SYS_write :: uintptr(64) } stderr :: 2 diff --git a/core/crypto/_chacha20/simd128/chacha20_simd128.odin b/core/crypto/_chacha20/simd128/chacha20_simd128.odin index 4cab3c5e8..2f91ac52a 100644 --- a/core/crypto/_chacha20/simd128/chacha20_simd128.odin +++ b/core/crypto/_chacha20/simd128/chacha20_simd128.odin @@ -3,7 +3,7 @@ package chacha20_simd128 import "base:intrinsics" import "core:crypto/_chacha20" import "core:simd" -import "core:sys/info" +@(require) import "core:sys/info" // Portable 128-bit `core:simd` implementation. // diff --git a/core/net/socket_linux.odin b/core/net/socket_linux.odin index dce428685..52f328814 100644 --- a/core/net/socket_linux.odin +++ b/core/net/socket_linux.odin @@ -33,8 +33,8 @@ Socket_Option :: enum c.int { Linger = c.int(linux.Socket_Option.LINGER), Receive_Buffer_Size = c.int(linux.Socket_Option.RCVBUF), Send_Buffer_Size = c.int(linux.Socket_Option.SNDBUF), - Receive_Timeout = c.int(linux.Socket_Option.RCVTIMEO_NEW), - Send_Timeout = c.int(linux.Socket_Option.SNDTIMEO_NEW), + Receive_Timeout = c.int(linux.Socket_Option.RCVTIMEO), + Send_Timeout = c.int(linux.Socket_Option.SNDTIMEO), } // Wrappers and unwrappers for system-native types diff --git a/core/os/os_linux.odin b/core/os/os_linux.odin index 0fcd1a21a..f1b3720c6 100644 --- a/core/os/os_linux.odin +++ b/core/os/os_linux.odin @@ -284,6 +284,25 @@ when ODIN_ARCH == .arm64 { _reserved: [2]i32, } #assert(size_of(OS_Stat) == 128) +} else when ODIN_ARCH == .riscv64 { + OS_Stat :: struct { + device_id: u64, + serial: u64, + mode: u32, + nlink: u32, + uid: u32, + gid: u32, + rdev: u64, + _: u64, + size: i64, + block_size: i32, + _: i32, + blocks: i64, + last_access: Unix_File_Time, + modified: Unix_File_Time, + status_change: Unix_File_Time, + _: [3]uint, + } } else { OS_Stat :: struct { device_id: u64, // ID of device containing file diff --git a/core/sys/info/cpu_linux_riscv64.odin b/core/sys/info/cpu_linux_riscv64.odin new file mode 100644 index 000000000..0f109e7ba --- /dev/null +++ b/core/sys/info/cpu_linux_riscv64.odin @@ -0,0 +1,46 @@ +//+build riscv64 +//+build linux +package sysinfo + +import "base:intrinsics" + +import "core:sys/linux" + +@(init, private) +init_cpu_features :: proc() { + fd, err := linux.open("/proc/self/auxv", {}) + if err != .NONE { return } + defer linux.close(fd) + + // This is probably enough right? + buf: [4096]byte + n, rerr := linux.read(fd, buf[:]) + if rerr != .NONE || n == 0 { return } + + ulong :: u64 + AT_HWCAP :: 16 + + // TODO: using these we could get more information than just the basics. + // AT_HWCAP2 :: 26 + // AT_HWCAP3 :: 29 + // AT_HWCAP4 :: 30 + + auxv := buf[:n] + for len(auxv) >= size_of(ulong)*2 { + key := intrinsics.unaligned_load((^ulong)(&auxv[0])) + val := intrinsics.unaligned_load((^ulong)(&auxv[size_of(ulong)])) + auxv = auxv[2*size_of(ulong):] + + if key != AT_HWCAP { + continue + } + + cpu_features = transmute(CPU_Features)(val) + break + } +} + +@(init, private) +init_cpu_name :: proc() { + cpu_name = "RISCV64" +} diff --git a/core/sys/info/cpu_riscv64.odin b/core/sys/info/cpu_riscv64.odin new file mode 100644 index 000000000..754110911 --- /dev/null +++ b/core/sys/info/cpu_riscv64.odin @@ -0,0 +1,16 @@ +package sysinfo + +CPU_Feature :: enum u64 { + I = 'I' - 'A', // Base features, don't think this is ever not here. + M = 'M' - 'A', // Integer multiplication and division, currently required by Odin. + A = 'A' - 'A', // Atomics. + F = 'F' - 'A', // Single precision floating point, currently required by Odin. + D = 'D' - 'A', // Double precision floating point, currently required by Odin. + C = 'C' - 'A', // Compressed instructions. + V = 'V' - 'A', // Vector operations. +} + +CPU_Features :: distinct bit_set[CPU_Feature; u64] + +cpu_features: Maybe(CPU_Features) +cpu_name: Maybe(string) diff --git a/core/sys/info/sysinfo.odin b/core/sys/info/sysinfo.odin index f0262f317..f624a1718 100644 --- a/core/sys/info/sysinfo.odin +++ b/core/sys/info/sysinfo.odin @@ -1,6 +1,6 @@ package sysinfo -when !(ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 || ODIN_ARCH == .arm32 || ODIN_ARCH == .arm64) { +when !(ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 || ODIN_ARCH == .arm32 || ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64) { #assert(false, "This package is unsupported on this architecture.") } diff --git a/core/sys/linux/bits.odin b/core/sys/linux/bits.odin index b8ec3c133..f78891bc8 100644 --- a/core/sys/linux/bits.odin +++ b/core/sys/linux/bits.odin @@ -1343,14 +1343,16 @@ Socket_Option :: enum { RESERVE_MEM = 73, TXREHASH = 74, RCVMARK = 75, - // Hardcoded 64-bit Time. It's time to move on. - TIMESTAMP = TIMESTAMP_NEW, - TIMESTAMPNS = TIMESTAMPNS_NEW, - TIMESTAMPING = TIMESTAMPING_NEW, - RCVTIMEO = RCVTIMEO_NEW, - SNDTIMEO = SNDTIMEO_NEW, + TIMESTAMP = TIMESTAMP_OLD when _SOCKET_OPTION_OLD else TIMESTAMP_NEW, + TIMESTAMPNS = TIMESTAMPNS_OLD when _SOCKET_OPTION_OLD else TIMESTAMPNS_NEW, + TIMESTAMPING = TIMESTAMPING_OLD when _SOCKET_OPTION_OLD else TIMESTAMPING_NEW, + RCVTIMEO = RCVTIMEO_OLD when _SOCKET_OPTION_OLD else RCVTIMEO_NEW, + SNDTIMEO = SNDTIMEO_OLD when _SOCKET_OPTION_OLD else SNDTIMEO_NEW, } +@(private) +_SOCKET_OPTION_OLD :: size_of(rawptr) == 8 /* || size_of(time_t) == size_of(__kernel_long_t) */ + Socket_UDP_Option :: enum { CORK = 1, ENCAP = 100, diff --git a/core/sys/linux/sys.odin b/core/sys/linux/sys.odin index f4f609ab9..a6d4f723d 100644 --- a/core/sys/linux/sys.odin +++ b/core/sys/linux/sys.odin @@ -39,7 +39,7 @@ write :: proc "contextless" (fd: Fd, buf: []u8) -> (int, Errno) { On ARM64 available since Linux 2.6.16. */ open :: proc "contextless" (name: cstring, flags: Open_Flags, mode: Mode = {}) -> (Fd, Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_openat, AT_FDCWD, transmute(uintptr) name, transmute(u32) flags, transmute(u32) mode) return errno_unwrap(ret, Fd) } else { @@ -68,7 +68,7 @@ close :: proc "contextless" (fd: Fd) -> (Errno) { */ stat :: proc "contextless" (filename: cstring, stat: ^Stat) -> (Errno) { when size_of(int) == 8 { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_fstatat, AT_FDCWD, cast(rawptr) filename, stat, 0) return Errno(-ret) } else { @@ -111,7 +111,7 @@ fstat :: proc "contextless" (fd: Fd, stat: ^Stat) -> (Errno) { */ lstat :: proc "contextless" (filename: cstring, stat: ^Stat) -> (Errno) { when size_of(int) == 8 { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { return fstatat(AT_FDCWD, filename, stat, {.SYMLINK_NOFOLLOW}) } else { ret := syscall(SYS_lstat, cast(rawptr) filename, stat) @@ -128,7 +128,7 @@ lstat :: proc "contextless" (filename: cstring, stat: ^Stat) -> (Errno) { Available since Linux 2.2. */ poll :: proc "contextless" (fds: []Poll_Fd, timeout: i32) -> (i32, Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { seconds := cast(uint) timeout / 1000 nanoseconds := cast(uint) (timeout % 1000) * 1_000_000 timeout_spec := Time_Spec{seconds, nanoseconds} @@ -291,7 +291,7 @@ writev :: proc "contextless" (fd: Fd, iov: []IO_Vec) -> (int, Errno) { For ARM64 available since Linux 2.6.16. */ access :: proc "contextless" (name: cstring, mode: Mode = F_OK) -> (Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_faccessat, AT_FDCWD, cast(rawptr) name, transmute(u32) mode) return Errno(-ret) } else { @@ -407,7 +407,7 @@ dup :: proc "contextless" (fd: Fd) -> (Fd, Errno) { On ARM64 available since Linux 2.6.27. */ dup2 :: proc "contextless" (old: Fd, new: Fd) -> (Fd, Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_dup3, old, new, 0) return errno_unwrap(ret, Fd) } else { @@ -422,7 +422,7 @@ dup2 :: proc "contextless" (old: Fd, new: Fd) -> (Fd, Errno) { On ARM64 available since Linux 2.6.16. */ pause :: proc "contextless" () { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { syscall(SYS_ppoll, 0, 0, 0, 0) } else { syscall(SYS_pause) @@ -452,7 +452,7 @@ getitimer :: proc "contextless" (which: ITimer_Which, cur: ^ITimer_Val) -> (Errn Available since Linux 1.0. */ alarm :: proc "contextless" (seconds: u32) -> u32 { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { new := ITimer_Val { value = { seconds = cast(int) seconds } } old := ITimer_Val {} syscall(SYS_setitimer, ITimer_Which.REAL, &new, &old) @@ -765,7 +765,7 @@ getsockopt :: proc { Available since Linux 1.0. */ fork :: proc "contextless" () -> (Pid, Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_clone, u64(Signal.SIGCHLD), cast(rawptr) nil, cast(rawptr) nil, cast(rawptr) nil, u64(0)) return errno_unwrap(ret, Pid) } else { @@ -779,7 +779,7 @@ fork :: proc "contextless" () -> (Pid, Errno) { Available since Linux 2.2. */ vfork :: proc "contextless" () -> Pid { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return Pid(syscall(SYS_vfork)) } else { return Pid(syscall(SYS_clone, Signal.SIGCHLD)) @@ -792,7 +792,7 @@ vfork :: proc "contextless" () -> Pid { On ARM64 available since Linux 3.19. */ execve :: proc "contextless" (name: cstring, argv: [^]cstring, envp: [^]cstring) -> (Errno) { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { ret := syscall(SYS_execve, cast(rawptr) name, cast(rawptr) argv, cast(rawptr) envp) return Errno(-ret) } else { @@ -1193,7 +1193,7 @@ fchdir :: proc "contextless" (fd: Fd) -> (Errno) { On ARM64 available since Linux 2.6.16. */ rename :: proc "contextless" (old: cstring, new: cstring) -> (Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_renameat, AT_FDCWD, cast(rawptr) old, AT_FDCWD, cast(rawptr) new) return Errno(-ret) } else { @@ -1208,7 +1208,7 @@ rename :: proc "contextless" (old: cstring, new: cstring) -> (Errno) { On ARM64 available since Linux 2.6.16. */ mkdir :: proc "contextless" (name: cstring, mode: Mode) -> (Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_mkdirat, AT_FDCWD, cast(rawptr) name, transmute(u32) mode) return Errno(-ret) } else { @@ -1223,7 +1223,7 @@ mkdir :: proc "contextless" (name: cstring, mode: Mode) -> (Errno) { On ARM64 available since Linux 2.6.16. */ rmdir :: proc "contextless" (name: cstring) -> (Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_unlinkat, AT_FDCWD, cast(rawptr) name, transmute(i32) FD_Flags{.REMOVEDIR}) return Errno(-ret) } else { @@ -1238,7 +1238,7 @@ rmdir :: proc "contextless" (name: cstring) -> (Errno) { On ARM64 available since Linux 2.6.16. */ creat :: proc "contextless" (name: cstring, mode: Mode) -> (Fd, Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { return openat(AT_FDCWD, name, {.CREAT, .WRONLY,.TRUNC}, mode) } else { ret := syscall(SYS_creat, cast(rawptr) name, transmute(u32) mode) @@ -1252,7 +1252,7 @@ creat :: proc "contextless" (name: cstring, mode: Mode) -> (Fd, Errno) { On ARM64 available since Linux 2.6.16. */ link :: proc "contextless" (target: cstring, linkpath: cstring) -> (Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_linkat, AT_FDCWD, cast(rawptr) target, AT_FDCWD, cast(rawptr) linkpath, 0) return Errno(-ret) } else { @@ -1267,7 +1267,7 @@ link :: proc "contextless" (target: cstring, linkpath: cstring) -> (Errno) { On ARM64 available since Linux 2.6.16. */ unlink :: proc "contextless" (name: cstring) -> (Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_unlinkat, AT_FDCWD, cast(rawptr) name, 0) return Errno(-ret) } else { @@ -1282,7 +1282,7 @@ unlink :: proc "contextless" (name: cstring) -> (Errno) { On arm64 available since Linux 2.6.16. */ symlink :: proc "contextless" (target: cstring, linkpath: cstring) -> (Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_symlinkat, cast(rawptr) target, AT_FDCWD, cast(rawptr) linkpath) return Errno(-ret) } else { @@ -1297,7 +1297,7 @@ symlink :: proc "contextless" (target: cstring, linkpath: cstring) -> (Errno) { On arm64 available since Linux 2.6.16. */ readlink :: proc "contextless" (name: cstring, buf: []u8) -> (int, Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_readlinkat, AT_FDCWD, cast(rawptr) name, raw_data(buf), len(buf)) return errno_unwrap(ret, int) } else { @@ -1312,7 +1312,7 @@ readlink :: proc "contextless" (name: cstring, buf: []u8) -> (int, Errno) { On ARM64 available since Linux 2.6.16. */ chmod :: proc "contextless" (name: cstring, mode: Mode) -> (Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_fchmodat, AT_FDCWD, cast(rawptr) name, transmute(u32) mode) return Errno(-ret) } else { @@ -1340,7 +1340,7 @@ chown :: proc "contextless" (name: cstring, uid: Uid, gid: Gid) -> (Errno) { when size_of(int) == 4 { ret := syscall(SYS_chown32, cast(rawptr) name, uid, gid) return Errno(-ret) - } else when ODIN_ARCH == .arm64 { + } else when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_fchownat, AT_FDCWD, cast(rawptr) name, uid, gid, 0) return Errno(-ret) } else { @@ -1374,7 +1374,7 @@ lchown :: proc "contextless" (name: cstring, uid: Uid, gid: Gid) -> (Errno) { when size_of(int) == 4 { ret := syscall(SYS_lchown32, cast(rawptr) name, uid, gid) return Errno(-ret) - } else when ODIN_ARCH == .arm64 { + } else when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_fchownat, AT_FDCWD, cast(rawptr) name, uid, gid, transmute(i32) FD_Flags{.SYMLINK_NOFOLLOW}) return Errno(-ret) } else { @@ -1727,7 +1727,7 @@ getppid :: proc "contextless" () -> Pid { Available since Linux 1.0. */ getpgrp :: proc "contextless" () -> (Pid, Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_getpgid, 0) return errno_unwrap(ret, Pid) } else { @@ -1950,7 +1950,7 @@ sigaltstack :: proc "contextless" (stack: ^Sig_Stack, old_stack: ^Sig_Stack) -> On ARM64 available since Linux 2.6.16. */ mknod :: proc "contextless" (name: cstring, mode: Mode, dev: Dev) -> (Errno) { - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { ret := syscall(SYS_mknodat, AT_FDCWD, cast(rawptr) name, transmute(u32) mode, dev) return Errno(-ret) } else { @@ -2207,7 +2207,7 @@ gettid :: proc "contextless" () -> Pid { Available since Linux 1.0. */ time :: proc "contextless" (tloc: ^uint) -> (Errno) { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { ret := syscall(SYS_time, tloc) return Errno(-ret) } else { @@ -2335,7 +2335,7 @@ futex :: proc{ Available since Linux 2.6. */ epoll_create :: proc(size: i32 = 1) -> (Fd, Errno) { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { ret := syscall(SYS_epoll_create, i32(1)) return errno_unwrap(ret, Fd) } else { @@ -2433,7 +2433,7 @@ exit_group :: proc "contextless" (code: i32) -> ! { Available since Linux 2.6. */ epoll_wait :: proc(epfd: Fd, events: [^]EPoll_Event, count: i32, timeout: i32) -> (i32, Errno) { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { ret := syscall(SYS_epoll_wait, epfd, events, count, timeout) return errno_unwrap(ret, i32) } else { diff --git a/core/sys/linux/syscall_riscv64.odin b/core/sys/linux/syscall_riscv64.odin new file mode 100644 index 000000000..ce374312e --- /dev/null +++ b/core/sys/linux/syscall_riscv64.odin @@ -0,0 +1,334 @@ +//+build riscv64 +package linux + +// https://github.com/riscv-collab/riscv-gnu-toolchain/blob/master/linux-headers/include/asm-generic/unistd.h + +SYS_io_setup :: uintptr(0) +SYS_io_destroy :: uintptr(1) +SYS_io_submit :: uintptr(2) +SYS_io_cancel :: uintptr(3) +SYS_io_getevents :: uintptr(4) +SYS_setxattr :: uintptr(5) +SYS_lsetxattr :: uintptr(6) +SYS_fsetxattr :: uintptr(7) +SYS_getxattr :: uintptr(8) +SYS_lgetxattr :: uintptr(9) +SYS_fgetxattr :: uintptr(10) +SYS_listxattr :: uintptr(11) +SYS_llistxattr :: uintptr(12) +SYS_flistxattr :: uintptr(13) +SYS_removexattr :: uintptr(14) +SYS_lremovexattr :: uintptr(15) +SYS_fremovexattr :: uintptr(16) +SYS_getcwd :: uintptr(17) +SYS_lookup_dcookie :: uintptr(18) +SYS_eventfd2 :: uintptr(19) +SYS_epoll_create1 :: uintptr(20) +SYS_epoll_ctl :: uintptr(21) +SYS_epoll_pwait :: uintptr(22) +SYS_dup :: uintptr(23) +SYS_dup3 :: uintptr(24) +SYS_fcntl :: uintptr(25) +SYS_inotify_init1 :: uintptr(26) +SYS_inotify_add_watch :: uintptr(27) +SYS_inotify_rm_watch :: uintptr(28) +SYS_ioctl :: uintptr(29) +SYS_ioprio_set :: uintptr(30) +SYS_ioprio_get :: uintptr(31) +SYS_flock :: uintptr(32) +SYS_mknodat :: uintptr(33) +SYS_mkdirat :: uintptr(34) +SYS_unlinkat :: uintptr(35) +SYS_symlinkat :: uintptr(36) +SYS_linkat :: uintptr(37) +SYS_renameat :: uintptr(38) +SYS_umount2 :: uintptr(39) +SYS_mount :: uintptr(40) +SYS_pivot_root :: uintptr(41) +SYS_nfsservctl :: uintptr(42) +SYS_statfs :: uintptr(43) +SYS_fstatfs :: uintptr(44) +SYS_truncate :: uintptr(45) +SYS_ftruncate :: uintptr(46) +SYS_fallocate :: uintptr(47) +SYS_faccessat :: uintptr(48) +SYS_chdir :: uintptr(49) +SYS_fchdir :: uintptr(50) +SYS_chroot :: uintptr(51) +SYS_fchmod :: uintptr(52) +SYS_fchmodat :: uintptr(53) +SYS_fchownat :: uintptr(54) +SYS_fchown :: uintptr(55) +SYS_openat :: uintptr(56) +SYS_close :: uintptr(57) +SYS_vhangup :: uintptr(58) +SYS_pipe2 :: uintptr(59) +SYS_quotactl :: uintptr(60) +SYS_getdents64 :: uintptr(61) +SYS_lseek :: uintptr(62) +SYS_read :: uintptr(63) +SYS_write :: uintptr(64) +SYS_readv :: uintptr(65) +SYS_writev :: uintptr(66) +SYS_pread64 :: uintptr(67) +SYS_pwrite64 :: uintptr(68) +SYS_preadv :: uintptr(69) +SYS_pwritev :: uintptr(70) +SYS_sendfile :: uintptr(71) +SYS_pselect6 :: uintptr(72) +SYS_ppoll :: uintptr(73) +SYS_signalfd4 :: uintptr(74) +SYS_vmsplice :: uintptr(75) +SYS_splice :: uintptr(76) +SYS_tee :: uintptr(77) +SYS_readlinkat :: uintptr(78) +SYS_fstatat :: uintptr(79) +SYS_fstat :: uintptr(80) +SYS_sync :: uintptr(81) +SYS_fsync :: uintptr(82) +SYS_fdatasync :: uintptr(83) +SYS_sync_file_range2 :: uintptr(84) +SYS_sync_file_range :: uintptr(84) +SYS_timerfd_create :: uintptr(85) +SYS_timerfd_settime :: uintptr(86) +SYS_timerfd_gettime :: uintptr(87) +SYS_utimensat :: uintptr(88) +SYS_acct :: uintptr(89) +SYS_capget :: uintptr(90) +SYS_capset :: uintptr(91) +SYS_personality :: uintptr(92) +SYS_exit :: uintptr(93) +SYS_exit_group :: uintptr(94) +SYS_waitid :: uintptr(95) +SYS_set_tid_address :: uintptr(96) +SYS_unshare :: uintptr(97) +SYS_futex :: uintptr(98) +SYS_set_robust_list :: uintptr(99) +SYS_get_robust_list :: uintptr(100) +SYS_nanosleep :: uintptr(101) +SYS_getitimer :: uintptr(102) +SYS_setitimer :: uintptr(103) +SYS_kexec_load :: uintptr(104) +SYS_init_module :: uintptr(105) +SYS_delete_module :: uintptr(106) +SYS_timer_create :: uintptr(107) +SYS_timer_gettime :: uintptr(108) +SYS_timer_getoverrun :: uintptr(109) +SYS_timer_settime :: uintptr(110) +SYS_timer_delete :: uintptr(111) +SYS_clock_settime :: uintptr(112) +SYS_clock_gettime :: uintptr(113) +SYS_clock_getres :: uintptr(114) +SYS_clock_nanosleep :: uintptr(115) +SYS_syslog :: uintptr(116) +SYS_ptrace :: uintptr(117) +SYS_sched_setparam :: uintptr(118) +SYS_sched_setscheduler :: uintptr(119) +SYS_sched_getscheduler :: uintptr(120) +SYS_sched_getparam :: uintptr(121) +SYS_sched_setaffinity :: uintptr(122) +SYS_sched_getaffinity :: uintptr(123) +SYS_sched_yield :: uintptr(124) +SYS_sched_get_priority_max :: uintptr(125) +SYS_sched_get_priority_min :: uintptr(126) +SYS_sched_rr_get_interval :: uintptr(127) +SYS_restart_syscall :: uintptr(128) +SYS_kill :: uintptr(129) +SYS_tkill :: uintptr(130) +SYS_tgkill :: uintptr(131) +SYS_sigaltstack :: uintptr(132) +SYS_rt_sigsuspend :: uintptr(133) +SYS_rt_sigaction :: uintptr(134) +SYS_rt_sigprocmask :: uintptr(135) +SYS_rt_sigpending :: uintptr(136) +SYS_rt_sigtimedwait :: uintptr(137) +SYS_rt_sigqueueinfo :: uintptr(138) +SYS_rt_sigreturn :: uintptr(139) +SYS_setpriority :: uintptr(140) +SYS_getpriority :: uintptr(141) +SYS_reboot :: uintptr(142) +SYS_setregid :: uintptr(143) +SYS_setgid :: uintptr(144) +SYS_setreuid :: uintptr(145) +SYS_setuid :: uintptr(146) +SYS_setresuid :: uintptr(147) +SYS_getresuid :: uintptr(148) +SYS_setresgid :: uintptr(149) +SYS_getresgid :: uintptr(150) +SYS_setfsuid :: uintptr(151) +SYS_setfsgid :: uintptr(152) +SYS_times :: uintptr(153) +SYS_setpgid :: uintptr(154) +SYS_getpgid :: uintptr(155) +SYS_getsid :: uintptr(156) +SYS_setsid :: uintptr(157) +SYS_getgroups :: uintptr(158) +SYS_setgroups :: uintptr(159) +SYS_uname :: uintptr(160) +SYS_sethostname :: uintptr(161) +SYS_setdomainname :: uintptr(162) +SYS_getrlimit :: uintptr(163) +SYS_setrlimit :: uintptr(164) +SYS_getrusage :: uintptr(165) +SYS_umask :: uintptr(166) +SYS_prctl :: uintptr(167) +SYS_getcpu :: uintptr(168) +SYS_gettimeofday :: uintptr(169) +SYS_settimeofday :: uintptr(170) +SYS_adjtimex :: uintptr(171) +SYS_getpid :: uintptr(172) +SYS_getppid :: uintptr(173) +SYS_getuid :: uintptr(174) +SYS_geteuid :: uintptr(175) +SYS_getgid :: uintptr(176) +SYS_getegid :: uintptr(177) +SYS_gettid :: uintptr(178) +SYS_sysinfo :: uintptr(179) +SYS_mq_open :: uintptr(180) +SYS_mq_unlink :: uintptr(181) +SYS_mq_timedsend :: uintptr(182) +SYS_mq_timedreceive :: uintptr(183) +SYS_mq_notify :: uintptr(184) +SYS_mq_getsetattr :: uintptr(185) +SYS_msgget :: uintptr(186) +SYS_msgctl :: uintptr(187) +SYS_msgrcv :: uintptr(188) +SYS_msgsnd :: uintptr(189) +SYS_semget :: uintptr(190) +SYS_semctl :: uintptr(191) +SYS_semtimedop :: uintptr(192) +SYS_semop :: uintptr(193) +SYS_shmget :: uintptr(194) +SYS_shmctl :: uintptr(195) +SYS_shmat :: uintptr(196) +SYS_shmdt :: uintptr(197) +SYS_socket :: uintptr(198) +SYS_socketpair :: uintptr(199) +SYS_bind :: uintptr(200) +SYS_listen :: uintptr(201) +SYS_accept :: uintptr(202) +SYS_connect :: uintptr(203) +SYS_getsockname :: uintptr(204) +SYS_getpeername :: uintptr(205) +SYS_sendto :: uintptr(206) +SYS_recvfrom :: uintptr(207) +SYS_setsockopt :: uintptr(208) +SYS_getsockopt :: uintptr(209) +SYS_shutdown :: uintptr(210) +SYS_sendmsg :: uintptr(211) +SYS_recvmsg :: uintptr(212) +SYS_readahead :: uintptr(213) +SYS_brk :: uintptr(214) +SYS_munmap :: uintptr(215) +SYS_mremap :: uintptr(216) +SYS_add_key :: uintptr(217) +SYS_request_key :: uintptr(218) +SYS_keyctl :: uintptr(219) +SYS_clone :: uintptr(220) +SYS_execve :: uintptr(221) +SYS_mmap :: uintptr(222) +SYS_fadvise64 :: uintptr(223) +SYS_swapon :: uintptr(224) +SYS_swapoff :: uintptr(225) +SYS_mprotect :: uintptr(226) +SYS_msync :: uintptr(227) +SYS_mlock :: uintptr(228) +SYS_munlock :: uintptr(229) +SYS_mlockall :: uintptr(230) +SYS_munlockall :: uintptr(231) +SYS_mincore :: uintptr(232) +SYS_madvise :: uintptr(233) +SYS_remap_file_pages :: uintptr(234) +SYS_mbind :: uintptr(235) +SYS_get_mempolicy :: uintptr(236) +SYS_set_mempolicy :: uintptr(237) +SYS_migrate_pages :: uintptr(238) +SYS_move_pages :: uintptr(239) +SYS_rt_tgsigqueueinfo :: uintptr(240) +SYS_perf_event_open :: uintptr(241) +SYS_accept4 :: uintptr(242) +SYS_recvmmsg :: uintptr(243) +SYS_wait4 :: uintptr(260) +SYS_prlimit64 :: uintptr(261) +SYS_fanotify_init :: uintptr(262) +SYS_fanotify_mark :: uintptr(263) +SYS_name_to_handle_at :: uintptr(264) +SYS_open_by_handle_at :: uintptr(265) +SYS_clock_adjtime :: uintptr(266) +SYS_syncfs :: uintptr(267) +SYS_setns :: uintptr(268) +SYS_sendmmsg :: uintptr(269) +SYS_process_vm_readv :: uintptr(270) +SYS_process_vm_writev :: uintptr(271) +SYS_kcmp :: uintptr(272) +SYS_finit_module :: uintptr(273) +SYS_sched_setattr :: uintptr(274) +SYS_sched_getattr :: uintptr(275) +SYS_renameat2 :: uintptr(276) +SYS_seccomp :: uintptr(277) +SYS_getrandom :: uintptr(278) +SYS_memfd_create :: uintptr(279) +SYS_bpf :: uintptr(280) +SYS_execveat :: uintptr(281) +SYS_userfaultfd :: uintptr(282) +SYS_membarrier :: uintptr(283) +SYS_mlock2 :: uintptr(284) +SYS_copy_file_range :: uintptr(285) +SYS_preadv2 :: uintptr(286) +SYS_pwritev2 :: uintptr(287) +SYS_pkey_mprotect :: uintptr(288) +SYS_pkey_alloc :: uintptr(289) +SYS_pkey_free :: uintptr(290) +SYS_statx :: uintptr(291) +SYS_io_pgetevents :: uintptr(292) +SYS_rseq :: uintptr(293) +SYS_kexec_file_load :: uintptr(294) +SYS_clock_gettime64 :: uintptr(403) +SYS_clock_settime64 :: uintptr(404) +SYS_clock_adjtime64 :: uintptr(405) +SYS_clock_getres_time64 :: uintptr(406) +SYS_clock_nanosleep_time64 :: uintptr(407) +SYS_timer_gettime64 :: uintptr(408) +SYS_timer_settime64 :: uintptr(409) +SYS_timerfd_gettime64 :: uintptr(410) +SYS_timerfd_settime64 :: uintptr(411) +SYS_utimensat_time64 :: uintptr(412) +SYS_pselect6_time64 :: uintptr(413) +SYS_ppoll_time64 :: uintptr(414) +SYS_io_pgetevents_time64 :: uintptr(416) +SYS_recvmmsg_time64 :: uintptr(417) +SYS_mq_timedsend_time64 :: uintptr(418) +SYS_mq_timedreceive_time64 :: uintptr(419) +SYS_semtimedop_time64 :: uintptr(420) +SYS_rt_sigtimedwait_time64 :: uintptr(421) +SYS_futex_time64 :: uintptr(422) +SYS_sched_rr_get_interval_time64 :: uintptr(423) +SYS_pidfd_send_signal :: uintptr(424) +SYS_io_uring_setup :: uintptr(425) +SYS_io_uring_enter :: uintptr(426) +SYS_io_uring_register :: uintptr(427) +SYS_open_tree :: uintptr(428) +SYS_move_mount :: uintptr(429) +SYS_fsopen :: uintptr(430) +SYS_fsconfig :: uintptr(431) +SYS_fsmount :: uintptr(432) +SYS_fspick :: uintptr(433) +SYS_pidfd_open :: uintptr(434) +SYS_clone3 :: uintptr(435) +SYS_close_range :: uintptr(436) +SYS_openat2 :: uintptr(437) +SYS_pidfd_getfd :: uintptr(438) +SYS_faccessat2 :: uintptr(439) +SYS_process_madvise :: uintptr(440) +SYS_epoll_pwait2 :: uintptr(441) +SYS_mount_setattr :: uintptr(442) +SYS_quotactl_fd :: uintptr(443) +SYS_landlock_create_ruleset :: uintptr(444) +SYS_landlock_add_rule :: uintptr(445) +SYS_landlock_restrict_self :: uintptr(446) +SYS_memfd_secret :: uintptr(447) +SYS_process_mrelease :: uintptr(448) +SYS_futex_waitv :: uintptr(449) +SYS_set_mempolicy_home_node :: uintptr(450) +SYS_cachestat :: uintptr(451) +SYS_fchmodat2 :: uintptr(452) diff --git a/core/sys/linux/types.odin b/core/sys/linux/types.odin index 288edf879..c78a5b576 100644 --- a/core/sys/linux/types.odin +++ b/core/sys/linux/types.odin @@ -120,6 +120,25 @@ when ODIN_ARCH == .amd64 { _: [3]uint, } } else when ODIN_ARCH == .arm64 { + _Arch_Stat :: struct { + dev: Dev, + ino: Inode, + mode: Mode, + nlink: u32, + uid: Uid, + gid: Gid, + rdev: Dev, + _: u64, + size: int, + blksize: i32, + _: i32, + blocks: int, + atime: Time_Spec, + mtime: Time_Spec, + ctime: Time_Spec, + _: [2]u32, + } +} else when ODIN_ARCH == .riscv64 { _Arch_Stat :: struct { dev: Dev, ino: Inode, @@ -927,7 +946,7 @@ when ODIN_ARCH == .i386 { nsems: uint, _: [2]uint, } -} else when ODIN_ARCH == .arm64 { +} else when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { _Arch_Semid_DS :: struct { perm: IPC_Perm, otime: int, @@ -1167,6 +1186,33 @@ when ODIN_ARCH == .arm32 { xmm_space: [32]uint, padding: [56]uint, } +} else when ODIN_ARCH == .riscv64 { + _Arch_User_Regs :: struct { + pc, ra, sp, gp, tp, + t0, t1, t2, + s0, s1, + a0, a1, a2, a3, a4, a5, a6, a7, + s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, + t3, t4, t5, t6: uint, + } + _Arch_User_FP_Regs :: struct #raw_union { + f_ext: struct { + f: [32]u32, + fcsr: u32, + }, + d_ext: struct { + f: [32]u64, + fcsr: u32, + }, + q_ext: struct { + using _: struct #align(16) { + f: [64]u64, + }, + fcsr: u32, + reserved: [3]u32, + }, + } + _Arch_User_FPX_Regs :: struct {} } /* diff --git a/core/sys/unix/syscalls_linux.odin b/core/sys/unix/syscalls_linux.odin index 038c16276..89ad2661f 100644 --- a/core/sys/unix/syscalls_linux.odin +++ b/core/sys/unix/syscalls_linux.odin @@ -1514,6 +1514,338 @@ when ODIN_ARCH == .amd64 { SYS_landlock_create_ruleset : uintptr : 444 SYS_landlock_add_rule : uintptr : 445 SYS_landlock_restrict_self : uintptr : 446 +} else when ODIN_ARCH == .riscv64 { + SYS_io_setup :: uintptr(0) + SYS_io_destroy :: uintptr(1) + SYS_io_submit :: uintptr(2) + SYS_io_cancel :: uintptr(3) + SYS_io_getevents :: uintptr(4) + SYS_setxattr :: uintptr(5) + SYS_lsetxattr :: uintptr(6) + SYS_fsetxattr :: uintptr(7) + SYS_getxattr :: uintptr(8) + SYS_lgetxattr :: uintptr(9) + SYS_fgetxattr :: uintptr(10) + SYS_listxattr :: uintptr(11) + SYS_llistxattr :: uintptr(12) + SYS_flistxattr :: uintptr(13) + SYS_removexattr :: uintptr(14) + SYS_lremovexattr :: uintptr(15) + SYS_fremovexattr :: uintptr(16) + SYS_getcwd :: uintptr(17) + SYS_lookup_dcookie :: uintptr(18) + SYS_eventfd2 :: uintptr(19) + SYS_epoll_create1 :: uintptr(20) + SYS_epoll_ctl :: uintptr(21) + SYS_epoll_pwait :: uintptr(22) + SYS_dup :: uintptr(23) + SYS_dup3 :: uintptr(24) + SYS_fcntl :: uintptr(25) + SYS_inotify_init1 :: uintptr(26) + SYS_inotify_add_watch :: uintptr(27) + SYS_inotify_rm_watch :: uintptr(28) + SYS_ioctl :: uintptr(29) + SYS_ioprio_set :: uintptr(30) + SYS_ioprio_get :: uintptr(31) + SYS_flock :: uintptr(32) + SYS_mknodat :: uintptr(33) + SYS_mkdirat :: uintptr(34) + SYS_unlinkat :: uintptr(35) + SYS_symlinkat :: uintptr(36) + SYS_linkat :: uintptr(37) + SYS_renameat :: uintptr(38) + SYS_umount2 :: uintptr(39) + SYS_mount :: uintptr(40) + SYS_pivot_root :: uintptr(41) + SYS_nfsservctl :: uintptr(42) + SYS_statfs :: uintptr(43) + SYS_fstatfs :: uintptr(44) + SYS_truncate :: uintptr(45) + SYS_ftruncate :: uintptr(46) + SYS_fallocate :: uintptr(47) + SYS_faccessat :: uintptr(48) + SYS_chdir :: uintptr(49) + SYS_fchdir :: uintptr(50) + SYS_chroot :: uintptr(51) + SYS_fchmod :: uintptr(52) + SYS_fchmodat :: uintptr(53) + SYS_fchownat :: uintptr(54) + SYS_fchown :: uintptr(55) + SYS_openat :: uintptr(56) + SYS_close :: uintptr(57) + SYS_vhangup :: uintptr(58) + SYS_pipe2 :: uintptr(59) + SYS_quotactl :: uintptr(60) + SYS_getdents64 :: uintptr(61) + SYS_lseek :: uintptr(62) + SYS_read :: uintptr(63) + SYS_write :: uintptr(64) + SYS_readv :: uintptr(65) + SYS_writev :: uintptr(66) + SYS_pread64 :: uintptr(67) + SYS_pwrite64 :: uintptr(68) + SYS_preadv :: uintptr(69) + SYS_pwritev :: uintptr(70) + SYS_sendfile :: uintptr(71) + SYS_pselect6 :: uintptr(72) + SYS_ppoll :: uintptr(73) + SYS_signalfd4 :: uintptr(74) + SYS_vmsplice :: uintptr(75) + SYS_splice :: uintptr(76) + SYS_tee :: uintptr(77) + SYS_readlinkat :: uintptr(78) + SYS_fstatat :: uintptr(79) + SYS_fstat :: uintptr(80) + SYS_sync :: uintptr(81) + SYS_fsync :: uintptr(82) + SYS_fdatasync :: uintptr(83) + SYS_sync_file_range2 :: uintptr(84) + SYS_sync_file_range :: uintptr(84) + SYS_timerfd_create :: uintptr(85) + SYS_timerfd_settime :: uintptr(86) + SYS_timerfd_gettime :: uintptr(87) + SYS_utimensat :: uintptr(88) + SYS_acct :: uintptr(89) + SYS_capget :: uintptr(90) + SYS_capset :: uintptr(91) + SYS_personality :: uintptr(92) + SYS_exit :: uintptr(93) + SYS_exit_group :: uintptr(94) + SYS_waitid :: uintptr(95) + SYS_set_tid_address :: uintptr(96) + SYS_unshare :: uintptr(97) + SYS_futex :: uintptr(98) + SYS_set_robust_list :: uintptr(99) + SYS_get_robust_list :: uintptr(100) + SYS_nanosleep :: uintptr(101) + SYS_getitimer :: uintptr(102) + SYS_setitimer :: uintptr(103) + SYS_kexec_load :: uintptr(104) + SYS_init_module :: uintptr(105) + SYS_delete_module :: uintptr(106) + SYS_timer_create :: uintptr(107) + SYS_timer_gettime :: uintptr(108) + SYS_timer_getoverrun :: uintptr(109) + SYS_timer_settime :: uintptr(110) + SYS_timer_delete :: uintptr(111) + SYS_clock_settime :: uintptr(112) + SYS_clock_gettime :: uintptr(113) + SYS_clock_getres :: uintptr(114) + SYS_clock_nanosleep :: uintptr(115) + SYS_syslog :: uintptr(116) + SYS_ptrace :: uintptr(117) + SYS_sched_setparam :: uintptr(118) + SYS_sched_setscheduler :: uintptr(119) + SYS_sched_getscheduler :: uintptr(120) + SYS_sched_getparam :: uintptr(121) + SYS_sched_setaffinity :: uintptr(122) + SYS_sched_getaffinity :: uintptr(123) + SYS_sched_yield :: uintptr(124) + SYS_sched_get_priority_max :: uintptr(125) + SYS_sched_get_priority_min :: uintptr(126) + SYS_sched_rr_get_interval :: uintptr(127) + SYS_restart_syscall :: uintptr(128) + SYS_kill :: uintptr(129) + SYS_tkill :: uintptr(130) + SYS_tgkill :: uintptr(131) + SYS_sigaltstack :: uintptr(132) + SYS_rt_sigsuspend :: uintptr(133) + SYS_rt_sigaction :: uintptr(134) + SYS_rt_sigprocmask :: uintptr(135) + SYS_rt_sigpending :: uintptr(136) + SYS_rt_sigtimedwait :: uintptr(137) + SYS_rt_sigqueueinfo :: uintptr(138) + SYS_rt_sigreturn :: uintptr(139) + SYS_setpriority :: uintptr(140) + SYS_getpriority :: uintptr(141) + SYS_reboot :: uintptr(142) + SYS_setregid :: uintptr(143) + SYS_setgid :: uintptr(144) + SYS_setreuid :: uintptr(145) + SYS_setuid :: uintptr(146) + SYS_setresuid :: uintptr(147) + SYS_getresuid :: uintptr(148) + SYS_setresgid :: uintptr(149) + SYS_getresgid :: uintptr(150) + SYS_setfsuid :: uintptr(151) + SYS_setfsgid :: uintptr(152) + SYS_times :: uintptr(153) + SYS_setpgid :: uintptr(154) + SYS_getpgid :: uintptr(155) + SYS_getsid :: uintptr(156) + SYS_setsid :: uintptr(157) + SYS_getgroups :: uintptr(158) + SYS_setgroups :: uintptr(159) + SYS_uname :: uintptr(160) + SYS_sethostname :: uintptr(161) + SYS_setdomainname :: uintptr(162) + SYS_getrlimit :: uintptr(163) + SYS_setrlimit :: uintptr(164) + SYS_getrusage :: uintptr(165) + SYS_umask :: uintptr(166) + SYS_prctl :: uintptr(167) + SYS_getcpu :: uintptr(168) + SYS_gettimeofday :: uintptr(169) + SYS_settimeofday :: uintptr(170) + SYS_adjtimex :: uintptr(171) + SYS_getpid :: uintptr(172) + SYS_getppid :: uintptr(173) + SYS_getuid :: uintptr(174) + SYS_geteuid :: uintptr(175) + SYS_getgid :: uintptr(176) + SYS_getegid :: uintptr(177) + SYS_gettid :: uintptr(178) + SYS_sysinfo :: uintptr(179) + SYS_mq_open :: uintptr(180) + SYS_mq_unlink :: uintptr(181) + SYS_mq_timedsend :: uintptr(182) + SYS_mq_timedreceive :: uintptr(183) + SYS_mq_notify :: uintptr(184) + SYS_mq_getsetattr :: uintptr(185) + SYS_msgget :: uintptr(186) + SYS_msgctl :: uintptr(187) + SYS_msgrcv :: uintptr(188) + SYS_msgsnd :: uintptr(189) + SYS_semget :: uintptr(190) + SYS_semctl :: uintptr(191) + SYS_semtimedop :: uintptr(192) + SYS_semop :: uintptr(193) + SYS_shmget :: uintptr(194) + SYS_shmctl :: uintptr(195) + SYS_shmat :: uintptr(196) + SYS_shmdt :: uintptr(197) + SYS_socket :: uintptr(198) + SYS_socketpair :: uintptr(199) + SYS_bind :: uintptr(200) + SYS_listen :: uintptr(201) + SYS_accept :: uintptr(202) + SYS_connect :: uintptr(203) + SYS_getsockname :: uintptr(204) + SYS_getpeername :: uintptr(205) + SYS_sendto :: uintptr(206) + SYS_recvfrom :: uintptr(207) + SYS_setsockopt :: uintptr(208) + SYS_getsockopt :: uintptr(209) + SYS_shutdown :: uintptr(210) + SYS_sendmsg :: uintptr(211) + SYS_recvmsg :: uintptr(212) + SYS_readahead :: uintptr(213) + SYS_brk :: uintptr(214) + SYS_munmap :: uintptr(215) + SYS_mremap :: uintptr(216) + SYS_add_key :: uintptr(217) + SYS_request_key :: uintptr(218) + SYS_keyctl :: uintptr(219) + SYS_clone :: uintptr(220) + SYS_execve :: uintptr(221) + SYS_mmap :: uintptr(222) + SYS_fadvise64 :: uintptr(223) + SYS_swapon :: uintptr(224) + SYS_swapoff :: uintptr(225) + SYS_mprotect :: uintptr(226) + SYS_msync :: uintptr(227) + SYS_mlock :: uintptr(228) + SYS_munlock :: uintptr(229) + SYS_mlockall :: uintptr(230) + SYS_munlockall :: uintptr(231) + SYS_mincore :: uintptr(232) + SYS_madvise :: uintptr(233) + SYS_remap_file_pages :: uintptr(234) + SYS_mbind :: uintptr(235) + SYS_get_mempolicy :: uintptr(236) + SYS_set_mempolicy :: uintptr(237) + SYS_migrate_pages :: uintptr(238) + SYS_move_pages :: uintptr(239) + SYS_rt_tgsigqueueinfo :: uintptr(240) + SYS_perf_event_open :: uintptr(241) + SYS_accept4 :: uintptr(242) + SYS_recvmmsg :: uintptr(243) + SYS_wait4 :: uintptr(260) + SYS_prlimit64 :: uintptr(261) + SYS_fanotify_init :: uintptr(262) + SYS_fanotify_mark :: uintptr(263) + SYS_name_to_handle_at :: uintptr(264) + SYS_open_by_handle_at :: uintptr(265) + SYS_clock_adjtime :: uintptr(266) + SYS_syncfs :: uintptr(267) + SYS_setns :: uintptr(268) + SYS_sendmmsg :: uintptr(269) + SYS_process_vm_readv :: uintptr(270) + SYS_process_vm_writev :: uintptr(271) + SYS_kcmp :: uintptr(272) + SYS_finit_module :: uintptr(273) + SYS_sched_setattr :: uintptr(274) + SYS_sched_getattr :: uintptr(275) + SYS_renameat2 :: uintptr(276) + SYS_seccomp :: uintptr(277) + SYS_getrandom :: uintptr(278) + SYS_memfd_create :: uintptr(279) + SYS_bpf :: uintptr(280) + SYS_execveat :: uintptr(281) + SYS_userfaultfd :: uintptr(282) + SYS_membarrier :: uintptr(283) + SYS_mlock2 :: uintptr(284) + SYS_copy_file_range :: uintptr(285) + SYS_preadv2 :: uintptr(286) + SYS_pwritev2 :: uintptr(287) + SYS_pkey_mprotect :: uintptr(288) + SYS_pkey_alloc :: uintptr(289) + SYS_pkey_free :: uintptr(290) + SYS_statx :: uintptr(291) + SYS_io_pgetevents :: uintptr(292) + SYS_rseq :: uintptr(293) + SYS_kexec_file_load :: uintptr(294) + SYS_clock_gettime64 :: uintptr(403) + SYS_clock_settime64 :: uintptr(404) + SYS_clock_adjtime64 :: uintptr(405) + SYS_clock_getres_time64 :: uintptr(406) + SYS_clock_nanosleep_time64 :: uintptr(407) + SYS_timer_gettime64 :: uintptr(408) + SYS_timer_settime64 :: uintptr(409) + SYS_timerfd_gettime64 :: uintptr(410) + SYS_timerfd_settime64 :: uintptr(411) + SYS_utimensat_time64 :: uintptr(412) + SYS_pselect6_time64 :: uintptr(413) + SYS_ppoll_time64 :: uintptr(414) + SYS_io_pgetevents_time64 :: uintptr(416) + SYS_recvmmsg_time64 :: uintptr(417) + SYS_mq_timedsend_time64 :: uintptr(418) + SYS_mq_timedreceive_time64 :: uintptr(419) + SYS_semtimedop_time64 :: uintptr(420) + SYS_rt_sigtimedwait_time64 :: uintptr(421) + SYS_futex_time64 :: uintptr(422) + SYS_sched_rr_get_interval_time64 :: uintptr(423) + SYS_pidfd_send_signal :: uintptr(424) + SYS_io_uring_setup :: uintptr(425) + SYS_io_uring_enter :: uintptr(426) + SYS_io_uring_register :: uintptr(427) + SYS_open_tree :: uintptr(428) + SYS_move_mount :: uintptr(429) + SYS_fsopen :: uintptr(430) + SYS_fsconfig :: uintptr(431) + SYS_fsmount :: uintptr(432) + SYS_fspick :: uintptr(433) + SYS_pidfd_open :: uintptr(434) + SYS_clone3 :: uintptr(435) + SYS_close_range :: uintptr(436) + SYS_openat2 :: uintptr(437) + SYS_pidfd_getfd :: uintptr(438) + SYS_faccessat2 :: uintptr(439) + SYS_process_madvise :: uintptr(440) + SYS_epoll_pwait2 :: uintptr(441) + SYS_mount_setattr :: uintptr(442) + SYS_quotactl_fd :: uintptr(443) + SYS_landlock_create_ruleset :: uintptr(444) + SYS_landlock_add_rule :: uintptr(445) + SYS_landlock_restrict_self :: uintptr(446) + SYS_memfd_secret :: uintptr(447) + SYS_process_mrelease :: uintptr(448) + SYS_futex_waitv :: uintptr(449) + SYS_set_mempolicy_home_node :: uintptr(450) + SYS_cachestat :: uintptr(451) + SYS_fchmodat2 :: uintptr(452) + + SIGCHLD :: 17 } else { #panic("Unsupported architecture") } @@ -1742,7 +2074,7 @@ sys_getrandom :: proc "contextless" (buf: [^]byte, buflen: uint, flags: int) -> } sys_open :: proc "contextless" (path: cstring, flags: int, mode: uint = 0o000) -> int { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_open, uintptr(rawptr(path)), uintptr(flags), uintptr(mode))) } else { // NOTE: arm64 does not have open return int(intrinsics.syscall(SYS_openat, AT_FDCWD, uintptr(rawptr(path)), uintptr(flags), uintptr(mode))) @@ -1762,7 +2094,7 @@ sys_read :: proc "contextless" (fd: int, buf: rawptr, size: uint) -> int { } sys_pread :: proc "contextless" (fd: int, buf: rawptr, size: uint, offset: i64) -> int { - when ODIN_ARCH == .amd64 || ODIN_ARCH == .arm64 { + when ODIN_ARCH == .amd64 || ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { return int(intrinsics.syscall(SYS_pread64, uintptr(fd), uintptr(buf), uintptr(size), uintptr(offset))) } else { low := uintptr(offset & 0xFFFFFFFF) @@ -1776,7 +2108,7 @@ sys_write :: proc "contextless" (fd: int, buf: rawptr, size: uint) -> int { } sys_pwrite :: proc "contextless" (fd: int, buf: rawptr, size: uint, offset: i64) -> int { - when ODIN_ARCH == .amd64 || ODIN_ARCH == .arm64 { + when ODIN_ARCH == .amd64 || ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { return int(intrinsics.syscall(SYS_pwrite64, uintptr(fd), uintptr(buf), uintptr(size), uintptr(offset))) } else { low := uintptr(offset & 0xFFFFFFFF) @@ -1786,7 +2118,7 @@ sys_pwrite :: proc "contextless" (fd: int, buf: rawptr, size: uint, offset: i64) } sys_lseek :: proc "contextless" (fd: int, offset: i64, whence: int) -> i64 { - when ODIN_ARCH == .amd64 || ODIN_ARCH == .arm64 { + when ODIN_ARCH == .amd64 || ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { return i64(intrinsics.syscall(SYS_lseek, uintptr(fd), uintptr(offset), uintptr(whence))) } else { low := uintptr(offset & 0xFFFFFFFF) @@ -1800,7 +2132,7 @@ sys_lseek :: proc "contextless" (fd: int, offset: i64, whence: int) -> i64 { sys_stat :: proc "contextless" (path: cstring, stat: rawptr) -> int { when ODIN_ARCH == .amd64 { return int(intrinsics.syscall(SYS_stat, uintptr(rawptr(path)), uintptr(stat))) - } else when ODIN_ARCH != .arm64 { + } else when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_stat64, uintptr(rawptr(path)), uintptr(stat))) } else { // NOTE: arm64 does not have stat return int(intrinsics.syscall(SYS_fstatat, AT_FDCWD, uintptr(rawptr(path)), uintptr(stat), 0)) @@ -1808,7 +2140,7 @@ sys_stat :: proc "contextless" (path: cstring, stat: rawptr) -> int { } sys_fstat :: proc "contextless" (fd: int, stat: rawptr) -> int { - when ODIN_ARCH == .amd64 || ODIN_ARCH == .arm64 { + when ODIN_ARCH == .amd64 || ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { return int(intrinsics.syscall(SYS_fstat, uintptr(fd), uintptr(stat))) } else { return int(intrinsics.syscall(SYS_fstat64, uintptr(fd), uintptr(stat))) @@ -1818,7 +2150,7 @@ sys_fstat :: proc "contextless" (fd: int, stat: rawptr) -> int { sys_lstat :: proc "contextless" (path: cstring, stat: rawptr) -> int { when ODIN_ARCH == .amd64 { return int(intrinsics.syscall(SYS_lstat, uintptr(rawptr(path)), uintptr(stat))) - } else when ODIN_ARCH != .arm64 { + } else when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_lstat64, uintptr(rawptr(path)), uintptr(stat))) } else { // NOTE: arm64 does not have any lstat return int(intrinsics.syscall(SYS_fstatat, AT_FDCWD, uintptr(rawptr(path)), uintptr(stat), AT_SYMLINK_NOFOLLOW)) @@ -1826,7 +2158,7 @@ sys_lstat :: proc "contextless" (path: cstring, stat: rawptr) -> int { } sys_readlink :: proc "contextless" (path: cstring, buf: rawptr, bufsiz: uint) -> int { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_readlink, uintptr(rawptr(path)), uintptr(buf), uintptr(bufsiz))) } else { // NOTE: arm64 does not have readlink return int(intrinsics.syscall(SYS_readlinkat, AT_FDCWD, uintptr(rawptr(path)), uintptr(buf), uintptr(bufsiz))) @@ -1834,7 +2166,7 @@ sys_readlink :: proc "contextless" (path: cstring, buf: rawptr, bufsiz: uint) -> } sys_symlink :: proc "contextless" (old_name: cstring, new_name: cstring) -> int { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_symlink, uintptr(rawptr(old_name)), uintptr(rawptr(new_name)))) } else { // NOTE: arm64 does not have symlink return int(intrinsics.syscall(SYS_symlinkat, uintptr(rawptr(old_name)), AT_FDCWD, uintptr(rawptr(new_name)))) @@ -1842,7 +2174,7 @@ sys_symlink :: proc "contextless" (old_name: cstring, new_name: cstring) -> int } sys_access :: proc "contextless" (path: cstring, mask: int) -> int { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_access, uintptr(rawptr(path)), uintptr(mask))) } else { // NOTE: arm64 does not have access return int(intrinsics.syscall(SYS_faccessat, AT_FDCWD, uintptr(rawptr(path)), uintptr(mask))) @@ -1862,7 +2194,7 @@ sys_fchdir :: proc "contextless" (fd: int) -> int { } sys_chmod :: proc "contextless" (path: cstring, mode: uint) -> int { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_chmod, uintptr(rawptr(path)), uintptr(mode))) } else { // NOTE: arm64 does not have chmod return int(intrinsics.syscall(SYS_fchmodat, AT_FDCWD, uintptr(rawptr(path)), uintptr(mode))) @@ -1874,7 +2206,7 @@ sys_fchmod :: proc "contextless" (fd: int, mode: uint) -> int { } sys_chown :: proc "contextless" (path: cstring, user: int, group: int) -> int { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH !=. riscv64 { return int(intrinsics.syscall(SYS_chown, uintptr(rawptr(path)), uintptr(user), uintptr(group))) } else { // NOTE: arm64 does not have chown return int(intrinsics.syscall(SYS_fchownat, AT_FDCWD, uintptr(rawptr(path)), uintptr(user), uintptr(group), 0)) @@ -1886,7 +2218,7 @@ sys_fchown :: proc "contextless" (fd: int, user: int, group: int) -> int { } sys_lchown :: proc "contextless" (path: cstring, user: int, group: int) -> int { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_lchown, uintptr(rawptr(path)), uintptr(user), uintptr(group))) } else { // NOTE: arm64 does not have lchown return int(intrinsics.syscall(SYS_fchownat, AT_FDCWD, uintptr(rawptr(path)), uintptr(user), uintptr(group), AT_SYMLINK_NOFOLLOW)) @@ -1894,7 +2226,7 @@ sys_lchown :: proc "contextless" (path: cstring, user: int, group: int) -> int { } sys_rename :: proc "contextless" (old, new: cstring) -> int { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_rename, uintptr(rawptr(old)), uintptr(rawptr(new)))) } else { // NOTE: arm64 does not have rename return int(intrinsics.syscall(SYS_renameat, AT_FDCWD, uintptr(rawptr(old)), uintptr(rawptr(new)))) @@ -1902,7 +2234,7 @@ sys_rename :: proc "contextless" (old, new: cstring) -> int { } sys_link :: proc "contextless" (old_name: cstring, new_name: cstring) -> int { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_link, uintptr(rawptr(old_name)), uintptr(rawptr(new_name)))) } else { // NOTE: arm64 does not have link return int(intrinsics.syscall(SYS_linkat, AT_FDCWD, uintptr(rawptr(old_name)), AT_FDCWD, uintptr(rawptr(new_name)), AT_SYMLINK_FOLLOW)) @@ -1910,7 +2242,7 @@ sys_link :: proc "contextless" (old_name: cstring, new_name: cstring) -> int { } sys_unlink :: proc "contextless" (path: cstring) -> int { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_unlink, uintptr(rawptr(path)))) } else { // NOTE: arm64 does not have unlink return int(intrinsics.syscall(SYS_unlinkat, AT_FDCWD, uintptr(rawptr(path)), 0)) @@ -1922,7 +2254,7 @@ sys_unlinkat :: proc "contextless" (dfd: int, path: cstring, flag: int = 0) -> i } sys_rmdir :: proc "contextless" (path: cstring) -> int { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_rmdir, uintptr(rawptr(path)))) } else { // NOTE: arm64 does not have rmdir return int(intrinsics.syscall(SYS_unlinkat, AT_FDCWD, uintptr(rawptr(path)), AT_REMOVEDIR)) @@ -1930,7 +2262,7 @@ sys_rmdir :: proc "contextless" (path: cstring) -> int { } sys_mkdir :: proc "contextless" (path: cstring, mode: uint) -> int { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_mkdir, uintptr(rawptr(path)), uintptr(mode))) } else { // NOTE: arm64 does not have mkdir return int(intrinsics.syscall(SYS_mkdirat, AT_FDCWD, uintptr(rawptr(path)), uintptr(mode))) @@ -1942,7 +2274,7 @@ sys_mkdirat :: proc "contextless" (dfd: int, path: cstring, mode: uint) -> int { } sys_mknod :: proc "contextless" (path: cstring, mode: uint, dev: int) -> int { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_mknod, uintptr(rawptr(path)), uintptr(mode), uintptr(dev))) } else { // NOTE: arm64 does not have mknod return int(intrinsics.syscall(SYS_mknodat, AT_FDCWD, uintptr(rawptr(path)), uintptr(mode), uintptr(dev))) @@ -1954,7 +2286,7 @@ sys_mknodat :: proc "contextless" (dfd: int, path: cstring, mode: uint, dev: int } sys_truncate :: proc "contextless" (path: cstring, length: i64) -> int { - when ODIN_ARCH == .amd64 || ODIN_ARCH == .arm64 { + when ODIN_ARCH == .amd64 || ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { return int(intrinsics.syscall(SYS_truncate, uintptr(rawptr(path)), uintptr(length))) } else { low := uintptr(length & 0xFFFFFFFF) @@ -1964,7 +2296,7 @@ sys_truncate :: proc "contextless" (path: cstring, length: i64) -> int { } sys_ftruncate :: proc "contextless" (fd: int, length: i64) -> int { - when ODIN_ARCH == .amd64 || ODIN_ARCH == .arm64 { + when ODIN_ARCH == .amd64 || ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { return int(intrinsics.syscall(SYS_ftruncate, uintptr(fd), uintptr(length))) } else { low := uintptr(length & 0xFFFFFFFF) @@ -1982,7 +2314,7 @@ sys_getdents64 :: proc "contextless" (fd: int, dirent: rawptr, count: int) -> in } sys_fork :: proc "contextless" () -> int { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_fork)) } else { return int(intrinsics.syscall(SYS_clone, SIGCHLD)) @@ -1992,7 +2324,7 @@ sys_pipe2 :: proc "contextless" (fds: rawptr, flags: int) -> int { return int(intrinsics.syscall(SYS_pipe2, uintptr(fds), uintptr(flags))) } sys_dup2 :: proc "contextless" (oldfd: int, newfd: int) -> int { - when ODIN_ARCH != .arm64 { + when ODIN_ARCH != .arm64 && ODIN_ARCH != .riscv64 { return int(intrinsics.syscall(SYS_dup2, uintptr(oldfd), uintptr(newfd))) } else { return int(intrinsics.syscall(SYS_dup3, uintptr(oldfd), uintptr(newfd), 0)) @@ -2076,7 +2408,7 @@ sys_fcntl :: proc "contextless" (fd: int, cmd: int, arg: int) -> int { sys_poll :: proc "contextless" (fds: rawptr, nfds: uint, timeout: int) -> int { // NOTE: specialcased here because `arm64` does not have `poll` - when ODIN_ARCH == .arm64 { + when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 { seconds := i64(timeout / 1_000) nanoseconds := i64((timeout % 1000) * 1_000_000) timeout_spec := timespec{seconds, nanoseconds} diff --git a/misc/featuregen/README.md b/misc/featuregen/README.md index 22a798cca..82d95a2b6 100644 --- a/misc/featuregen/README.md +++ b/misc/featuregen/README.md @@ -5,7 +5,7 @@ for features regarding microarchitecture and target features of the compiler. It is not pretty! But LLVM has no way to query this information with their C API. -It generates these globals (intended for `src/build_settings.cpp`: +It generates these globals (intended for `src/build_settings_microarch.cpp`: - `target_microarch_list`: an array of strings indexed by the architecture, each string is a comma-seperated list of microarchitectures available on that architecture - `target_features_list`: an array of strings indexed by the architecture, each string is a comma-seperated list of target features available on that architecture @@ -23,6 +23,6 @@ does not impact much at all, the only thing it will do is make LLVM print a mess ## Usage 1. Make sure the table of architectures at the top of the python script is up-to-date (the triple can be any valid triple for the architecture) -1. `./build.sh` +1. `./build_featuregen.sh` 1. `python3 featuregen.py` 1. Copy the output into `src/build_settings.cpp` diff --git a/misc/featuregen/build_featuregen.sh b/misc/featuregen/build_featuregen.sh new file mode 100755 index 000000000..d68f29925 --- /dev/null +++ b/misc/featuregen/build_featuregen.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +set -ex + +$(llvm-config --bindir)/clang++ $(llvm-config --cxxflags --ldflags --libs) featuregen.cpp -o featuregen diff --git a/misc/featuregen/featuregen.py b/misc/featuregen/featuregen.py index da4cc68f5..ecc47f70c 100644 --- a/misc/featuregen/featuregen.py +++ b/misc/featuregen/featuregen.py @@ -4,12 +4,13 @@ import os import sys archs = [ - ("amd64", "linux_amd64", "x86_64-pc-linux-gnu", [], []), - ("i386", "linux_i386", "i386-pc-linux-gnu", [], []), - ("arm32", "linux_arm32", "arm-linux-gnu", [], []), - ("arm64", "linux_arm64", "aarch64-linux-elf", [], []), - ("wasm32", "js_wasm32", "wasm32-js-js", [], []), - ("wasm64p32", "js_wasm64p32","wasm32-js-js", [], []), + ("amd64", "linux_amd64", "x86_64-pc-linux-gnu", [], []), + ("i386", "linux_i386", "i386-pc-linux-gnu", [], []), + ("arm32", "linux_arm32", "arm-linux-gnu", [], []), + ("arm64", "linux_arm64", "aarch64-linux-elf", [], []), + ("wasm32", "js_wasm32", "wasm32-js-js", [], []), + ("wasm64p32", "js_wasm64p32", "wasm32-js-js", [], []), + ("riscv64", "linux_riscv64", "riscv64-linux-gnu", [], []), ]; SEEKING_CPUS = 0 @@ -78,7 +79,8 @@ print("\t// TargetArch_Invalid:") print('\tstr_lit(""),') for arch, target, triple, cpus, features in archs: print(f"\t// TargetArch_{arch}:") - print(f'\tstr_lit("{','.join(cpus)}"),') + cpus_str = ','.join(cpus) + print(f'\tstr_lit("{cpus_str}"),') print("};") print("") @@ -89,7 +91,8 @@ print("\t// TargetArch_Invalid:") print('\tstr_lit(""),') for arch, target, triple, cpus, features in archs: print(f"\t// TargetArch_{arch}:") - print(f'\tstr_lit("{','.join(features)}"),') + features_str = ','.join(features) + print(f'\tstr_lit("{features_str}"),') print("};") print("") diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 82523d736..3d56f4202 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -39,6 +39,7 @@ enum TargetArchKind : u16 { TargetArch_arm64, TargetArch_wasm32, TargetArch_wasm64p32, + TargetArch_riscv64, TargetArch_COUNT, }; @@ -104,6 +105,7 @@ gb_global String target_arch_names[TargetArch_COUNT] = { str_lit("arm64"), str_lit("wasm32"), str_lit("wasm64p32"), + str_lit("riscv64"), }; #include "build_settings_microarch.cpp" @@ -555,13 +557,18 @@ gb_global TargetMetrics target_linux_arm64 = { 8, 8, 16, 32, str_lit("aarch64-linux-elf"), }; - gb_global TargetMetrics target_linux_arm32 = { TargetOs_linux, TargetArch_arm32, 4, 4, 8, 16, str_lit("arm-unknown-linux-gnueabihf"), }; +gb_global TargetMetrics target_linux_riscv64 = { + TargetOs_linux, + TargetArch_riscv64, + 8, 8, 16, 32, + str_lit("riscv64-linux-gnu"), +}; gb_global TargetMetrics target_darwin_amd64 = { TargetOs_darwin, @@ -716,6 +723,12 @@ gb_global TargetMetrics target_freestanding_arm32 = { 4, 4, 8, 16, str_lit("arm-unknown-unknown-gnueabihf"), }; +gb_global TargetMetrics target_freestanding_riscv64 = { + TargetOs_freestanding, + TargetArch_riscv64, + 8, 8, 16, 32, + str_lit("riscv64-unknown-gnu"), +}; struct NamedTargetMetrics { @@ -733,6 +746,7 @@ gb_global NamedTargetMetrics named_targets[] = { { str_lit("linux_amd64"), &target_linux_amd64 }, { str_lit("linux_arm64"), &target_linux_arm64 }, { str_lit("linux_arm32"), &target_linux_arm32 }, + { str_lit("linux_riscv64"), &target_linux_riscv64 }, { str_lit("windows_i386"), &target_windows_i386 }, { str_lit("windows_amd64"), &target_windows_amd64 }, @@ -761,6 +775,8 @@ gb_global NamedTargetMetrics named_targets[] = { { str_lit("freestanding_arm64"), &target_freestanding_arm64 }, { str_lit("freestanding_arm32"), &target_freestanding_arm32 }, + + { str_lit("freestanding_riscv64"), &target_freestanding_riscv64 }, }; gb_global NamedTargetMetrics *selected_target_metrics; @@ -1631,6 +1647,8 @@ gb_internal void init_build_context(TargetMetrics *cross_target, Subtarget subta // Disallow on wasm bc->use_separate_modules = false; + } if(bc->metrics.arch == TargetArch_riscv64) { + bc->link_flags = str_lit("-target riscv64 "); } else { // NOTE: for targets other than darwin, we don't specify a `-target` link flag. // This is because we don't support cross-linking and clang is better at figuring diff --git a/src/build_settings_microarch.cpp b/src/build_settings_microarch.cpp index 02b507031..8f64d4026 100644 --- a/src/build_settings_microarch.cpp +++ b/src/build_settings_microarch.cpp @@ -3,17 +3,19 @@ gb_global String target_microarch_list[TargetArch_COUNT] = { // TargetArch_Invalid: str_lit(""), // TargetArch_amd64: - str_lit("alderlake,amdfam10,athlon,athlon-4,athlon-fx,athlon-mp,athlon-tbird,athlon-xp,athlon64,athlon64-sse3,atom,atom_sse4_2,atom_sse4_2_movbe,barcelona,bdver1,bdver2,bdver3,bdver4,bonnell,broadwell,btver1,btver2,c3,c3-2,cannonlake,cascadelake,cooperlake,core-avx-i,core-avx2,core2,core_2_duo_sse4_1,core_2_duo_ssse3,core_2nd_gen_avx,core_3rd_gen_avx,core_4th_gen_avx,core_4th_gen_avx_tsx,core_5th_gen_avx,core_5th_gen_avx_tsx,core_aes_pclmulqdq,core_i7_sse4_2,corei7,corei7-avx,emeraldrapids,generic,geode,goldmont,goldmont-plus,goldmont_plus,grandridge,graniterapids,graniterapids-d,graniterapids_d,haswell,i386,i486,i586,i686,icelake-client,icelake-server,icelake_client,icelake_server,ivybridge,k6,k6-2,k6-3,k8,k8-sse3,knl,knm,lakemont,meteorlake,mic_avx512,nehalem,nocona,opteron,opteron-sse3,penryn,pentium,pentium-m,pentium-mmx,pentium2,pentium3,pentium3m,pentium4,pentium4m,pentium_4,pentium_4_sse3,pentium_ii,pentium_iii,pentium_iii_no_xmm_regs,pentium_m,pentium_mmx,pentium_pro,pentiumpro,prescott,raptorlake,rocketlake,sandybridge,sapphirerapids,sierraforest,silvermont,skx,skylake,skylake-avx512,skylake_avx512,slm,tigerlake,tremont,westmere,winchip-c6,winchip2,x86-64,x86-64-v2,x86-64-v3,x86-64-v4,yonah,znver1,znver2,znver3,znver4"), + str_lit("alderlake,amdfam10,arrowlake,arrowlake-s,arrowlake_s,athlon,athlon-4,athlon-fx,athlon-mp,athlon-tbird,athlon-xp,athlon64,athlon64-sse3,atom,atom_sse4_2,atom_sse4_2_movbe,barcelona,bdver1,bdver2,bdver3,bdver4,bonnell,broadwell,btver1,btver2,c3,c3-2,cannonlake,cascadelake,clearwaterforest,cooperlake,core-avx-i,core-avx2,core2,core_2_duo_sse4_1,core_2_duo_ssse3,core_2nd_gen_avx,core_3rd_gen_avx,core_4th_gen_avx,core_4th_gen_avx_tsx,core_5th_gen_avx,core_5th_gen_avx_tsx,core_aes_pclmulqdq,core_i7_sse4_2,corei7,corei7-avx,emeraldrapids,generic,geode,goldmont,goldmont-plus,goldmont_plus,gracemont,grandridge,graniterapids,graniterapids-d,graniterapids_d,haswell,i386,i486,i586,i686,icelake-client,icelake-server,icelake_client,icelake_server,ivybridge,k6,k6-2,k6-3,k8,k8-sse3,knl,knm,lakemont,lunarlake,meteorlake,mic_avx512,nehalem,nocona,opteron,opteron-sse3,pantherlake,penryn,pentium,pentium-m,pentium-mmx,pentium2,pentium3,pentium3m,pentium4,pentium4m,pentium_4,pentium_4_sse3,pentium_ii,pentium_iii,pentium_iii_no_xmm_regs,pentium_m,pentium_mmx,pentium_pro,pentiumpro,prescott,raptorlake,rocketlake,sandybridge,sapphirerapids,sierraforest,silvermont,skx,skylake,skylake-avx512,skylake_avx512,slm,tigerlake,tremont,westmere,winchip-c6,winchip2,x86-64,x86-64-v2,x86-64-v3,x86-64-v4,yonah,znver1,znver2,znver3,znver4"), // TargetArch_i386: - str_lit("alderlake,amdfam10,athlon,athlon-4,athlon-fx,athlon-mp,athlon-tbird,athlon-xp,athlon64,athlon64-sse3,atom,atom_sse4_2,atom_sse4_2_movbe,barcelona,bdver1,bdver2,bdver3,bdver4,bonnell,broadwell,btver1,btver2,c3,c3-2,cannonlake,cascadelake,cooperlake,core-avx-i,core-avx2,core2,core_2_duo_sse4_1,core_2_duo_ssse3,core_2nd_gen_avx,core_3rd_gen_avx,core_4th_gen_avx,core_4th_gen_avx_tsx,core_5th_gen_avx,core_5th_gen_avx_tsx,core_aes_pclmulqdq,core_i7_sse4_2,corei7,corei7-avx,emeraldrapids,generic,geode,goldmont,goldmont-plus,goldmont_plus,grandridge,graniterapids,graniterapids-d,graniterapids_d,haswell,i386,i486,i586,i686,icelake-client,icelake-server,icelake_client,icelake_server,ivybridge,k6,k6-2,k6-3,k8,k8-sse3,knl,knm,lakemont,meteorlake,mic_avx512,nehalem,nocona,opteron,opteron-sse3,penryn,pentium,pentium-m,pentium-mmx,pentium2,pentium3,pentium3m,pentium4,pentium4m,pentium_4,pentium_4_sse3,pentium_ii,pentium_iii,pentium_iii_no_xmm_regs,pentium_m,pentium_mmx,pentium_pro,pentiumpro,prescott,raptorlake,rocketlake,sandybridge,sapphirerapids,sierraforest,silvermont,skx,skylake,skylake-avx512,skylake_avx512,slm,tigerlake,tremont,westmere,winchip-c6,winchip2,x86-64,x86-64-v2,x86-64-v3,x86-64-v4,yonah,znver1,znver2,znver3,znver4"), + str_lit("alderlake,amdfam10,arrowlake,arrowlake-s,arrowlake_s,athlon,athlon-4,athlon-fx,athlon-mp,athlon-tbird,athlon-xp,athlon64,athlon64-sse3,atom,atom_sse4_2,atom_sse4_2_movbe,barcelona,bdver1,bdver2,bdver3,bdver4,bonnell,broadwell,btver1,btver2,c3,c3-2,cannonlake,cascadelake,clearwaterforest,cooperlake,core-avx-i,core-avx2,core2,core_2_duo_sse4_1,core_2_duo_ssse3,core_2nd_gen_avx,core_3rd_gen_avx,core_4th_gen_avx,core_4th_gen_avx_tsx,core_5th_gen_avx,core_5th_gen_avx_tsx,core_aes_pclmulqdq,core_i7_sse4_2,corei7,corei7-avx,emeraldrapids,generic,geode,goldmont,goldmont-plus,goldmont_plus,gracemont,grandridge,graniterapids,graniterapids-d,graniterapids_d,haswell,i386,i486,i586,i686,icelake-client,icelake-server,icelake_client,icelake_server,ivybridge,k6,k6-2,k6-3,k8,k8-sse3,knl,knm,lakemont,lunarlake,meteorlake,mic_avx512,nehalem,nocona,opteron,opteron-sse3,pantherlake,penryn,pentium,pentium-m,pentium-mmx,pentium2,pentium3,pentium3m,pentium4,pentium4m,pentium_4,pentium_4_sse3,pentium_ii,pentium_iii,pentium_iii_no_xmm_regs,pentium_m,pentium_mmx,pentium_pro,pentiumpro,prescott,raptorlake,rocketlake,sandybridge,sapphirerapids,sierraforest,silvermont,skx,skylake,skylake-avx512,skylake_avx512,slm,tigerlake,tremont,westmere,winchip-c6,winchip2,x86-64,x86-64-v2,x86-64-v3,x86-64-v4,yonah,znver1,znver2,znver3,znver4"), // TargetArch_arm32: - str_lit("arm1020e,arm1020t,arm1022e,arm10e,arm10tdmi,arm1136j-s,arm1136jf-s,arm1156t2-s,arm1156t2f-s,arm1176jz-s,arm1176jzf-s,arm710t,arm720t,arm7tdmi,arm7tdmi-s,arm8,arm810,arm9,arm920,arm920t,arm922t,arm926ej-s,arm940t,arm946e-s,arm966e-s,arm968e-s,arm9e,arm9tdmi,cortex-a12,cortex-a15,cortex-a17,cortex-a32,cortex-a35,cortex-a5,cortex-a53,cortex-a55,cortex-a57,cortex-a7,cortex-a710,cortex-a72,cortex-a73,cortex-a75,cortex-a76,cortex-a76ae,cortex-a77,cortex-a78,cortex-a78c,cortex-a8,cortex-a9,cortex-m0,cortex-m0plus,cortex-m1,cortex-m23,cortex-m3,cortex-m33,cortex-m35p,cortex-m4,cortex-m55,cortex-m7,cortex-m85,cortex-r4,cortex-r4f,cortex-r5,cortex-r52,cortex-r7,cortex-r8,cortex-x1,cortex-x1c,cyclone,ep9312,exynos-m3,exynos-m4,exynos-m5,generic,iwmmxt,krait,kryo,mpcore,mpcorenovfp,neoverse-n1,neoverse-n2,neoverse-v1,sc000,sc300,strongarm,strongarm110,strongarm1100,strongarm1110,swift,xscale"), + str_lit("arm1020e,arm1020t,arm1022e,arm10e,arm10tdmi,arm1136j-s,arm1136jf-s,arm1156t2-s,arm1156t2f-s,arm1176jz-s,arm1176jzf-s,arm710t,arm720t,arm7tdmi,arm7tdmi-s,arm8,arm810,arm9,arm920,arm920t,arm922t,arm926ej-s,arm940t,arm946e-s,arm966e-s,arm968e-s,arm9e,arm9tdmi,cortex-a12,cortex-a15,cortex-a17,cortex-a32,cortex-a35,cortex-a5,cortex-a53,cortex-a55,cortex-a57,cortex-a7,cortex-a710,cortex-a72,cortex-a73,cortex-a75,cortex-a76,cortex-a76ae,cortex-a77,cortex-a78,cortex-a78c,cortex-a8,cortex-a9,cortex-m0,cortex-m0plus,cortex-m1,cortex-m23,cortex-m3,cortex-m33,cortex-m35p,cortex-m4,cortex-m52,cortex-m55,cortex-m7,cortex-m85,cortex-r4,cortex-r4f,cortex-r5,cortex-r52,cortex-r7,cortex-r8,cortex-x1,cortex-x1c,cyclone,ep9312,exynos-m3,exynos-m4,exynos-m5,generic,iwmmxt,krait,kryo,mpcore,mpcorenovfp,neoverse-n1,neoverse-n2,neoverse-v1,sc000,sc300,strongarm,strongarm110,strongarm1100,strongarm1110,swift,xscale"), // TargetArch_arm64: - str_lit("a64fx,ampere1,ampere1a,apple-a10,apple-a11,apple-a12,apple-a13,apple-a14,apple-a15,apple-a16,apple-a7,apple-a8,apple-a9,apple-latest,apple-m1,apple-m2,apple-s4,apple-s5,carmel,cortex-a34,cortex-a35,cortex-a510,cortex-a53,cortex-a55,cortex-a57,cortex-a65,cortex-a65ae,cortex-a710,cortex-a715,cortex-a72,cortex-a73,cortex-a75,cortex-a76,cortex-a76ae,cortex-a77,cortex-a78,cortex-a78c,cortex-r82,cortex-x1,cortex-x1c,cortex-x2,cortex-x3,cyclone,exynos-m3,exynos-m4,exynos-m5,falkor,generic,kryo,neoverse-512tvb,neoverse-e1,neoverse-n1,neoverse-n2,neoverse-v1,neoverse-v2,saphira,thunderx,thunderx2t99,thunderx3t110,thunderxt81,thunderxt83,thunderxt88,tsv110"), + str_lit("a64fx,ampere1,ampere1a,ampere1b,apple-a10,apple-a11,apple-a12,apple-a13,apple-a14,apple-a15,apple-a16,apple-a17,apple-a7,apple-a8,apple-a9,apple-latest,apple-m1,apple-m2,apple-m3,apple-s4,apple-s5,carmel,cortex-a34,cortex-a35,cortex-a510,cortex-a520,cortex-a53,cortex-a55,cortex-a57,cortex-a65,cortex-a65ae,cortex-a710,cortex-a715,cortex-a72,cortex-a720,cortex-a73,cortex-a75,cortex-a76,cortex-a76ae,cortex-a77,cortex-a78,cortex-a78c,cortex-r82,cortex-x1,cortex-x1c,cortex-x2,cortex-x3,cortex-x4,cyclone,exynos-m3,exynos-m4,exynos-m5,falkor,generic,kryo,neoverse-512tvb,neoverse-e1,neoverse-n1,neoverse-n2,neoverse-v1,neoverse-v2,saphira,thunderx,thunderx2t99,thunderx3t110,thunderxt81,thunderxt83,thunderxt88,tsv110"), // TargetArch_wasm32: str_lit("bleeding-edge,generic,mvp"), // TargetArch_wasm64p32: str_lit("bleeding-edge,generic,mvp"), + // TargetArch_riscv64: + str_lit("generic,generic-rv32,generic-rv64,rocket,rocket-rv32,rocket-rv64,sifive-7-series,sifive-e20,sifive-e21,sifive-e24,sifive-e31,sifive-e34,sifive-e76,sifive-p450,sifive-p670,sifive-s21,sifive-s51,sifive-s54,sifive-s76,sifive-u54,sifive-u74,sifive-x280,syntacore-scr1-base,syntacore-scr1-max,veyron-v1,xiangshan-nanhu"), }; // Generated with the featuregen script in `misc/featuregen` @@ -21,17 +23,19 @@ gb_global String target_features_list[TargetArch_COUNT] = { // TargetArch_Invalid: str_lit(""), // TargetArch_amd64: - str_lit("16bit-mode,32bit-mode,3dnow,3dnowa,64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512er,avx512f,avx512fp16,avx512ifma,avx512pf,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vp2intersect,avx512vpopcntdq,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,branchfusion,cldemote,clflushopt,clwb,clzero,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,ermsb,f16c,false-deps-getmant,false-deps-lzcnt-tzcnt,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-popcnt,false-deps-range,fast-11bytenop,fast-15bytenop,fast-7bytenop,fast-bextr,fast-gather,fast-hops,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fast-vector-shift-masks,faster-shift-than-shuffle,fma,fma4,fsgsbase,fsrm,fxsr,gfni,harden-sls-ijmp,harden-sls-ret,hreset,idivl-to-divb,idivq-to-divl,invpcid,kl,lea-sp,lea-uses-ag,lvi-cfi,lvi-load-hardening,lwp,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,mwaitx,no-bypass-delay,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pad-short-functions,pclmul,pconfig,pku,popcnt,prefer-128-bit,prefer-256-bit,prefer-mask-registers,prefer-movmsk-over-vtest,prefer-no-gather,prefer-no-scatter,prefetchi,prefetchwt1,prfchw,ptwrite,raoint,rdpid,rdpru,rdrnd,rdseed,retpoline,retpoline-external-thunk,retpoline-indirect-branches,retpoline-indirect-calls,rtm,sahf,sbb-dep-breaking,serialize,seses,sgx,sha,sha512,shstk,slow-3ops-lea,slow-incdec,slow-lea,slow-pmaddwd,slow-pmulld,slow-shld,slow-two-mem-ops,slow-unaligned-mem-16,slow-unaligned-mem-32,sm3,sm4,soft-float,sse,sse-unaligned-mem,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tagged-globals,tbm,tsxldtrk,tuning-fast-imm-vector-shift,uintr,use-glm-div-sqrt-costs,use-slm-arith-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,widekl,x87,xop,xsave,xsavec,xsaveopt,xsaves"), + str_lit("16bit-mode,32bit-mode,3dnow,3dnowa,64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx10.1-256,avx10.1-512,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512er,avx512f,avx512fp16,avx512ifma,avx512pf,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vp2intersect,avx512vpopcntdq,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,branchfusion,ccmp,cf,cldemote,clflushopt,clwb,clzero,cmov,cmpccxadd,crc32,cx16,cx8,egpr,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-lzcnt-tzcnt,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-popcnt,false-deps-range,fast-11bytenop,fast-15bytenop,fast-7bytenop,fast-bextr,fast-gather,fast-hops,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fast-vector-shift-masks,faster-shift-than-shuffle,fma,fma4,fsgsbase,fsrm,fxsr,gfni,harden-sls-ijmp,harden-sls-ret,hreset,idivl-to-divb,idivq-to-divl,invpcid,kl,lea-sp,lea-uses-ag,lvi-cfi,lvi-load-hardening,lwp,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,mwaitx,ndd,no-bypass-delay,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pad-short-functions,pclmul,pconfig,pku,popcnt,ppx,prefer-128-bit,prefer-256-bit,prefer-mask-registers,prefer-movmsk-over-vtest,prefer-no-gather,prefer-no-scatter,prefetchi,prefetchwt1,prfchw,ptwrite,push2pop2,raoint,rdpid,rdpru,rdrnd,rdseed,retpoline,retpoline-external-thunk,retpoline-indirect-branches,retpoline-indirect-calls,rtm,sahf,sbb-dep-breaking,serialize,seses,sgx,sha,sha512,shstk,slow-3ops-lea,slow-incdec,slow-lea,slow-pmaddwd,slow-pmulld,slow-shld,slow-two-mem-ops,slow-unaligned-mem-16,slow-unaligned-mem-32,sm3,sm4,soft-float,sse,sse-unaligned-mem,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tagged-globals,tbm,tsxldtrk,tuning-fast-imm-vector-shift,uintr,use-glm-div-sqrt-costs,use-slm-arith-costs,usermsr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,widekl,x87,xop,xsave,xsavec,xsaveopt,xsaves"), // TargetArch_i386: - str_lit("16bit-mode,32bit-mode,3dnow,3dnowa,64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512er,avx512f,avx512fp16,avx512ifma,avx512pf,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vp2intersect,avx512vpopcntdq,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,branchfusion,cldemote,clflushopt,clwb,clzero,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,ermsb,f16c,false-deps-getmant,false-deps-lzcnt-tzcnt,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-popcnt,false-deps-range,fast-11bytenop,fast-15bytenop,fast-7bytenop,fast-bextr,fast-gather,fast-hops,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fast-vector-shift-masks,faster-shift-than-shuffle,fma,fma4,fsgsbase,fsrm,fxsr,gfni,harden-sls-ijmp,harden-sls-ret,hreset,idivl-to-divb,idivq-to-divl,invpcid,kl,lea-sp,lea-uses-ag,lvi-cfi,lvi-load-hardening,lwp,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,mwaitx,no-bypass-delay,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pad-short-functions,pclmul,pconfig,pku,popcnt,prefer-128-bit,prefer-256-bit,prefer-mask-registers,prefer-movmsk-over-vtest,prefer-no-gather,prefer-no-scatter,prefetchi,prefetchwt1,prfchw,ptwrite,raoint,rdpid,rdpru,rdrnd,rdseed,retpoline,retpoline-external-thunk,retpoline-indirect-branches,retpoline-indirect-calls,rtm,sahf,sbb-dep-breaking,serialize,seses,sgx,sha,sha512,shstk,slow-3ops-lea,slow-incdec,slow-lea,slow-pmaddwd,slow-pmulld,slow-shld,slow-two-mem-ops,slow-unaligned-mem-16,slow-unaligned-mem-32,sm3,sm4,soft-float,sse,sse-unaligned-mem,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tagged-globals,tbm,tsxldtrk,tuning-fast-imm-vector-shift,uintr,use-glm-div-sqrt-costs,use-slm-arith-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,widekl,x87,xop,xsave,xsavec,xsaveopt,xsaves"), + str_lit("16bit-mode,32bit-mode,3dnow,3dnowa,64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx10.1-256,avx10.1-512,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512er,avx512f,avx512fp16,avx512ifma,avx512pf,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vp2intersect,avx512vpopcntdq,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,branchfusion,ccmp,cf,cldemote,clflushopt,clwb,clzero,cmov,cmpccxadd,crc32,cx16,cx8,egpr,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-lzcnt-tzcnt,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-popcnt,false-deps-range,fast-11bytenop,fast-15bytenop,fast-7bytenop,fast-bextr,fast-gather,fast-hops,fast-lzcnt,fast-movbe,fast-scalar-fsqrt,fast-scalar-shift-masks,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fast-vector-shift-masks,faster-shift-than-shuffle,fma,fma4,fsgsbase,fsrm,fxsr,gfni,harden-sls-ijmp,harden-sls-ret,hreset,idivl-to-divb,idivq-to-divl,invpcid,kl,lea-sp,lea-uses-ag,lvi-cfi,lvi-load-hardening,lwp,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,mwaitx,ndd,no-bypass-delay,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pad-short-functions,pclmul,pconfig,pku,popcnt,ppx,prefer-128-bit,prefer-256-bit,prefer-mask-registers,prefer-movmsk-over-vtest,prefer-no-gather,prefer-no-scatter,prefetchi,prefetchwt1,prfchw,ptwrite,push2pop2,raoint,rdpid,rdpru,rdrnd,rdseed,retpoline,retpoline-external-thunk,retpoline-indirect-branches,retpoline-indirect-calls,rtm,sahf,sbb-dep-breaking,serialize,seses,sgx,sha,sha512,shstk,slow-3ops-lea,slow-incdec,slow-lea,slow-pmaddwd,slow-pmulld,slow-shld,slow-two-mem-ops,slow-unaligned-mem-16,slow-unaligned-mem-32,sm3,sm4,soft-float,sse,sse-unaligned-mem,sse2,sse3,sse4.1,sse4.2,sse4a,ssse3,tagged-globals,tbm,tsxldtrk,tuning-fast-imm-vector-shift,uintr,use-glm-div-sqrt-costs,use-slm-arith-costs,usermsr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,widekl,x87,xop,xsave,xsavec,xsaveopt,xsaves"), // TargetArch_arm32: - str_lit("32bit,8msecext,a12,a15,a17,a32,a35,a5,a53,a55,a57,a7,a72,a73,a75,a76,a77,a78c,a8,a9,aapcs-frame-chain,aapcs-frame-chain-leaf,aclass,acquire-release,aes,armv4,armv4t,armv5t,armv5te,armv5tej,armv6,armv6-m,armv6j,armv6k,armv6kz,armv6s-m,armv6t2,armv7-a,armv7-m,armv7-r,armv7e-m,armv7k,armv7s,armv7ve,armv8-a,armv8-m.base,armv8-m.main,armv8-r,armv8.1-a,armv8.1-m.main,armv8.2-a,armv8.3-a,armv8.4-a,armv8.5-a,armv8.6-a,armv8.7-a,armv8.8-a,armv8.9-a,armv9-a,armv9.1-a,armv9.2-a,armv9.3-a,armv9.4-a,atomics-32,avoid-movs-shop,avoid-partial-cpsr,bf16,big-endian-instructions,cde,cdecp0,cdecp1,cdecp2,cdecp3,cdecp4,cdecp5,cdecp6,cdecp7,cheap-predicable-cpsr,clrbhb,cortex-a710,cortex-a78,cortex-x1,cortex-x1c,crc,crypto,d32,db,dfb,disable-postra-scheduler,dont-widen-vmovs,dotprod,dsp,execute-only,expand-fp-mlx,exynos,fix-cmse-cve-2021-35465,fix-cortex-a57-aes-1742098,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp16fml,fp64,fpao,fpregs,fpregs16,fpregs64,fullfp16,fuse-aes,fuse-literals,harden-sls-blr,harden-sls-nocomdat,harden-sls-retbr,hwdiv,hwdiv-arm,i8mm,iwmmxt,iwmmxt2,krait,kryo,lob,long-calls,loop-align,m3,m7,mclass,mp,muxed-units,mve,mve.fp,mve1beat,mve2beat,mve4beat,nacl-trap,neon,neon-fpmovs,neonfp,neoverse-v1,no-branch-predictor,no-bti-at-return-twice,no-movt,no-neg-immediates,noarm,nonpipelined-vfp,pacbti,perfmon,prefer-ishst,prefer-vmovsr,prof-unpr,r4,r5,r52,r7,ras,rclass,read-tp-tpidrprw,read-tp-tpidruro,read-tp-tpidrurw,reserve-r9,ret-addr-stack,sb,sha2,slow-fp-brcc,slow-load-D-subreg,slow-odd-reg,slow-vdup32,slow-vgetlni32,slowfpvfmx,slowfpvmlx,soft-float,splat-vfp-neon,strict-align,swift,thumb-mode,thumb2,trustzone,use-mipipeliner,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.1m.main,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8.7a,v8.8a,v8.9a,v8m,v8m.main,v9.1a,v9.2a,v9.3a,v9.4a,v9a,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vldn-align,vmlx-forwarding,vmlx-hazards,wide-stride-vfp,xscale,zcz"), + str_lit("32bit,8msecext,a12,a15,a17,a32,a35,a5,a53,a55,a57,a7,a72,a73,a75,a76,a77,a78c,a8,a9,aapcs-frame-chain,aapcs-frame-chain-leaf,aclass,acquire-release,aes,armv4,armv4t,armv5t,armv5te,armv5tej,armv6,armv6-m,armv6j,armv6k,armv6kz,armv6s-m,armv6t2,armv7-a,armv7-m,armv7-r,armv7e-m,armv7k,armv7s,armv7ve,armv8-a,armv8-m.base,armv8-m.main,armv8-r,armv8.1-a,armv8.1-m.main,armv8.2-a,armv8.3-a,armv8.4-a,armv8.5-a,armv8.6-a,armv8.7-a,armv8.8-a,armv8.9-a,armv9-a,armv9.1-a,armv9.2-a,armv9.3-a,armv9.4-a,armv9.5-a,atomics-32,avoid-movs-shop,avoid-partial-cpsr,bf16,big-endian-instructions,cde,cdecp0,cdecp1,cdecp2,cdecp3,cdecp4,cdecp5,cdecp6,cdecp7,cheap-predicable-cpsr,clrbhb,cortex-a710,cortex-a78,cortex-x1,cortex-x1c,crc,crypto,d32,db,dfb,disable-postra-scheduler,dont-widen-vmovs,dotprod,dsp,execute-only,expand-fp-mlx,exynos,fix-cmse-cve-2021-35465,fix-cortex-a57-aes-1742098,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp16fml,fp64,fpao,fpregs,fpregs16,fpregs64,fullfp16,fuse-aes,fuse-literals,harden-sls-blr,harden-sls-nocomdat,harden-sls-retbr,hwdiv,hwdiv-arm,i8mm,iwmmxt,iwmmxt2,krait,kryo,lob,long-calls,loop-align,m3,m7,mclass,mp,muxed-units,mve,mve.fp,mve1beat,mve2beat,mve4beat,nacl-trap,neon,neon-fpmovs,neonfp,neoverse-v1,no-branch-predictor,no-bti-at-return-twice,no-movt,no-neg-immediates,noarm,nonpipelined-vfp,pacbti,perfmon,prefer-ishst,prefer-vmovsr,prof-unpr,r4,r5,r52,r7,ras,rclass,read-tp-tpidrprw,read-tp-tpidruro,read-tp-tpidrurw,reserve-r9,ret-addr-stack,sb,sha2,slow-fp-brcc,slow-load-D-subreg,slow-odd-reg,slow-vdup32,slow-vgetlni32,slowfpvfmx,slowfpvmlx,soft-float,splat-vfp-neon,strict-align,swift,thumb-mode,thumb2,trustzone,use-mipipeliner,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.1m.main,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8.7a,v8.8a,v8.9a,v8m,v8m.main,v9.1a,v9.2a,v9.3a,v9.4a,v9.5a,v9a,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vldn-align,vmlx-forwarding,vmlx-hazards,wide-stride-vfp,xscale,zcz"), // TargetArch_arm64: - str_lit("CONTEXTIDREL2,a35,a510,a53,a55,a57,a64fx,a65,a710,a715,a72,a73,a75,a76,a77,a78,a78c,aes,aggressive-fma,all,alternate-sextload-cvt-f32-pattern,altnzcv,am,ampere1,ampere1a,amvs,apple-a10,apple-a11,apple-a12,apple-a13,apple-a14,apple-a15,apple-a16,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,ascend-store-address,b16b16,balance-fp-ops,bf16,brbe,bti,call-saved-x10,call-saved-x11,call-saved-x12,call-saved-x13,call-saved-x14,call-saved-x15,call-saved-x18,call-saved-x8,call-saved-x9,carmel,ccdp,ccidx,ccpp,chk,clrbhb,cmp-bcc-fusion,complxnum,cortex-r82,cortex-x1,cortex-x2,cortex-x3,crc,crypto,cssc,custom-cheap-as-move,d128,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,enable-select-opt,ete,exynos-cheap-as-move,exynosm3,exynosm4,f32mm,f64mm,falkor,fgt,fix-cortex-a53-835769,flagm,fmv,force-32bit-jump-tables,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-addsub-2reg-const1,fuse-adrp-add,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,gcs,harden-sls-blr,harden-sls-nocomdat,harden-sls-retbr,hbc,hcx,i8mm,ite,jsconv,kryo,lor,ls64,lse,lse128,lse2,lsl-fast,mec,mops,mpam,mte,neon,neoverse512tvb,neoversee1,neoversen1,neoversen2,neoversev1,neoversev2,nmi,no-bti-at-return-twice,no-neg-immediates,no-sve-fp-ld1r,no-zcz-fp,nv,outline-atomics,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,prfm-slc-target,rand,ras,rasv2,rcpc,rcpc-immo,rcpc3,rdm,reserve-x1,reserve-x10,reserve-x11,reserve-x12,reserve-x13,reserve-x14,reserve-x15,reserve-x18,reserve-x2,reserve-x20,reserve-x21,reserve-x22,reserve-x23,reserve-x24,reserve-x25,reserve-x26,reserve-x27,reserve-x28,reserve-x3,reserve-x30,reserve-x4,reserve-x5,reserve-x6,reserve-x7,reserve-x9,rme,saphira,sb,sel2,sha2,sha3,slow-misaligned-128store,slow-paired-128,slow-strqro-store,sm4,sme,sme-f16f16,sme-f64f64,sme-i16i64,sme2,sme2p1,spe,spe-eef,specres2,specrestrict,ssbs,strict-align,sve,sve2,sve2-aes,sve2-bitperm,sve2-sha3,sve2-sm4,sve2p1,tagged-globals,the,thunderx,thunderx2t99,thunderx3t110,thunderxt81,thunderxt83,thunderxt88,tlb-rmi,tme,tpidr-el1,tpidr-el2,tpidr-el3,tpidrro-el0,tracev8.4,trbe,tsv110,uaops,use-experimental-zeroing-pseudos,use-postra-scheduler,use-reciprocal-square-root,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8.7a,v8.8a,v8.9a,v8a,v8r,v9.1a,v9.2a,v9.3a,v9.4a,v9a,vh,wfxt,xs,zcm,zcz,zcz-fp-workaround,zcz-gp"), + str_lit("CONTEXTIDREL2,a35,a510,a520,a53,a55,a57,a64fx,a65,a710,a715,a72,a720,a73,a75,a76,a77,a78,a78c,addr-lsl-fast,aes,aggressive-fma,all,alternate-sextload-cvt-f32-pattern,altnzcv,alu-lsl-fast,am,ampere1,ampere1a,ampere1b,amvs,apple-a10,apple-a11,apple-a12,apple-a13,apple-a14,apple-a15,apple-a16,apple-a17,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,ascend-store-address,b16b16,balance-fp-ops,bf16,brbe,bti,call-saved-x10,call-saved-x11,call-saved-x12,call-saved-x13,call-saved-x14,call-saved-x15,call-saved-x18,call-saved-x8,call-saved-x9,carmel,ccdp,ccidx,ccpp,chk,clrbhb,cmp-bcc-fusion,complxnum,cortex-r82,cortex-x1,cortex-x2,cortex-x3,cortex-x4,cpa,crc,crypto,cssc,d128,disable-latency-sched-heuristic,disable-ldp,disable-stp,dit,dotprod,ecv,el2vmsa,el3,enable-select-opt,ete,exynos-cheap-as-move,exynosm3,exynosm4,f32mm,f64mm,falkor,faminmax,fgt,fix-cortex-a53-835769,flagm,fmv,force-32bit-jump-tables,fp-armv8,fp16fml,fp8,fp8dot2,fp8dot4,fp8fma,fpmr,fptoint,fullfp16,fuse-address,fuse-addsub-2reg-const1,fuse-adrp-add,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,gcs,harden-sls-blr,harden-sls-nocomdat,harden-sls-retbr,hbc,hcx,i8mm,ite,jsconv,kryo,ldp-aligned-only,lor,ls64,lse,lse128,lse2,lut,mec,mops,mpam,mte,neon,neoverse512tvb,neoversee1,neoversen1,neoversen2,neoversev1,neoversev2,nmi,no-bti-at-return-twice,no-neg-immediates,no-sve-fp-ld1r,no-zcz-fp,nv,outline-atomics,pan,pan-rwv,pauth,pauth-lr,perfmon,predictable-select-expensive,predres,prfm-slc-target,rand,ras,rasv2,rcpc,rcpc-immo,rcpc3,rdm,reserve-x1,reserve-x10,reserve-x11,reserve-x12,reserve-x13,reserve-x14,reserve-x15,reserve-x18,reserve-x2,reserve-x20,reserve-x21,reserve-x22,reserve-x23,reserve-x24,reserve-x25,reserve-x26,reserve-x27,reserve-x28,reserve-x3,reserve-x30,reserve-x4,reserve-x5,reserve-x6,reserve-x7,reserve-x9,rme,saphira,sb,sel2,sha2,sha3,slow-misaligned-128store,slow-paired-128,slow-strqro-store,sm4,sme,sme-f16f16,sme-f64f64,sme-f8f16,sme-f8f32,sme-fa64,sme-i16i64,sme-lutv2,sme2,sme2p1,spe,spe-eef,specres2,specrestrict,ssbs,ssve-fp8dot2,ssve-fp8dot4,ssve-fp8fma,store-pair-suppress,stp-aligned-only,strict-align,sve,sve2,sve2-aes,sve2-bitperm,sve2-sha3,sve2-sm4,sve2p1,tagged-globals,the,thunderx,thunderx2t99,thunderx3t110,thunderxt81,thunderxt83,thunderxt88,tlb-rmi,tlbiw,tme,tpidr-el1,tpidr-el2,tpidr-el3,tpidrro-el0,tracev8.4,trbe,tsv110,uaops,use-experimental-zeroing-pseudos,use-postra-scheduler,use-reciprocal-square-root,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8.7a,v8.8a,v8.9a,v8a,v8r,v9.1a,v9.2a,v9.3a,v9.4a,v9.5a,v9a,vh,wfxt,xs,zcm,zcz,zcz-fp-workaround,zcz-gp"), // TargetArch_wasm32: - str_lit("atomics,bulk-memory,exception-handling,extended-const,multivalue,mutable-globals,nontrapping-fptoint,reference-types,relaxed-simd,sign-ext,simd128,tail-call"), + str_lit("atomics,bulk-memory,exception-handling,extended-const,multimemory,multivalue,mutable-globals,nontrapping-fptoint,reference-types,relaxed-simd,sign-ext,simd128,tail-call"), // TargetArch_wasm64p32: - str_lit("atomics,bulk-memory,exception-handling,extended-const,multivalue,mutable-globals,nontrapping-fptoint,reference-types,relaxed-simd,sign-ext,simd128,tail-call"), + str_lit("atomics,bulk-memory,exception-handling,extended-const,multimemory,multivalue,mutable-globals,nontrapping-fptoint,reference-types,relaxed-simd,sign-ext,simd128,tail-call"), + // TargetArch_riscv64: + str_lit("32bit,64bit,a,auipc-addi-fusion,c,conditional-cmv-fusion,d,dlen-factor-2,e,experimental,experimental-zacas,experimental-zcmop,experimental-zfbfmin,experimental-zicfilp,experimental-zicfiss,experimental-zimop,experimental-ztso,experimental-zvfbfmin,experimental-zvfbfwma,f,fast-unaligned-access,forced-atomics,h,i,ld-add-fusion,lui-addi-fusion,m,no-default-unroll,no-optimized-zero-stride-load,no-rvc-hints,relax,reserve-x1,reserve-x10,reserve-x11,reserve-x12,reserve-x13,reserve-x14,reserve-x15,reserve-x16,reserve-x17,reserve-x18,reserve-x19,reserve-x2,reserve-x20,reserve-x21,reserve-x22,reserve-x23,reserve-x24,reserve-x25,reserve-x26,reserve-x27,reserve-x28,reserve-x29,reserve-x3,reserve-x30,reserve-x31,reserve-x4,reserve-x5,reserve-x6,reserve-x7,reserve-x8,reserve-x9,save-restore,seq-cst-trailing-fence,shifted-zextw-fusion,short-forward-branch-opt,sifive7,smaia,smepmp,ssaia,svinval,svnapot,svpbmt,tagged-globals,unaligned-scalar-mem,use-postra-scheduler,v,ventana-veyron,xcvalu,xcvbi,xcvbitmanip,xcvelw,xcvmac,xcvmem,xcvsimd,xsfvcp,xsfvfnrclipxfqf,xsfvfwmaccqqq,xsfvqmaccdod,xsfvqmaccqoq,xtheadba,xtheadbb,xtheadbs,xtheadcmo,xtheadcondmov,xtheadfmemidx,xtheadmac,xtheadmemidx,xtheadmempair,xtheadsync,xtheadvdot,xventanacondops,za128rs,za64rs,zawrs,zba,zbb,zbc,zbkb,zbkc,zbkx,zbs,zca,zcb,zcd,zce,zcf,zcmp,zcmt,zdinx,zexth-fusion,zextw-fusion,zfa,zfh,zfhmin,zfinx,zhinx,zhinxmin,zic64b,zicbom,zicbop,zicboz,ziccamoa,ziccif,zicclsm,ziccrse,zicntr,zicond,zicsr,zifencei,zihintntl,zihintpause,zihpm,zk,zkn,zknd,zkne,zknh,zkr,zks,zksed,zksh,zkt,zmmul,zvbb,zvbc,zve32f,zve32x,zve64d,zve64f,zve64x,zvfh,zvfhmin,zvkb,zvkg,zvkn,zvknc,zvkned,zvkng,zvknha,zvknhb,zvks,zvksc,zvksed,zvksg,zvksh,zvkt,zvl1024b,zvl128b,zvl16384b,zvl2048b,zvl256b,zvl32768b,zvl32b,zvl4096b,zvl512b,zvl64b,zvl65536b,zvl8192b"), }; // Generated with the featuregen script in `misc/featuregen` @@ -39,17 +43,19 @@ gb_global int target_microarch_counts[TargetArch_COUNT] = { // TargetArch_Invalid: 0, // TargetArch_amd64: - 120, + 127, // TargetArch_i386: - 120, + 127, // TargetArch_arm32: - 90, + 91, // TargetArch_arm64: - 63, + 69, // TargetArch_wasm32: 3, // TargetArch_wasm64p32: 3, + // TargetArch_riscv64: + 26, }; // Generated with the featuregen script in `misc/featuregen` @@ -57,6 +63,9 @@ gb_global MicroarchFeatureList microarch_features_list[] = { // TargetArch_amd64: { str_lit("alderlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("amdfam10"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,lzcnt,mmx,nopl,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4a,vzeroupper,x87") }, + { str_lit("arrowlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("arrowlake-s"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,sha512,shstk,slow-3ops-lea,sm3,sm4,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("arrowlake_s"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,sha512,shstk,slow-3ops-lea,sm3,sm4,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("athlon"), str_lit("3dnow,3dnowa,64bit-mode,cmov,cx8,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, { str_lit("athlon-4"), str_lit("3dnow,3dnowa,64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, { str_lit("athlon-fx"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, @@ -81,6 +90,7 @@ gb_global MicroarchFeatureList microarch_features_list[] = { { str_lit("c3-2"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, { str_lit("cannonlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vl,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,sha,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("cascadelake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,avx512vnni,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("clearwaterforest"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,sha512,shstk,slow-3ops-lea,sm3,sm4,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,uintr,usermsr,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("cooperlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avx512bf16,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,avx512vnni,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("core-avx-i"), str_lit("64bit,64bit-mode,avx,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fsgsbase,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, { str_lit("core-avx2"), str_lit("64bit,64bit-mode,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, @@ -103,6 +113,7 @@ gb_global MicroarchFeatureList microarch_features_list[] = { { str_lit("goldmont"), str_lit("64bit,64bit-mode,aes,clflushopt,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("goldmont-plus"), str_lit("64bit,64bit-mode,aes,clflushopt,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("goldmont_plus"), str_lit("64bit,64bit-mode,aes,clflushopt,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("gracemont"), str_lit("64bit,64bit-mode,adx,aes,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivl-to-divb,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,nopl,pclmul,pconfig,pku,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("grandridge"), str_lit("64bit,64bit-mode,adx,aes,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,fast-movbe,fma,fsgsbase,fxsr,gfni,hreset,invpcid,kl,lzcnt,mmx,movbe,movdir64b,movdiri,no-bypass-delay,nopl,pclmul,pconfig,pku,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,uintr,use-glm-div-sqrt-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("graniterapids"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("graniterapids-d"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, @@ -125,12 +136,14 @@ gb_global MicroarchFeatureList microarch_features_list[] = { { str_lit("knl"), str_lit("64bit,64bit-mode,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, { str_lit("knm"), str_lit("64bit,64bit-mode,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,avx512vpopcntdq,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, { str_lit("lakemont"), str_lit("64bit-mode,cx8,slow-unaligned-mem-16,sse,sse2,vzeroupper") }, + { str_lit("lunarlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,sha512,shstk,slow-3ops-lea,sm3,sm4,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("meteorlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("mic_avx512"), str_lit("64bit,64bit-mode,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, { str_lit("nehalem"), str_lit("64bit,64bit-mode,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, { str_lit("nocona"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, { str_lit("opteron"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, { str_lit("opteron-sse3"), str_lit("3dnow,3dnowa,64bit,64bit-mode,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("pantherlake"), str_lit("64bit,64bit-mode,adx,aes,allow-light-256-bit,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,sha512,shstk,slow-3ops-lea,sm3,sm4,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("penryn"), str_lit("64bit,64bit-mode,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,sse4.1,ssse3,vzeroupper,x87") }, { str_lit("pentium"), str_lit("64bit-mode,cx8,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, { str_lit("pentium-m"), str_lit("64bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, @@ -178,6 +191,9 @@ gb_global MicroarchFeatureList microarch_features_list[] = { // TargetArch_i386: { str_lit("alderlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("amdfam10"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,lzcnt,mmx,nopl,popcnt,prfchw,sahf,sbb-dep-breaking,slow-shld,sse,sse2,sse3,sse4a,vzeroupper,x87") }, + { str_lit("arrowlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("arrowlake-s"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,sha512,shstk,slow-3ops-lea,sm3,sm4,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("arrowlake_s"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,sha512,shstk,slow-3ops-lea,sm3,sm4,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("athlon"), str_lit("32bit-mode,3dnow,3dnowa,cmov,cx8,mmx,nopl,slow-shld,slow-unaligned-mem-16,vzeroupper,x87") }, { str_lit("athlon-4"), str_lit("32bit-mode,3dnow,3dnowa,cmov,cx8,fxsr,mmx,nopl,slow-shld,slow-unaligned-mem-16,sse,vzeroupper,x87") }, { str_lit("athlon-fx"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, @@ -202,6 +218,7 @@ gb_global MicroarchFeatureList microarch_features_list[] = { { str_lit("c3-2"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,slow-unaligned-mem-16,sse,vzeroupper,x87") }, { str_lit("cannonlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512ifma,avx512vbmi,avx512vl,bmi,bmi2,clflushopt,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,sha,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("cascadelake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,avx512vnni,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("clearwaterforest"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,sha512,shstk,slow-3ops-lea,sm3,sm4,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,uintr,usermsr,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("cooperlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avx512bf16,avx512bw,avx512cd,avx512dq,avx512f,avx512vl,avx512vnni,bmi,bmi2,clflushopt,clwb,cmov,crc32,cx16,cx8,ermsb,evex512,f16c,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,faster-shift-than-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pku,popcnt,prefer-256-bit,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("core-avx-i"), str_lit("32bit-mode,64bit,avx,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fsgsbase,fxsr,idivq-to-divl,macrofusion,mmx,no-bypass-delay-mov,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,slow-unaligned-mem-32,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, { str_lit("core-avx2"), str_lit("32bit-mode,64bit,allow-light-256-bit,avx,avx2,bmi,bmi2,cmov,crc32,cx16,cx8,ermsb,f16c,false-deps-lzcnt-tzcnt,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fma,fsgsbase,fxsr,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,popcnt,rdrnd,sahf,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87,xsave,xsaveopt") }, @@ -224,6 +241,7 @@ gb_global MicroarchFeatureList microarch_features_list[] = { { str_lit("goldmont"), str_lit("32bit-mode,64bit,aes,clflushopt,cmov,crc32,cx16,cx8,false-deps-popcnt,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("goldmont-plus"), str_lit("32bit-mode,64bit,aes,clflushopt,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("goldmont_plus"), str_lit("32bit-mode,64bit,aes,clflushopt,cmov,crc32,cx16,cx8,fast-movbe,fsgsbase,fxsr,mmx,movbe,no-bypass-delay,nopl,pclmul,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,sha,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,use-glm-div-sqrt-costs,vzeroupper,x87,xsave,xsavec,xsaveopt,xsaves") }, + { str_lit("gracemont"), str_lit("32bit-mode,64bit,adx,aes,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-popcnt,fast-15bytenop,fast-scalar-fsqrt,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivl-to-divb,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,nopl,pclmul,pconfig,pku,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("grandridge"), str_lit("32bit-mode,64bit,adx,aes,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,fast-movbe,fma,fsgsbase,fxsr,gfni,hreset,invpcid,kl,lzcnt,mmx,movbe,movdir64b,movdiri,no-bypass-delay,nopl,pclmul,pconfig,pku,popcnt,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-incdec,slow-lea,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,uintr,use-glm-div-sqrt-costs,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("graniterapids"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,amx-bf16,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("graniterapids-d"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,amx-bf16,amx-complex,amx-fp16,amx-int8,amx-tile,avx,avx2,avx512bf16,avx512bitalg,avx512bw,avx512cd,avx512dq,avx512f,avx512fp16,avx512ifma,avx512vbmi,avx512vbmi2,avx512vl,avx512vnni,avx512vpopcntdq,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,enqcmd,ermsb,evex512,f16c,false-deps-getmant,false-deps-mulc,false-deps-mullq,false-deps-perm,false-deps-range,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fsrm,fxsr,gfni,idivq-to-divl,invpcid,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-256-bit,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tsxldtrk,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,wbnoinvd,x87,xsave,xsavec,xsaveopt,xsaves") }, @@ -246,12 +264,14 @@ gb_global MicroarchFeatureList microarch_features_list[] = { { str_lit("knl"), str_lit("32bit-mode,64bit,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, { str_lit("knm"), str_lit("32bit-mode,64bit,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,avx512vpopcntdq,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, { str_lit("lakemont"), str_lit("32bit-mode,cx8,slow-unaligned-mem-16,vzeroupper") }, + { str_lit("lunarlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,sha512,shstk,slow-3ops-lea,sm3,sm4,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("meteorlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avxvnni,bmi,bmi2,cldemote,clflushopt,clwb,cmov,crc32,cx16,cx8,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,shstk,slow-3ops-lea,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("mic_avx512"), str_lit("32bit-mode,64bit,adx,aes,avx,avx2,avx512cd,avx512er,avx512f,avx512pf,bmi,bmi2,cmov,crc32,cx16,cx8,evex512,f16c,fast-gather,fast-movbe,fma,fsgsbase,fxsr,idivq-to-divl,lzcnt,mmx,movbe,nopl,pclmul,popcnt,prefer-mask-registers,prefetchwt1,prfchw,rdrnd,rdseed,sahf,slow-3ops-lea,slow-incdec,slow-pmaddwd,slow-two-mem-ops,sse,sse2,sse3,sse4.1,sse4.2,ssse3,x87,xsave,xsaveopt") }, { str_lit("nehalem"), str_lit("32bit-mode,64bit,cmov,crc32,cx16,cx8,fxsr,macrofusion,mmx,no-bypass-delay-mov,nopl,popcnt,sahf,sse,sse2,sse3,sse4.1,sse4.2,ssse3,vzeroupper,x87") }, { str_lit("nocona"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, { str_lit("opteron"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, { str_lit("opteron-sse3"), str_lit("32bit-mode,3dnow,3dnowa,64bit,cmov,cx16,cx8,fast-scalar-shift-masks,fxsr,mmx,nopl,sbb-dep-breaking,slow-shld,slow-unaligned-mem-16,sse,sse2,sse3,vzeroupper,x87") }, + { str_lit("pantherlake"), str_lit("32bit-mode,64bit,adx,aes,allow-light-256-bit,avx,avx2,avxifma,avxneconvert,avxvnni,avxvnniint16,avxvnniint8,bmi,bmi2,cldemote,clflushopt,clwb,cmov,cmpccxadd,crc32,cx16,cx8,enqcmd,f16c,false-deps-perm,false-deps-popcnt,fast-15bytenop,fast-gather,fast-scalar-fsqrt,fast-shld-rotate,fast-variable-crosslane-shuffle,fast-variable-perlane-shuffle,fast-vector-fsqrt,fma,fsgsbase,fxsr,gfni,hreset,idivq-to-divl,invpcid,kl,lzcnt,macrofusion,mmx,movbe,movdir64b,movdiri,no-bypass-delay-blend,no-bypass-delay-mov,no-bypass-delay-shuffle,nopl,pclmul,pconfig,pku,popcnt,prefer-movmsk-over-vtest,prefetchi,prfchw,ptwrite,rdpid,rdrnd,rdseed,sahf,serialize,sha,sha512,shstk,slow-3ops-lea,sm3,sm4,sse,sse2,sse3,sse4.1,sse4.2,ssse3,tuning-fast-imm-vector-shift,uintr,vaes,vpclmulqdq,vzeroupper,waitpkg,widekl,x87,xsave,xsavec,xsaveopt,xsaves") }, { str_lit("penryn"), str_lit("32bit-mode,64bit,cmov,cx16,cx8,fxsr,macrofusion,mmx,nopl,sahf,slow-unaligned-mem-16,sse,sse2,sse3,sse4.1,ssse3,vzeroupper,x87") }, { str_lit("pentium"), str_lit("32bit-mode,cx8,slow-unaligned-mem-16,vzeroupper,x87") }, { str_lit("pentium-m"), str_lit("32bit-mode,cmov,cx8,fxsr,mmx,nopl,slow-unaligned-mem-16,sse,sse2,vzeroupper,x87") }, @@ -325,16 +345,16 @@ gb_global MicroarchFeatureList microarch_features_list[] = { { str_lit("arm968e-s"), str_lit("armv5te,v4t,v5t,v5te") }, { str_lit("arm9e"), str_lit("armv5te,v4t,v5t,v5te") }, { str_lit("arm9tdmi"), str_lit("armv4t,v4t") }, - { str_lit("cortex-a12"), str_lit("a12,aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ret-addr-stack,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vmlx-forwarding") }, - { str_lit("cortex-a15"), str_lit("a15,aclass,armv7-a,avoid-partial-cpsr,d32,db,dont-widen-vmovs,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,muxed-units,neon,perfmon,ret-addr-stack,splat-vfp-neon,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vldn-align") }, - { str_lit("cortex-a17"), str_lit("a17,aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ret-addr-stack,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vmlx-forwarding") }, + { str_lit("cortex-a12"), str_lit("a12,aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,perfmon,ret-addr-stack,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vmlx-forwarding") }, + { str_lit("cortex-a15"), str_lit("a15,aclass,armv7-a,avoid-partial-cpsr,d32,db,dont-widen-vmovs,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,muxed-units,perfmon,ret-addr-stack,splat-vfp-neon,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vldn-align") }, + { str_lit("cortex-a17"), str_lit("a17,aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,perfmon,ret-addr-stack,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vmlx-forwarding") }, { str_lit("cortex-a32"), str_lit("aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, { str_lit("cortex-a35"), str_lit("a35,aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, - { str_lit("cortex-a5"), str_lit("a5,aclass,armv7-a,d32,db,dsp,fp16,fp64,fpregs,fpregs64,mp,neon,perfmon,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,vmlx-forwarding") }, + { str_lit("cortex-a5"), str_lit("a5,aclass,armv7-a,d32,db,dsp,fp16,fp64,fpregs,fpregs64,mp,perfmon,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,vmlx-forwarding") }, { str_lit("cortex-a53"), str_lit("a53,aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpao,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, { str_lit("cortex-a55"), str_lit("a55,aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, { str_lit("cortex-a57"), str_lit("a57,aclass,acquire-release,aes,armv8-a,avoid-partial-cpsr,cheap-predicable-cpsr,crc,crypto,d32,db,dsp,fix-cortex-a57-aes-1742098,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpao,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, - { str_lit("cortex-a7"), str_lit("a7,aclass,armv7-a,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vmlx-forwarding,vmlx-hazards") }, + { str_lit("cortex-a7"), str_lit("a7,aclass,armv7-a,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,perfmon,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,vmlx-forwarding,vmlx-hazards") }, { str_lit("cortex-a710"), str_lit("aclass,acquire-release,armv9-a,bf16,cortex-a710,crc,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp16fml,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,i8mm,mp,neon,perfmon,ras,sb,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8m,v9a,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, { str_lit("cortex-a72"), str_lit("a72,aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fix-cortex-a57-aes-1742098,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, { str_lit("cortex-a73"), str_lit("a73,aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, @@ -344,8 +364,8 @@ gb_global MicroarchFeatureList microarch_features_list[] = { { str_lit("cortex-a77"), str_lit("a77,aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, { str_lit("cortex-a78"), str_lit("aclass,acquire-release,aes,armv8.2-a,cortex-a78,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, { str_lit("cortex-a78c"), str_lit("a78c,aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dotprod,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,hwdiv-arm,mp,neon,perfmon,ras,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, - { str_lit("cortex-a8"), str_lit("a8,aclass,armv7-a,d32,db,dsp,fp64,fpregs,fpregs64,neon,nonpipelined-vfp,perfmon,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vmlx-forwarding,vmlx-hazards") }, - { str_lit("cortex-a9"), str_lit("a9,aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,expand-fp-mlx,fp16,fp64,fpregs,fpregs64,mp,muxed-units,neon,neon-fpmovs,perfmon,prefer-vmovsr,ret-addr-stack,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vldn-align,vmlx-forwarding,vmlx-hazards") }, + { str_lit("cortex-a8"), str_lit("a8,aclass,armv7-a,d32,db,dsp,fp64,fpregs,fpregs64,nonpipelined-vfp,perfmon,ret-addr-stack,slow-fp-brcc,slowfpvfmx,slowfpvmlx,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vmlx-forwarding,vmlx-hazards") }, + { str_lit("cortex-a9"), str_lit("a9,aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,expand-fp-mlx,fp16,fp64,fpregs,fpregs64,mp,muxed-units,neon-fpmovs,perfmon,prefer-vmovsr,ret-addr-stack,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vldn-align,vmlx-forwarding,vmlx-hazards") }, { str_lit("cortex-m0"), str_lit("armv6-m,db,mclass,no-branch-predictor,noarm,strict-align,thumb-mode,v4t,v5t,v5te,v6,v6m") }, { str_lit("cortex-m0plus"), str_lit("armv6-m,db,mclass,no-branch-predictor,noarm,strict-align,thumb-mode,v4t,v5t,v5te,v6,v6m") }, { str_lit("cortex-m1"), str_lit("armv6-m,db,mclass,no-branch-predictor,noarm,strict-align,thumb-mode,v4t,v5t,v5te,v6,v6m") }, @@ -354,6 +374,7 @@ gb_global MicroarchFeatureList microarch_features_list[] = { { str_lit("cortex-m33"), str_lit("8msecext,acquire-release,armv8-m.main,db,dsp,fix-cmse-cve-2021-35465,fp-armv8d16sp,fp16,fpregs,hwdiv,loop-align,mclass,no-branch-predictor,noarm,slowfpvfmx,slowfpvmlx,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,v8m.main,vfp2sp,vfp3d16sp,vfp4d16sp") }, { str_lit("cortex-m35p"), str_lit("8msecext,acquire-release,armv8-m.main,db,dsp,fix-cmse-cve-2021-35465,fp-armv8d16sp,fp16,fpregs,hwdiv,loop-align,mclass,no-branch-predictor,noarm,slowfpvfmx,slowfpvmlx,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,v8m.main,vfp2sp,vfp3d16sp,vfp4d16sp") }, { str_lit("cortex-m4"), str_lit("armv7e-m,db,dsp,fp16,fpregs,hwdiv,loop-align,mclass,no-branch-predictor,noarm,slowfpvfmx,slowfpvmlx,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2sp,vfp3d16sp,vfp4d16sp") }, + { str_lit("cortex-m52"), str_lit("8msecext,acquire-release,armv8.1-m.main,db,dsp,fp-armv8d16,fp-armv8d16sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,lob,loop-align,mclass,mve,mve.fp,mve1beat,no-branch-predictor,noarm,pacbti,ras,slowfpvmlx,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8.1m.main,v8m,v8m.main,vfp2,vfp2sp,vfp3d16,vfp3d16sp,vfp4d16,vfp4d16sp") }, { str_lit("cortex-m55"), str_lit("8msecext,acquire-release,armv8.1-m.main,db,dsp,fix-cmse-cve-2021-35465,fp-armv8d16,fp-armv8d16sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,lob,loop-align,mclass,mve,mve.fp,no-branch-predictor,noarm,ras,slowfpvmlx,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8.1m.main,v8m,v8m.main,vfp2,vfp2sp,vfp3d16,vfp3d16sp,vfp4d16,vfp4d16sp") }, { str_lit("cortex-m7"), str_lit("armv7e-m,db,dsp,fp-armv8d16,fp-armv8d16sp,fp16,fp64,fpregs,fpregs64,hwdiv,m7,mclass,noarm,thumb-mode,thumb2,use-mipipeliner,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3d16,vfp3d16sp,vfp4d16,vfp4d16sp") }, { str_lit("cortex-m85"), str_lit("8msecext,acquire-release,armv8.1-m.main,db,dsp,fp-armv8d16,fp-armv8d16sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,hwdiv,lob,mclass,mve,mve.fp,noarm,pacbti,ras,thumb-mode,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8.1m.main,v8m,v8m.main,vfp2,vfp2sp,vfp3d16,vfp3d16sp,vfp4d16,vfp4d16sp") }, @@ -372,7 +393,7 @@ gb_global MicroarchFeatureList microarch_features_list[] = { { str_lit("exynos-m5"), str_lit("aclass,acquire-release,aes,armv8.2-a,crc,crypto,d32,db,dont-widen-vmovs,dotprod,dsp,expand-fp-mlx,exynos,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs16,fpregs64,fullfp16,fuse-aes,fuse-literals,hwdiv,hwdiv-arm,mp,neon,perfmon,prof-unpr,ras,ret-addr-stack,sha2,slow-fp-brcc,slow-vdup32,slow-vgetlni32,slowfpvfmx,slowfpvmlx,splat-vfp-neon,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8.1a,v8.2a,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization,wide-stride-vfp,zcz") }, { str_lit("generic"), str_lit("") }, { str_lit("iwmmxt"), str_lit("armv5te,v4t,v5t,v5te") }, - { str_lit("krait"), str_lit("aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,krait,muxed-units,neon,perfmon,ret-addr-stack,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,vldn-align,vmlx-forwarding") }, + { str_lit("krait"), str_lit("aclass,armv7-a,avoid-partial-cpsr,d32,db,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,krait,muxed-units,perfmon,ret-addr-stack,thumb2,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,vldn-align,vmlx-forwarding") }, { str_lit("kryo"), str_lit("aclass,acquire-release,aes,armv8-a,crc,crypto,d32,db,dsp,fp-armv8,fp-armv8d16,fp-armv8d16sp,fp-armv8sp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,kryo,mp,neon,perfmon,sha2,thumb2,trustzone,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,virtualization") }, { str_lit("mpcore"), str_lit("armv6k,fp64,fpregs,fpregs64,slowfpvmlx,v4t,v5t,v5te,v6,v6k,vfp2,vfp2sp") }, { str_lit("mpcorenovfp"), str_lit("armv6k,v4t,v5t,v5te,v6,v6k") }, @@ -385,12 +406,13 @@ gb_global MicroarchFeatureList microarch_features_list[] = { { str_lit("strongarm110"), str_lit("armv4") }, { str_lit("strongarm1100"), str_lit("armv4") }, { str_lit("strongarm1110"), str_lit("armv4") }, - { str_lit("swift"), str_lit("aclass,armv7-a,avoid-movs-shop,avoid-partial-cpsr,d32,db,disable-postra-scheduler,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neon,neonfp,perfmon,prefer-ishst,prof-unpr,ret-addr-stack,slow-load-D-subreg,slow-odd-reg,slow-vdup32,slow-vgetlni32,slowfpvfmx,slowfpvmlx,swift,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,vmlx-hazards,wide-stride-vfp") }, + { str_lit("swift"), str_lit("aclass,armv7-a,avoid-movs-shop,avoid-partial-cpsr,d32,db,disable-postra-scheduler,dsp,fp16,fp64,fpregs,fpregs64,hwdiv,hwdiv-arm,mp,neonfp,perfmon,prefer-ishst,prof-unpr,ret-addr-stack,slow-load-D-subreg,slow-odd-reg,slow-vdup32,slow-vgetlni32,slowfpvfmx,slowfpvmlx,swift,thumb2,use-misched,v4t,v5t,v5te,v6,v6k,v6m,v6t2,v7,v7clrex,v8m,vfp2,vfp2sp,vfp3,vfp3d16,vfp3d16sp,vfp3sp,vfp4,vfp4d16,vfp4d16sp,vfp4sp,vmlx-hazards,wide-stride-vfp") }, { str_lit("xscale"), str_lit("armv5te,v4t,v5t,v5te") }, // TargetArch_arm64: { str_lit("a64fx"), str_lit("CONTEXTIDREL2,a64fx,aggressive-fma,arith-bcc-fusion,ccpp,complxnum,crc,el2vmsa,el3,fp-armv8,fullfp16,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rdm,sha2,store-pair-suppress,sve,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, { str_lit("ampere1"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,aggressive-fma,altnzcv,alu-lsl-fast,am,ampere1,amvs,arith-bcc-fusion,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,crc,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fptoint,fuse-address,fuse-aes,fuse-literals,i8mm,jsconv,ldp-aligned-only,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,rand,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,stp-aligned-only,tlb-rmi,tracev8.4,uaops,use-postra-scheduler,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh") }, { str_lit("ampere1a"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,aggressive-fma,altnzcv,alu-lsl-fast,am,ampere1a,amvs,arith-bcc-fusion,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,crc,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fptoint,fuse-address,fuse-aes,fuse-literals,i8mm,jsconv,ldp-aligned-only,lor,lse,lse2,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predres,rand,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,sm4,specrestrict,ssbs,store-pair-suppress,stp-aligned-only,tlb-rmi,tracev8.4,uaops,use-postra-scheduler,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh") }, + { str_lit("ampere1b"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,aggressive-fma,altnzcv,alu-lsl-fast,am,ampere1b,amvs,arith-bcc-fusion,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,crc,cssc,dit,dotprod,ecv,el2vmsa,el3,enable-select-opt,fgt,flagm,fp-armv8,fptoint,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-literals,hcx,i8mm,jsconv,ldp-aligned-only,lor,lse,lse2,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,rand,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,sm4,specrestrict,ssbs,store-pair-suppress,stp-aligned-only,tlb-rmi,tracev8.4,uaops,use-postra-scheduler,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8.7a,v8a,vh,wfxt,xs") }, { str_lit("apple-a10"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,apple-a10,arith-bcc-fusion,arith-cbz-fusion,crc,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fuse-aes,fuse-crypto-eor,lor,neon,pan,perfmon,rdm,sha2,store-pair-suppress,v8a,vh,zcm,zcz,zcz-gp") }, { str_lit("apple-a11"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,apple-a11,arith-bcc-fusion,arith-cbz-fusion,ccpp,crc,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fullfp16,fuse-aes,fuse-crypto-eor,lor,lse,neon,pan,pan-rwv,perfmon,ras,rdm,sha2,store-pair-suppress,uaops,v8.1a,v8.2a,v8a,vh,zcm,zcz,zcz-gp") }, { str_lit("apple-a12"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,apple-a12,arith-bcc-fusion,arith-cbz-fusion,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fullfp16,fuse-aes,fuse-crypto-eor,jsconv,lor,lse,neon,pan,pan-rwv,pauth,perfmon,ras,rcpc,rdm,sha2,store-pair-suppress,uaops,v8.1a,v8.2a,v8.3a,v8a,vh,zcm,zcz,zcz-gp") }, @@ -398,18 +420,21 @@ gb_global MicroarchFeatureList microarch_features_list[] = { { str_lit("apple-a14"), str_lit("CONTEXTIDREL2,aes,aggressive-fma,alternate-sextload-cvt-f32-pattern,altnzcv,am,apple-a14,arith-bcc-fusion,arith-cbz-fusion,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,el2vmsa,el3,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8a,vh,zcm,zcz,zcz-gp") }, { str_lit("apple-a15"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,altnzcv,am,amvs,apple-a15,arith-bcc-fusion,arith-cbz-fusion,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,i8mm,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh,zcm,zcz,zcz-gp") }, { str_lit("apple-a16"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,altnzcv,am,amvs,apple-a16,arith-bcc-fusion,arith-cbz-fusion,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,hcx,i8mm,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-a17"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,altnzcv,am,amvs,apple-a17,arith-bcc-fusion,arith-cbz-fusion,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,hcx,i8mm,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh,zcm,zcz,zcz-gp") }, { str_lit("apple-a7"), str_lit("aes,alternate-sextload-cvt-f32-pattern,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fuse-aes,fuse-crypto-eor,neon,perfmon,sha2,store-pair-suppress,v8a,zcm,zcz,zcz-fp-workaround,zcz-gp") }, { str_lit("apple-a8"), str_lit("aes,alternate-sextload-cvt-f32-pattern,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fuse-aes,fuse-crypto-eor,neon,perfmon,sha2,store-pair-suppress,v8a,zcm,zcz,zcz-fp-workaround,zcz-gp") }, { str_lit("apple-a9"), str_lit("aes,alternate-sextload-cvt-f32-pattern,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fuse-aes,fuse-crypto-eor,neon,perfmon,sha2,store-pair-suppress,v8a,zcm,zcz,zcz-fp-workaround,zcz-gp") }, { str_lit("apple-latest"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,altnzcv,am,amvs,apple-a16,arith-bcc-fusion,arith-cbz-fusion,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,hcx,i8mm,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh,zcm,zcz,zcz-gp") }, { str_lit("apple-m1"), str_lit("CONTEXTIDREL2,aes,aggressive-fma,alternate-sextload-cvt-f32-pattern,altnzcv,am,apple-a14,arith-bcc-fusion,arith-cbz-fusion,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,el2vmsa,el3,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8a,vh,zcm,zcz,zcz-gp") }, { str_lit("apple-m2"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,altnzcv,am,amvs,apple-a15,arith-bcc-fusion,arith-cbz-fusion,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,i8mm,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh,zcm,zcz,zcz-gp") }, + { str_lit("apple-m3"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,altnzcv,am,amvs,apple-a16,arith-bcc-fusion,arith-cbz-fusion,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,dit,dotprod,ecv,el2vmsa,el3,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-address,fuse-aes,fuse-arith-logic,fuse-crypto-eor,fuse-csel,fuse-literals,hcx,i8mm,jsconv,lor,lse,lse2,mpam,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,sb,sel2,sha2,sha3,specrestrict,ssbs,store-pair-suppress,tlb-rmi,tracev8.4,uaops,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8a,vh,zcm,zcz,zcz-gp") }, { str_lit("apple-s4"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,apple-a12,arith-bcc-fusion,arith-cbz-fusion,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fullfp16,fuse-aes,fuse-crypto-eor,jsconv,lor,lse,neon,pan,pan-rwv,pauth,perfmon,ras,rcpc,rdm,sha2,store-pair-suppress,uaops,v8.1a,v8.2a,v8.3a,v8a,vh,zcm,zcz,zcz-gp") }, { str_lit("apple-s5"), str_lit("CONTEXTIDREL2,aes,alternate-sextload-cvt-f32-pattern,apple-a12,arith-bcc-fusion,arith-cbz-fusion,ccidx,ccpp,complxnum,crc,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fullfp16,fuse-aes,fuse-crypto-eor,jsconv,lor,lse,neon,pan,pan-rwv,pauth,perfmon,ras,rcpc,rdm,sha2,store-pair-suppress,uaops,v8.1a,v8.2a,v8.3a,v8a,vh,zcm,zcz,zcz-gp") }, { str_lit("carmel"), str_lit("CONTEXTIDREL2,aes,carmel,ccpp,crc,crypto,el2vmsa,el3,fp-armv8,fullfp16,lor,lse,neon,pan,pan-rwv,ras,rdm,sha2,uaops,v8.1a,v8.2a,v8a,vh") }, { str_lit("cortex-a34"), str_lit("a35,aes,crc,crypto,el2vmsa,el3,fp-armv8,neon,perfmon,sha2,v8a") }, { str_lit("cortex-a35"), str_lit("a35,aes,crc,crypto,el2vmsa,el3,fp-armv8,neon,perfmon,sha2,v8a") }, { str_lit("cortex-a510"), str_lit("CONTEXTIDREL2,a510,altnzcv,am,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,dit,dotprod,el2vmsa,el3,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("cortex-a520"), str_lit("CONTEXTIDREL2,a520,altnzcv,am,amvs,bf16,bti,ccdp,ccidx,ccpp,complxnum,crc,dit,dotprod,ecv,el2vmsa,el3,ete,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,hcx,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8.7a,v8a,v9.1a,v9.2a,v9a,vh,wfxt,xs") }, { str_lit("cortex-a53"), str_lit("a53,aes,balance-fp-ops,crc,crypto,el2vmsa,el3,fp-armv8,fuse-adrp-add,fuse-aes,neon,perfmon,sha2,use-postra-scheduler,v8a") }, { str_lit("cortex-a55"), str_lit("CONTEXTIDREL2,a55,aes,ccpp,crc,crypto,dotprod,el2vmsa,el3,fp-armv8,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,ras,rcpc,rdm,sha2,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, { str_lit("cortex-a57"), str_lit("a57,aes,balance-fp-ops,crc,crypto,el2vmsa,el3,enable-select-opt,fp-armv8,fuse-adrp-add,fuse-aes,fuse-literals,neon,perfmon,predictable-select-expensive,sha2,use-postra-scheduler,v8a") }, @@ -418,6 +443,7 @@ gb_global MicroarchFeatureList microarch_features_list[] = { { str_lit("cortex-a710"), str_lit("CONTEXTIDREL2,a710,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, { str_lit("cortex-a715"), str_lit("CONTEXTIDREL2,a715,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,spe,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, { str_lit("cortex-a72"), str_lit("a72,aes,crc,crypto,el2vmsa,el3,enable-select-opt,fp-armv8,fuse-adrp-add,fuse-aes,fuse-literals,neon,perfmon,predictable-select-expensive,sha2,v8a") }, + { str_lit("cortex-a720"), str_lit("CONTEXTIDREL2,a720,addr-lsl-fast,altnzcv,alu-lsl-fast,am,amvs,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,crc,dit,dotprod,ecv,el2vmsa,el3,enable-select-opt,ete,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,hcx,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,spe,spe-eef,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8.7a,v8a,v9.1a,v9.2a,v9a,vh,wfxt,xs") }, { str_lit("cortex-a73"), str_lit("a73,aes,crc,crypto,el2vmsa,el3,enable-select-opt,fp-armv8,fuse-adrp-add,fuse-aes,neon,perfmon,predictable-select-expensive,sha2,v8a") }, { str_lit("cortex-a75"), str_lit("CONTEXTIDREL2,a75,aes,ccpp,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,uaops,v8.1a,v8.2a,v8a,vh") }, { str_lit("cortex-a76"), str_lit("CONTEXTIDREL2,a76,addr-lsl-fast,aes,alu-lsl-fast,ccpp,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,neon,pan,pan-rwv,perfmon,predictable-select-expensive,ras,rcpc,rdm,sha2,ssbs,uaops,v8.1a,v8.2a,v8a,vh") }, @@ -430,6 +456,7 @@ gb_global MicroarchFeatureList microarch_features_list[] = { { str_lit("cortex-x1c"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,ccpp,cmp-bcc-fusion,cortex-x1,crc,crypto,dotprod,el2vmsa,el3,enable-select-opt,flagm,fp-armv8,fullfp16,fuse-adrp-add,fuse-aes,lor,lse,lse2,neon,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,ras,rcpc,rcpc-immo,rdm,sha2,spe,ssbs,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh") }, { str_lit("cortex-x2"), str_lit("CONTEXTIDREL2,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,cmp-bcc-fusion,complxnum,cortex-x2,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, { str_lit("cortex-x3"), str_lit("CONTEXTIDREL2,addr-lsl-fast,altnzcv,alu-lsl-fast,am,bf16,bti,ccdp,ccidx,ccpp,complxnum,cortex-x3,crc,dit,dotprod,el2vmsa,el3,enable-select-opt,ete,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,spe,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8a,v9a,vh") }, + { str_lit("cortex-x4"), str_lit("CONTEXTIDREL2,addr-lsl-fast,altnzcv,alu-lsl-fast,am,amvs,bf16,bti,ccdp,ccidx,ccpp,complxnum,cortex-x4,crc,dit,dotprod,ecv,el2vmsa,el3,enable-select-opt,ete,fgt,flagm,fp-armv8,fp16fml,fptoint,fullfp16,fuse-adrp-add,fuse-aes,hcx,i8mm,jsconv,lor,lse,lse2,mec,mpam,mte,neon,nv,pan,pan-rwv,pauth,perfmon,predictable-select-expensive,predres,ras,rcpc,rcpc-immo,rdm,rme,sb,sel2,spe,spe-eef,specrestrict,ssbs,sve,sve2,sve2-bitperm,tlb-rmi,tracev8.4,trbe,uaops,use-postra-scheduler,use-scalar-inc-vl,v8.1a,v8.2a,v8.3a,v8.4a,v8.5a,v8.6a,v8.7a,v8a,v9.1a,v9.2a,v9a,vh,wfxt,xs") }, { str_lit("cyclone"), str_lit("aes,alternate-sextload-cvt-f32-pattern,apple-a7,apple-a7-sysreg,arith-bcc-fusion,arith-cbz-fusion,crypto,disable-latency-sched-heuristic,el2vmsa,el3,fp-armv8,fuse-aes,fuse-crypto-eor,neon,perfmon,sha2,store-pair-suppress,v8a,zcm,zcz,zcz-fp-workaround,zcz-gp") }, { str_lit("exynos-m3"), str_lit("addr-lsl-fast,aes,alu-lsl-fast,crc,crypto,el2vmsa,el3,exynos-cheap-as-move,exynosm3,force-32bit-jump-tables,fp-armv8,fuse-address,fuse-adrp-add,fuse-aes,fuse-csel,fuse-literals,neon,perfmon,predictable-select-expensive,sha2,store-pair-suppress,use-postra-scheduler,v8a") }, { str_lit("exynos-m4"), str_lit("CONTEXTIDREL2,addr-lsl-fast,aes,alu-lsl-fast,arith-bcc-fusion,arith-cbz-fusion,ccpp,crc,crypto,dotprod,el2vmsa,el3,exynos-cheap-as-move,exynosm4,force-32bit-jump-tables,fp-armv8,fullfp16,fuse-address,fuse-adrp-add,fuse-aes,fuse-arith-logic,fuse-csel,fuse-literals,lor,lse,neon,pan,pan-rwv,perfmon,ras,rdm,sha2,store-pair-suppress,uaops,use-postra-scheduler,v8.1a,v8.2a,v8a,vh,zcz,zcz-gp") }, @@ -459,4 +486,31 @@ gb_global MicroarchFeatureList microarch_features_list[] = { { str_lit("bleeding-edge"), str_lit("atomics,bulk-memory,mutable-globals,nontrapping-fptoint,sign-ext,simd128,tail-call") }, { str_lit("generic"), str_lit("mutable-globals,sign-ext") }, { str_lit("mvp"), str_lit("") }, -}; \ No newline at end of file + // TargetArch_riscv64: + { str_lit("generic"), str_lit("64bit") }, + { str_lit("generic-rv32"), str_lit("32bit") }, + { str_lit("generic-rv64"), str_lit("64bit") }, + { str_lit("rocket"), str_lit("") }, + { str_lit("rocket-rv32"), str_lit("32bit,zicsr,zifencei") }, + { str_lit("rocket-rv64"), str_lit("64bit,zicsr,zifencei") }, + { str_lit("sifive-7-series"), str_lit("no-default-unroll,short-forward-branch-opt,sifive7") }, + { str_lit("sifive-e20"), str_lit("32bit,c,m,zicsr,zifencei") }, + { str_lit("sifive-e21"), str_lit("32bit,a,c,m,zicsr,zifencei") }, + { str_lit("sifive-e24"), str_lit("32bit,a,c,f,m,zicsr,zifencei") }, + { str_lit("sifive-e31"), str_lit("32bit,a,c,m,zicsr,zifencei") }, + { str_lit("sifive-e34"), str_lit("32bit,a,c,f,m,zicsr,zifencei") }, + { str_lit("sifive-e76"), str_lit("32bit,a,c,f,m,no-default-unroll,short-forward-branch-opt,sifive7,zicsr,zifencei") }, + { str_lit("sifive-p450"), str_lit("64bit,a,auipc-addi-fusion,c,conditional-cmv-fusion,d,f,fast-unaligned-access,lui-addi-fusion,m,no-default-unroll,za64rs,zba,zbb,zbs,zfhmin,zic64b,zicbom,zicbop,zicboz,ziccamoa,ziccif,zicclsm,ziccrse,zicsr,zifencei,zihintntl,zihintpause,zihpm") }, + { str_lit("sifive-p670"), str_lit("64bit,a,auipc-addi-fusion,c,conditional-cmv-fusion,d,f,fast-unaligned-access,lui-addi-fusion,m,no-default-unroll,v,za64rs,zba,zbb,zbs,zfhmin,zic64b,zicbom,zicbop,zicboz,ziccamoa,ziccif,zicclsm,ziccrse,zicsr,zifencei,zihintntl,zihintpause,zihpm,zvbb,zvbc,zve32f,zve32x,zve64d,zve64f,zve64x,zvkb,zvkg,zvkn,zvknc,zvkned,zvkng,zvknhb,zvks,zvksc,zvksed,zvksg,zvksh,zvkt,zvl128b,zvl32b,zvl64b") }, + { str_lit("sifive-s21"), str_lit("64bit,a,c,m,zicsr,zifencei") }, + { str_lit("sifive-s51"), str_lit("64bit,a,c,m,zicsr,zifencei") }, + { str_lit("sifive-s54"), str_lit("64bit,a,c,d,f,m,zicsr,zifencei") }, + { str_lit("sifive-s76"), str_lit("64bit,a,c,d,f,m,no-default-unroll,short-forward-branch-opt,sifive7,zicsr,zifencei,zihintpause") }, + { str_lit("sifive-u54"), str_lit("64bit,a,c,d,f,m,zicsr,zifencei") }, + { str_lit("sifive-u74"), str_lit("64bit,a,c,d,f,m,no-default-unroll,short-forward-branch-opt,sifive7,zicsr,zifencei") }, + { str_lit("sifive-x280"), str_lit("64bit,a,c,d,dlen-factor-2,f,m,no-default-unroll,short-forward-branch-opt,sifive7,v,zba,zbb,zfh,zfhmin,zicsr,zifencei,zve32f,zve32x,zve64d,zve64f,zve64x,zvfh,zvfhmin,zvl128b,zvl256b,zvl32b,zvl512b,zvl64b") }, + { str_lit("syntacore-scr1-base"), str_lit("32bit,c,no-default-unroll,zicsr,zifencei") }, + { str_lit("syntacore-scr1-max"), str_lit("32bit,c,m,no-default-unroll,zicsr,zifencei") }, + { str_lit("veyron-v1"), str_lit("64bit,a,auipc-addi-fusion,c,d,f,ld-add-fusion,lui-addi-fusion,m,shifted-zextw-fusion,ventana-veyron,xventanacondops,zba,zbb,zbc,zbs,zexth-fusion,zextw-fusion,zicbom,zicbop,zicboz,zicntr,zicsr,zifencei,zihintpause,zihpm") }, + { str_lit("xiangshan-nanhu"), str_lit("64bit,a,c,d,f,m,svinval,zba,zbb,zbc,zbkb,zbkc,zbkx,zbs,zicbom,zicboz,zicsr,zifencei,zkn,zknd,zkne,zknh,zksed,zksh") }, +}; diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index e5282f63e..1c4b88101 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -155,6 +155,11 @@ gb_internal bool does_require_msgSend_stret(Type *return_type) { return false; } + // No objc here so this doesn't matter, right? + if (build_context.metrics.arch == TargetArch_riscv64) { + return false; + } + // if (build_context.metrics.arch == TargetArch_arm32) { // i64 struct_limit = type_size_of(t_uintptr); // // NOTE(bill): This is technically wrong diff --git a/src/checker.cpp b/src/checker.cpp index c8eaf0acc..b24a7afdb 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -1039,6 +1039,7 @@ gb_internal void init_universal(void) { {"arm64", TargetArch_arm64}, {"wasm32", TargetArch_wasm32}, {"wasm64p32", TargetArch_wasm64p32}, + {"riscv64", TargetArch_riscv64}, }; auto fields = add_global_enum_type(str_lit("Odin_Arch_Type"), values, gb_count_of(values)); diff --git a/src/linker.cpp b/src/linker.cpp index 046e72d0e..faca28932 100644 --- a/src/linker.cpp +++ b/src/linker.cpp @@ -388,6 +388,12 @@ gb_internal i32 linker_stage(LinkerData *gen) { } else { timings_start_section(timings, str_lit("ld-link")); + // Link using `clang`, unless overridden by `ODIN_CLANG_PATH` environment variable. + const char* clang_path = gb_get_env("ODIN_CLANG_PATH", permanent_allocator()); + if (clang_path == NULL) { + clang_path = "clang"; + } + // NOTE(vassvik): get cwd, for used for local shared libs linking, since those have to be relative to the exe char cwd[256]; #if !defined(GB_SYSTEM_WINDOWS) @@ -458,7 +464,20 @@ gb_internal i32 linker_stage(LinkerData *gen) { } #endif // GB_ARCH_*_BIT - if (is_osx) { + if (build_context.metrics.arch == TargetArch_riscv64) { + result = system_exec_command_line_app("clang", + "%s \"%.*s\" " + "-c -o \"%.*s\" " + "-target %.*s -march=rv64gc " + "%.*s " + "", + clang_path, + LIT(asm_file), + LIT(obj_file), + LIT(build_context.metrics.target_triplet), + LIT(build_context.extra_assembler_flags) + ); + } else if (is_osx) { // `as` comes with MacOS. result = system_exec_command_line_app("as", "as \"%.*s\" " @@ -592,7 +611,7 @@ gb_internal i32 linker_stage(LinkerData *gen) { link_settings = gb_string_appendc(link_settings, "-Wl,-fini,'_odin_exit_point' "); } - } else if (build_context.metrics.os != TargetOs_openbsd && build_context.metrics.os != TargetOs_haiku) { + } else if (build_context.metrics.os != TargetOs_openbsd && build_context.metrics.os != TargetOs_haiku && build_context.metrics.arch != TargetArch_riscv64) { // OpenBSD and Haiku default to PIE executable. do not pass -no-pie for it. link_settings = gb_string_appendc(link_settings, "-no-pie "); } @@ -635,12 +654,6 @@ gb_internal i32 linker_stage(LinkerData *gen) { } } - // Link using `clang`, unless overridden by `ODIN_CLANG_PATH` environment variable. - const char* clang_path = gb_get_env("ODIN_CLANG_PATH", permanent_allocator()); - if (clang_path == NULL) { - clang_path = "clang"; - } - gbString link_command_line = gb_string_make(heap_allocator(), clang_path); defer (gb_string_free(link_command_line)); diff --git a/src/llvm_abi.cpp b/src/llvm_abi.cpp index c21cd0a46..0837e40bf 100644 --- a/src/llvm_abi.cpp +++ b/src/llvm_abi.cpp @@ -332,7 +332,8 @@ gb_internal i64 lb_alignof(LLVMTypeRef type) { } -#define LB_ABI_INFO(name) lbFunctionType *name(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count, LLVMTypeRef return_type, bool return_is_defined, bool return_is_tuple, ProcCallingConvention calling_convention, Type *original_type) +#define LB_ABI_INFO(name) lbFunctionType *name(lbModule *m, LLVMTypeRef *arg_types, unsigned arg_count, LLVMTypeRef return_type, bool return_is_defined, bool return_is_tuple, ProcCallingConvention calling_convention, Type *original_type) +#define LB_ABI_INFO_CTX(name) lbFunctionType *name(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count, LLVMTypeRef return_type, bool return_is_defined, bool return_is_tuple, ProcCallingConvention calling_convention, Type *original_type) typedef LB_ABI_INFO(lbAbiInfoType); #define LB_ABI_COMPUTE_RETURN_TYPE(name) lbArgType name(lbFunctionType *ft, LLVMContextRef c, LLVMTypeRef return_type, bool return_is_defined, bool return_is_tuple) @@ -379,7 +380,7 @@ namespace lbAbi386 { gb_internal Array compute_arg_types(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count); gb_internal LB_ABI_COMPUTE_RETURN_TYPE(compute_return_type); - gb_internal LB_ABI_INFO(abi_info) { + gb_internal LB_ABI_INFO_CTX(abi_info) { lbFunctionType *ft = gb_alloc_item(permanent_allocator(), lbFunctionType); ft->ctx = c; ft->args = compute_arg_types(c, arg_types, arg_count); @@ -460,7 +461,7 @@ namespace lbAbiAmd64Win64 { gb_internal Array compute_arg_types(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count); gb_internal LB_ABI_COMPUTE_RETURN_TYPE(compute_return_type); - gb_internal LB_ABI_INFO(abi_info) { + gb_internal LB_ABI_INFO_CTX(abi_info) { lbFunctionType *ft = gb_alloc_item(permanent_allocator(), lbFunctionType); ft->ctx = c; ft->args = compute_arg_types(c, arg_types, arg_count); @@ -570,7 +571,7 @@ namespace lbAbiAmd64SysV { gb_internal Array classify(LLVMTypeRef t); gb_internal LLVMTypeRef llreg(LLVMContextRef c, Array const ®_classes, LLVMTypeRef type); - gb_internal LB_ABI_INFO(abi_info) { + gb_internal LB_ABI_INFO_CTX(abi_info) { lbFunctionType *ft = gb_alloc_item(permanent_allocator(), lbFunctionType); ft->ctx = c; ft->calling_convention = calling_convention; @@ -1008,7 +1009,7 @@ namespace lbAbiArm64 { gb_internal LB_ABI_COMPUTE_RETURN_TYPE(compute_return_type); gb_internal bool is_homogenous_aggregate(LLVMContextRef c, LLVMTypeRef type, LLVMTypeRef *base_type_, unsigned *member_count_); - gb_internal LB_ABI_INFO(abi_info) { + gb_internal LB_ABI_INFO_CTX(abi_info) { lbFunctionType *ft = gb_alloc_item(permanent_allocator(), lbFunctionType); ft->ctx = c; ft->args = compute_arg_types(c, arg_types, arg_count); @@ -1242,7 +1243,7 @@ namespace lbAbiWasm { enum {MAX_DIRECT_STRUCT_SIZE = 32}; - gb_internal LB_ABI_INFO(abi_info) { + gb_internal LB_ABI_INFO_CTX(abi_info) { lbFunctionType *ft = gb_alloc_item(permanent_allocator(), lbFunctionType); ft->ctx = c; ft->calling_convention = calling_convention; @@ -1407,7 +1408,7 @@ namespace lbAbiArm32 { gb_internal Array compute_arg_types(LLVMContextRef c, LLVMTypeRef *arg_types, unsigned arg_count, ProcCallingConvention calling_convention); gb_internal lbArgType compute_return_type(LLVMContextRef c, LLVMTypeRef return_type, bool return_is_defined); - gb_internal LB_ABI_INFO(abi_info) { + gb_internal LB_ABI_INFO_CTX(abi_info) { lbFunctionType *ft = gb_alloc_item(permanent_allocator(), lbFunctionType); ft->ctx = c; ft->args = compute_arg_types(c, arg_types, arg_count, calling_convention); @@ -1485,8 +1486,256 @@ namespace lbAbiArm32 { } }; +namespace lbAbiRiscv64 { + + gb_internal bool is_register(LLVMTypeRef type) { + LLVMTypeKind kind = LLVMGetTypeKind(type); + switch (kind) { + case LLVMIntegerTypeKind: + case LLVMHalfTypeKind: + case LLVMFloatTypeKind: + case LLVMDoubleTypeKind: + case LLVMPointerTypeKind: + return true; + } + return false; + } + + gb_internal bool is_float(LLVMTypeRef type) { + LLVMTypeKind kind = LLVMGetTypeKind(type); + switch (kind) { + case LLVMHalfTypeKind: + case LLVMFloatTypeKind: + case LLVMDoubleTypeKind: + return true; + default: + return false; + } + } + + gb_internal lbArgType non_struct(LLVMContextRef c, LLVMTypeRef type) { + LLVMAttributeRef attr = nullptr; + LLVMTypeRef i1 = LLVMInt1TypeInContext(c); + if (type == i1) { + attr = lb_create_enum_attribute(c, "zeroext"); + } + return lb_arg_type_direct(type, nullptr, nullptr, attr); + } + + gb_internal void flatten(lbModule *m, Array *fields, LLVMTypeRef type, bool with_padding) { + LLVMTypeKind kind = LLVMGetTypeKind(type); + switch (kind) { + case LLVMStructTypeKind: { + if (LLVMIsPackedStruct(type)) { + array_add(fields, type); + break; + } + + if (!with_padding) { + auto field_remapping = map_get(&m->struct_field_remapping, cast(void *)type); + if (field_remapping) { + auto remap = *field_remapping; + for_array(i, remap) { + flatten(m, fields, LLVMStructGetTypeAtIndex(type, remap[i]), with_padding); + } + break; + } else { + debugf("no field mapping for type: %s\n", LLVMPrintTypeToString(type)); + } + } + + unsigned elem_count = LLVMCountStructElementTypes(type); + for (unsigned i = 0; i < elem_count; i += 1) { + flatten(m, fields, LLVMStructGetTypeAtIndex(type, i), with_padding); + } + break; + } + case LLVMArrayTypeKind: { + unsigned len = LLVMGetArrayLength(type); + LLVMTypeRef elem = OdinLLVMGetArrayElementType(type); + for (unsigned i = 0; i < len; i += 1) { + flatten(m, fields, elem, with_padding); + } + break; + } + default: + array_add(fields, type); + } + } + + gb_internal lbArgType compute_arg_type(lbModule *m, LLVMTypeRef type, int *gprs_left, int *fprs_left, Type *odin_type) { + LLVMContextRef c = m->ctx; + + int xlen = 8; // 8 byte int register size for riscv64. + + // NOTE: we are requiring both of these to be enabled so we can just hard-code 8. + // int flen = 0; + // if (check_target_feature_is_enabled(str_lit("d"), nullptr)) { + // flen = 8; // Double precision floats are enabled. + // } else if (check_target_feature_is_enabled(str_lit("f"), nullptr)) { + // flen = 4; // Single precision floats are enabled. + // } + int flen = 8; + + LLVMTypeKind kind = LLVMGetTypeKind(type); + i64 size = lb_sizeof(type); + + if (size == 0) { + return lb_arg_type_direct(type, LLVMStructTypeInContext(c, nullptr, 0, false), nullptr, nullptr); + } + + LLVMTypeRef orig_type = type; + + // Flatten down the type so it is easier to check all the ABI conditions. + // Note that we also need to remove all implicit padding fields Odin adds so we keep ABI + // compatibility for struct declarations. + if (kind == LLVMStructTypeKind && size <= gb_max(2*xlen, 2*flen)) { + Array fields = array_make(temporary_allocator(), 0, LLVMCountStructElementTypes(type)); + flatten(m, &fields, type, false); + + if (fields.count == 1) { + type = fields[0]; + } else { + type = LLVMStructTypeInContext(c, fields.data, cast(unsigned)fields.count, false); + } + + kind = LLVMGetTypeKind(type); + size = lb_sizeof(type); + GB_ASSERT_MSG(size == lb_sizeof(orig_type), "flattened: %s of size %d, original: %s of size %d", LLVMPrintTypeToString(type), size, LLVMPrintTypeToString(orig_type), lb_sizeof(orig_type)); + } + + if (is_float(type) && size <= flen && *fprs_left >= 1) { + *fprs_left -= 1; + return non_struct(c, orig_type); + } + + if (kind == LLVMStructTypeKind && size <= 2*flen) { + unsigned elem_count = LLVMCountStructElementTypes(type); + if (elem_count == 2) { + LLVMTypeRef ty1 = LLVMStructGetTypeAtIndex(type, 0); + i64 ty1s = lb_sizeof(ty1); + LLVMTypeRef ty2 = LLVMStructGetTypeAtIndex(type, 1); + i64 ty2s = lb_sizeof(ty2); + + if (is_float(ty1) && is_float(ty2) && ty1s <= flen && ty2s <= flen && *fprs_left >= 2) { + *fprs_left -= 2; + return lb_arg_type_direct(orig_type, type, nullptr, nullptr); + } + + if (is_float(ty1) && is_register(ty2) && ty1s <= flen && ty2s <= xlen && *fprs_left >= 1 && *gprs_left >= 1) { + *fprs_left -= 1; + *gprs_left -= 1; + return lb_arg_type_direct(orig_type, type, nullptr, nullptr); + } + + if (is_register(ty1) && is_float(ty2) && ty1s <= xlen && ty2s <= flen && *gprs_left >= 1 && *fprs_left >= 1) { + *fprs_left -= 1; + *gprs_left -= 1; + return lb_arg_type_direct(orig_type, type, nullptr, nullptr); + } + } + } + + // At this point all the cases for floating point registers are exhausted, fit it into + // integer registers or the stack. + // LLVM automatically handles putting args on the stack so we don't check the amount of registers that are left here. + + if (size <= xlen) { + *gprs_left -= 1; + if (is_register(type)) { + return non_struct(c, orig_type); + } else { + return lb_arg_type_direct(orig_type, LLVMIntTypeInContext(c, cast(unsigned)(size*8)), nullptr, nullptr); + } + } else if (size <= 2*xlen) { + LLVMTypeRef *fields = gb_alloc_array(temporary_allocator(), LLVMTypeRef, 2); + fields[0] = LLVMIntTypeInContext(c, cast(unsigned)(xlen*8)); + fields[1] = LLVMIntTypeInContext(c, cast(unsigned)((size-xlen)*8)); + + *gprs_left -= 2; + return lb_arg_type_direct(orig_type, LLVMStructTypeInContext(c, fields, 2, false), nullptr, nullptr); + } else { + return lb_arg_type_indirect(orig_type, nullptr); + } + } + + gb_internal Array compute_arg_types(lbModule *m, LLVMTypeRef *arg_types, unsigned arg_count, ProcCallingConvention calling_convention, Type *odin_type, int *gprs, int *fprs) { + auto args = array_make(lb_function_type_args_allocator(), arg_count); + + for (unsigned i = 0; i < arg_count; i++) { + LLVMTypeRef type = arg_types[i]; + args[i] = compute_arg_type(m, type, gprs, fprs, odin_type); + } + + return args; + } + + gb_internal lbArgType compute_return_type(lbFunctionType *ft, lbModule *m, LLVMTypeRef return_type, bool return_is_defined, bool return_is_tuple, Type *odin_type, int *agprs) { + LLVMContextRef c = m->ctx; + + if (!return_is_defined) { + return lb_arg_type_direct(LLVMVoidTypeInContext(c)); + } + + // There are two registers for return types. + int gprs = 2; + int fprs = 2; + lbArgType ret = compute_arg_type(m, return_type, &gprs, &fprs, odin_type); + + // Return didn't fit into the return registers, so caller allocates and it is returned via + // an out-pointer. + if (ret.kind == lbArg_Indirect) { + + // Transform multiple return into out pointers if possible. + if (return_is_tuple) { + if (lb_is_type_kind(return_type, LLVMStructTypeKind)) { + int field_count = cast(int)LLVMCountStructElementTypes(return_type); + if (field_count > 1 && field_count <= *agprs) { + ft->original_arg_count = ft->args.count; + ft->multiple_return_original_type = return_type; + + for (int i = 0; i < field_count-1; i++) { + LLVMTypeRef field_type = LLVMStructGetTypeAtIndex(return_type, i); + LLVMTypeRef field_pointer_type = LLVMPointerType(field_type, 0); + lbArgType ret_partial = lb_arg_type_direct(field_pointer_type); + array_add(&ft->args, ret_partial); + *agprs -= 1; + } + GB_ASSERT(*agprs >= 0); + + // override the return type for the last field + LLVMTypeRef new_return_type = LLVMStructGetTypeAtIndex(return_type, field_count-1); + return compute_return_type(ft, m, new_return_type, true, false, odin_type, agprs); + } + } + } + + LLVMAttributeRef attr = lb_create_enum_attribute_with_type(c, "sret", ret.type); + return lb_arg_type_indirect(ret.type, attr); + } + + return ret; + } + + gb_internal LB_ABI_INFO(abi_info) { + lbFunctionType *ft = gb_alloc_item(permanent_allocator(), lbFunctionType); + ft->ctx = m->ctx; + ft->calling_convention = calling_convention; + + int gprs = 8; + int fprs = 8; + + ft->args = compute_arg_types(m, arg_types, arg_count, calling_convention, original_type, &gprs, &fprs); + ft->ret = compute_return_type(ft, m, return_type, return_is_defined, return_is_tuple, original_type, &gprs); + + return ft; + } +} + gb_internal LB_ABI_INFO(lb_get_abi_info_internal) { + LLVMContextRef c = m->ctx; + switch (calling_convention) { case ProcCC_None: case ProcCC_InlineAsm: @@ -1534,6 +1783,8 @@ gb_internal LB_ABI_INFO(lb_get_abi_info_internal) { return lbAbiWasm::abi_info(c, arg_types, arg_count, return_type, return_is_defined, return_is_tuple, calling_convention, original_type); case TargetArch_wasm64p32: return lbAbiWasm::abi_info(c, arg_types, arg_count, return_type, return_is_defined, return_is_tuple, calling_convention, original_type); + case TargetArch_riscv64: + return lbAbiRiscv64::abi_info(m, arg_types, arg_count, return_type, return_is_defined, return_is_tuple, calling_convention, original_type); } GB_PANIC("Unsupported ABI"); @@ -1543,7 +1794,7 @@ gb_internal LB_ABI_INFO(lb_get_abi_info_internal) { gb_internal LB_ABI_INFO(lb_get_abi_info) { lbFunctionType *ft = lb_get_abi_info_internal( - c, + m, arg_types, arg_count, return_type, return_is_defined, ALLOW_SPLIT_MULTI_RETURNS && return_is_tuple && is_calling_convention_odin(calling_convention), @@ -1555,7 +1806,7 @@ gb_internal LB_ABI_INFO(lb_get_abi_info) { // This is to make it consistent when and how it is handled if (calling_convention == ProcCC_Odin) { // append the `context` pointer - lbArgType context_param = lb_arg_type_direct(LLVMPointerType(LLVMInt8TypeInContext(c), 0)); + lbArgType context_param = lb_arg_type_direct(LLVMPointerType(LLVMInt8TypeInContext(m->ctx), 0)); array_add(&ft->args, context_param); } diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index 72ba12516..f852636a6 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -40,7 +40,10 @@ String get_default_microarchitecture() { default_march = str_lit("x86-64-v2"); } } + } else if (build_context.metrics.arch == TargetArch_riscv64) { + default_march = str_lit("generic-rv64"); } + return default_march; } @@ -65,13 +68,33 @@ gb_internal String get_default_features() { } String microarch = get_final_microarchitecture(); + + // NOTE(laytan): for riscv64 to work properly with Odin, we need to enforce some features. + // and we also overwrite the generic target to include more features so we don't default to + // a potato feature set. + if (bc->metrics.arch == TargetArch_riscv64) { + if (microarch == str_lit("generic-rv64")) { + // This is what clang does by default (on -march=rv64gc for General Computing), seems good to also default to. + String features = str_lit("64bit,a,c,d,f,m,relax,zicsr,zifencei"); + + // Update the features string so LLVM uses it later. + if (bc->target_features_string.len > 0) { + bc->target_features_string = concatenate3_strings(permanent_allocator(), features, str_lit(","), bc->target_features_string); + } else { + bc->target_features_string = features; + } + + return features; + } + } + for (int i = off; i < off+target_microarch_counts[bc->metrics.arch]; i += 1) { if (microarch_features_list[i].microarch == microarch) { return microarch_features_list[i].features; } } - GB_PANIC("unknown microarch"); + GB_PANIC("unknown microarch: %.*s", LIT(microarch)); return {}; } @@ -3030,6 +3053,12 @@ gb_internal bool lb_generate_code(lbGenerator *gen) { // Always use PIC for OpenBSD and Haiku: they default to PIE reloc_mode = LLVMRelocPIC; } + + if (build_context.metrics.arch == TargetArch_riscv64) { + // NOTE(laytan): didn't seem to work without this. + reloc_mode = LLVMRelocPIC; + } + break; case RelocMode_Static: reloc_mode = LLVMRelocStatic; diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index b5338297e..842a1cbc8 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -1787,7 +1787,7 @@ gb_internal LLVMTypeRef lb_type_internal_for_procedures_raw(lbModule *m, Type *t } } GB_ASSERT(param_index == param_count); - lbFunctionType *ft = lb_get_abi_info(m->ctx, params, param_count, ret, ret != nullptr, return_is_tuple, type->Proc.calling_convention, type); + lbFunctionType *ft = lb_get_abi_info(m, params, param_count, ret, ret != nullptr, return_is_tuple, type->Proc.calling_convention, type); { for_array(j, ft->args) { auto arg = ft->args[j]; @@ -2114,6 +2114,12 @@ gb_internal LLVMTypeRef lb_type_internal(lbModule *m, Type *type) { llvm_type = LLVMStructCreateNamed(ctx, name); map_set(&m->types, type, llvm_type); lb_clone_struct_type(llvm_type, lb_type(m, base)); + + if (base->kind == Type_Struct) { + map_set(&m->struct_field_remapping, cast(void *)llvm_type, lb_get_struct_remapping(m, base)); + map_set(&m->struct_field_remapping, cast(void *)type, lb_get_struct_remapping(m, base)); + } + return llvm_type; } } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 3326b4041..e850d3364 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -2877,6 +2877,31 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu LLVMValueRef inline_asm = nullptr; switch (build_context.metrics.arch) { + case TargetArch_riscv64: + { + GB_ASSERT(arg_count <= 7); + + char asm_string[] = "ecall"; + gbString constraints = gb_string_make(heap_allocator(), "={a0}"); + for (unsigned i = 0; i < arg_count; i++) { + constraints = gb_string_appendc(constraints, ",{"); + static char const *regs[] = { + "a7", + "a0", + "a1", + "a2", + "a3", + "a4", + "a5", + "a6" + }; + constraints = gb_string_appendc(constraints, regs[i]); + constraints = gb_string_appendc(constraints, "}"); + } + + inline_asm = llvm_get_inline_asm(func_type, make_string_c(asm_string), make_string_c(constraints)); + } + break; case TargetArch_amd64: { GB_ASSERT(arg_count <= 7); diff --git a/src/main.cpp b/src/main.cpp index 77758b929..5131bdc21 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3245,6 +3245,15 @@ int main(int arg_count, char const **arg_ptr) { } } + // NOTE(laytan): on riscv64 we want to enforce some features. + if (build_context.metrics.arch == TargetArch_riscv64) { + String disabled; + if (!check_target_feature_is_enabled(str_lit("64bit,f,d,m"), &disabled)) { // 64bit, floats, doubles, integer multiplication. + gb_printf_err("missing required target feature: \"%.*s\", enable it by setting a different -microarch or explicitly adding it through -target-features\n", LIT(disabled)); + gb_exit(1); + } + } + if (build_context.show_debug_messages) { debugf("Selected microarch: %.*s\n", LIT(march)); debugf("Default microarch features: %.*s\n", LIT(default_features)); -- cgit v1.2.3 From 603efa860a5631f1708f6761d753146b6d47b4ba Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 14 Sep 2024 21:43:25 +0200 Subject: add '#caller_expression' --- base/runtime/core_builtin.odin | 4 +-- core/odin/parser/parser.odin | 10 +++++++ core/testing/testing.odin | 12 +++++--- src/check_builtin.cpp | 16 +++++++++++ src/check_expr.cpp | 7 ++++- src/check_type.cpp | 32 ++++++++++++++++++++++ src/entity.cpp | 1 + src/llvm_backend.hpp | 2 +- src/llvm_backend_proc.cpp | 62 ++++++++++++++++++++++++++++++++++++++---- 9 files changed, 133 insertions(+), 13 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/base/runtime/core_builtin.odin b/base/runtime/core_builtin.odin index 8157afe09..67d249d11 100644 --- a/base/runtime/core_builtin.odin +++ b/base/runtime/core_builtin.odin @@ -913,7 +913,7 @@ card :: proc "contextless" (s: $S/bit_set[$E; $U]) -> int { @builtin @(disabled=ODIN_DISABLE_ASSERT) -assert :: proc(condition: bool, message := "", loc := #caller_location) { +assert :: proc(condition: bool, message := #caller_expression(condition), loc := #caller_location) { if !condition { // NOTE(bill): This is wrapped in a procedure call // to improve performance to make the CPU not @@ -952,7 +952,7 @@ unimplemented :: proc(message := "", loc := #caller_location) -> ! { @builtin @(disabled=ODIN_DISABLE_ASSERT) -assert_contextless :: proc "contextless" (condition: bool, message := "", loc := #caller_location) { +assert_contextless :: proc "contextless" (condition: bool, message := #caller_expression(condition), loc := #caller_location) { if !condition { // NOTE(bill): This is wrapped in a procedure call // to improve performance to make the CPU not diff --git a/core/odin/parser/parser.odin b/core/odin/parser/parser.odin index aab59c29d..6f42c17db 100644 --- a/core/odin/parser/parser.odin +++ b/core/odin/parser/parser.odin @@ -2277,6 +2277,16 @@ parse_operand :: proc(p: ^Parser, lhs: bool) -> ^ast.Expr { bd.name = name.text return bd + case "caller_expression": + bd := ast.new(ast.Basic_Directive, tok.pos, end_pos(name)) + bd.tok = tok + bd.name = name.text + + if peek_token_kind(p, .Open_Paren) { + return parse_call_expr(p, bd) + } + return bd + case "location", "exists", "load", "load_directory", "load_hash", "hash", "assert", "panic", "defined", "config": bd := ast.new(ast.Basic_Directive, tok.pos, end_pos(name)) bd.tok = tok diff --git a/core/testing/testing.odin b/core/testing/testing.odin index d5e7c6830..09bf6dc0e 100644 --- a/core/testing/testing.odin +++ b/core/testing/testing.odin @@ -105,9 +105,13 @@ cleanup :: proc(t: ^T, procedure: proc(rawptr), user_data: rawptr) { append(&t.cleanups, Internal_Cleanup{procedure, user_data, context}) } -expect :: proc(t: ^T, ok: bool, msg: string = "", loc := #caller_location) -> bool { +expect :: proc(t: ^T, ok: bool, msg := "", expr := #caller_expression(ok), loc := #caller_location) -> bool { if !ok { - log.error(msg, location=loc) + if msg == "" { + log.errorf("expected %v to be true", expr, location=loc) + } else { + log.error(msg, location=loc) + } } return ok } @@ -119,10 +123,10 @@ expectf :: proc(t: ^T, ok: bool, format: string, args: ..any, loc := #caller_loc return ok } -expect_value :: proc(t: ^T, value, expected: $T, loc := #caller_location) -> bool where intrinsics.type_is_comparable(T) { +expect_value :: proc(t: ^T, value, expected: $T, loc := #caller_location, value_expr := #caller_expression(value)) -> bool where intrinsics.type_is_comparable(T) { ok := value == expected || reflect.is_nil(value) && reflect.is_nil(expected) if !ok { - log.errorf("expected %v, got %v", expected, value, location=loc) + log.errorf("expected %v to be %v, got %v", value_expr, expected, value, location=loc) } return ok } diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index 888aa074d..909eb668e 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -1632,6 +1632,22 @@ gb_internal bool check_builtin_procedure_directive(CheckerContext *c, Operand *o operand->type = t_source_code_location; operand->mode = Addressing_Value; + } else if (name == "caller_expression") { + if (ce->args.count > 1) { + error(ce->args[0], "'#caller_expression' expects either 0 or 1 arguments, got %td", ce->args.count); + } + if (ce->args.count > 0) { + Ast *arg = ce->args[0]; + Operand o = {}; + Entity *e = check_ident(c, &o, arg, nullptr, nullptr, true); + if (e == nullptr || (e->flags & EntityFlag_Param) == 0) { + error(ce->args[0], "'#caller_expression' expected a valid earlier parameter name"); + } + arg->Ident.entity = e; + } + + operand->type = t_string; + operand->mode = Addressing_Value; } else if (name == "exists") { if (ce->args.count != 1) { error(ce->close, "'#exists' expects 1 argument, got %td", ce->args.count); diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 7f82fb58a..6776094bf 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -7807,7 +7807,8 @@ gb_internal ExprKind check_call_expr(CheckerContext *c, Operand *operand, Ast *c name == "load" || name == "load_directory" || name == "load_hash" || - name == "hash" + name == "hash" || + name == "caller_expression" ) { operand->mode = Addressing_Builtin; operand->builtin_id = BuiltinProc_DIRECTIVE; @@ -8725,6 +8726,10 @@ gb_internal ExprKind check_basic_directive_expr(CheckerContext *c, Operand *o, A error(node, "#caller_location may only be used as a default argument parameter"); o->type = t_source_code_location; o->mode = Addressing_Value; + } else if (name == "caller_expression") { + error(node, "#caller_expression may only be used as a default argument parameter"); + o->type = t_string; + o->mode = Addressing_Value; } else { if (name == "location") { init_core_source_code_location(c->checker); diff --git a/src/check_type.cpp b/src/check_type.cpp index 3767f7666..f0e0acb9b 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -1605,6 +1605,25 @@ gb_internal bool is_expr_from_a_parameter(CheckerContext *ctx, Ast *expr) { return false; } +gb_internal bool is_caller_expression(Ast *expr) { + if (expr->kind == Ast_BasicDirective && expr->BasicDirective.name.string == "caller_expression") { + return true; + } + + Ast *call = unparen_expr(expr); + if (call->kind != Ast_CallExpr) { + return false; + } + + ast_node(ce, CallExpr, call); + if (ce->proc->kind != Ast_BasicDirective) { + return false; + } + + ast_node(bd, BasicDirective, ce->proc); + String name = bd->name.string; + return name == "caller_expression"; +} gb_internal ParameterValue handle_parameter_value(CheckerContext *ctx, Type *in_type, Type **out_type_, Ast *expr, bool allow_caller_location) { ParameterValue param_value = {}; @@ -1626,7 +1645,19 @@ gb_internal ParameterValue handle_parameter_value(CheckerContext *ctx, Type *in_ if (in_type) { check_assignment(ctx, &o, in_type, str_lit("parameter value")); } + } else if (is_caller_expression(expr)) { + if (expr->kind != Ast_BasicDirective) { + check_builtin_procedure_directive(ctx, &o, expr, t_string); + } + + param_value.kind = ParameterValue_Expression; + o.type = t_string; + o.mode = Addressing_Value; + o.expr = expr; + if (in_type) { + check_assignment(ctx, &o, in_type, str_lit("parameter value")); + } } else { if (in_type) { check_expr_with_type_hint(ctx, &o, expr, in_type); @@ -1858,6 +1889,7 @@ gb_internal Type *check_get_params(CheckerContext *ctx, Scope *scope, Ast *_para case ParameterValue_Nil: break; case ParameterValue_Location: + case ParameterValue_Expression: case ParameterValue_Value: gbString str = type_to_string(type); error(params[i], "A default value for a parameter must not be a polymorphic constant type, got %s", str); diff --git a/src/entity.cpp b/src/entity.cpp index db6ffdd52..0c4a20df4 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -104,6 +104,7 @@ enum ParameterValueKind { ParameterValue_Constant, ParameterValue_Nil, ParameterValue_Location, + ParameterValue_Expression, ParameterValue_Value, }; diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 29d2ccfe6..68f95cb03 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -528,7 +528,7 @@ gb_internal lbAddr lb_store_range_stmt_val(lbProcedure *p, Ast *stmt_val, lbValu gb_internal lbValue lb_emit_source_code_location_const(lbProcedure *p, String const &procedure, TokenPos const &pos); gb_internal lbValue lb_const_source_code_location_const(lbModule *m, String const &procedure, TokenPos const &pos); -gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const ¶m_value, TokenPos const &pos); +gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const ¶m_value, TypeProc *procedure_type, Ast *call_expression); gb_internal lbValue lb_equal_proc_for_type(lbModule *m, Type *type); gb_internal lbValue lb_hasher_proc_for_type(lbModule *m, Type *type); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index e850d3364..d84599eb0 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -699,7 +699,9 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) { } if (e->Variable.param_value.kind != ParameterValue_Invalid) { - lbValue c = lb_handle_param_value(p, e->type, e->Variable.param_value, e->token.pos); + GB_ASSERT(e->Variable.param_value.kind != ParameterValue_Location); + GB_ASSERT(e->Variable.param_value.kind != ParameterValue_Expression); + lbValue c = lb_handle_param_value(p, e->type, e->Variable.param_value, nullptr, nullptr); lb_addr_store(p, res, c); } @@ -3420,7 +3422,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu } -gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const ¶m_value, TokenPos const &pos) { +gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, ParameterValue const ¶m_value, TypeProc *procedure_type, Ast* call_expression) { switch (param_value.kind) { case ParameterValue_Constant: if (is_type_constant_type(parameter_type)) { @@ -3446,8 +3448,60 @@ gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, if (p->entity != nullptr) { proc_name = p->entity->token.string; } + + ast_node(ce, CallExpr, call_expression); + TokenPos pos = ast_token(ce->proc).pos; + return lb_emit_source_code_location_as_global(p, proc_name, pos); } + case ParameterValue_Expression: + { + Ast *orig = param_value.original_ast_expr; + if (orig->kind == Ast_BasicDirective) { + gbString expr = expr_to_string(call_expression, temporary_allocator()); + return lb_const_string(p->module, make_string_c(expr)); + } + + isize param_idx = -1; + String param_str = {0}; + { + Ast *call = unparen_expr(orig); + GB_ASSERT(call->kind == Ast_CallExpr); + ast_node(ce, CallExpr, call); + GB_ASSERT(ce->proc->kind == Ast_BasicDirective); + GB_ASSERT(ce->args.count == 1); + Ast *target = ce->args[0]; + GB_ASSERT(target->kind == Ast_Ident); + String target_str = target->Ident.token.string; + + param_idx = lookup_procedure_parameter(procedure_type, target_str); + param_str = target_str; + } + GB_ASSERT(param_idx >= 0); + + + Ast *target_expr = nullptr; + ast_node(ce, CallExpr, call_expression); + + if (ce->split_args->positional.count > param_idx) { + target_expr = ce->split_args->positional[param_idx]; + } + + for_array(i, ce->split_args->named) { + Ast *arg = ce->split_args->named[i]; + ast_node(fv, FieldValue, arg); + GB_ASSERT(fv->field->kind == Ast_Ident); + String name = fv->field->Ident.token.string; + if (name == param_str) { + target_expr = fv->value; + break; + } + } + + gbString expr = expr_to_string(target_expr, temporary_allocator()); + return lb_const_string(p->module, make_string_c(expr)); + } + case ParameterValue_Value: return lb_build_expr(p, param_value.ast_value); } @@ -3739,8 +3793,6 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { } } - TokenPos pos = ast_token(ce->proc).pos; - if (pt->params != nullptr) { isize min_count = pt->params->Tuple.variables.count; @@ -3764,7 +3816,7 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { args[arg_index] = lb_const_nil(p->module, e->type); break; case Entity_Variable: - args[arg_index] = lb_handle_param_value(p, e->type, e->Variable.param_value, pos); + args[arg_index] = lb_handle_param_value(p, e->type, e->Variable.param_value, pt, expr); break; case Entity_Constant: -- cgit v1.2.3 From da1e09c95d885e8385c7d88b856f130cee41d9c1 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Fri, 25 Oct 2024 14:26:02 +0200 Subject: check packed load and set alignment on all loads, not just lb_emit_load --- src/llvm_backend_expr.cpp | 13 ++++++------ src/llvm_backend_general.cpp | 49 +++++++++++++++++++++++++++++++------------- src/llvm_backend_proc.cpp | 5 ++--- src/llvm_backend_stmt.cpp | 2 +- src/llvm_backend_utility.cpp | 2 +- 5 files changed, 45 insertions(+), 26 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 8967a4e03..de7cadaf3 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -130,7 +130,7 @@ gb_internal lbValue lb_emit_unary_arith(lbProcedure *p, TokenKind op, lbValue x, LLVMTypeRef vector_type = nullptr; if (op != Token_Not && lb_try_vector_cast(p->module, val, &vector_type)) { LLVMValueRef vp = LLVMBuildPointerCast(p->builder, val.value, LLVMPointerType(vector_type, 0), ""); - LLVMValueRef v = LLVMBuildLoad2(p->builder, vector_type, vp, ""); + LLVMValueRef v = OdinLLVMBuildLoad(p, vector_type, vp); LLVMValueRef opv = nullptr; switch (op) { @@ -324,8 +324,8 @@ gb_internal bool lb_try_direct_vector_arith(lbProcedure *p, TokenKind op, lbValu LLVMValueRef lhs_vp = LLVMBuildPointerCast(p->builder, lhs_ptr.value, LLVMPointerType(vector_type, 0), ""); LLVMValueRef rhs_vp = LLVMBuildPointerCast(p->builder, rhs_ptr.value, LLVMPointerType(vector_type, 0), ""); - LLVMValueRef x = LLVMBuildLoad2(p->builder, vector_type, lhs_vp, ""); - LLVMValueRef y = LLVMBuildLoad2(p->builder, vector_type, rhs_vp, ""); + LLVMValueRef x = OdinLLVMBuildLoad(p, vector_type, lhs_vp); + LLVMValueRef y = OdinLLVMBuildLoad(p, vector_type, rhs_vp); LLVMValueRef z = nullptr; if (is_type_float(integral_type)) { @@ -551,15 +551,14 @@ gb_internal LLVMValueRef lb_matrix_to_vector(lbProcedure *p, lbValue matrix) { Type *mt = base_type(matrix.type); GB_ASSERT(mt->kind == Type_Matrix); LLVMTypeRef elem_type = lb_type(p->module, mt->Matrix.elem); - + unsigned total_count = cast(unsigned)matrix_type_total_internal_elems(mt); LLVMTypeRef total_matrix_type = LLVMVectorType(elem_type, total_count); - + #if 1 LLVMValueRef ptr = lb_address_from_load_or_generate_local(p, matrix).value; LLVMValueRef matrix_vector_ptr = LLVMBuildPointerCast(p->builder, ptr, LLVMPointerType(total_matrix_type, 0), ""); - LLVMValueRef matrix_vector = LLVMBuildLoad2(p->builder, total_matrix_type, matrix_vector_ptr, ""); - LLVMSetAlignment(matrix_vector, cast(unsigned)type_align_of(mt)); + LLVMValueRef matrix_vector = OdinLLVMBuildLoadAligned(p, total_matrix_type, matrix_vector_ptr, type_align_of(mt)); return matrix_vector; #else LLVMValueRef matrix_vector = LLVMBuildBitCast(p->builder, matrix.value, total_matrix_type, ""); diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 43c9f738a..686724f6a 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -774,6 +774,36 @@ gb_internal bool lb_try_vector_cast(lbModule *m, lbValue ptr, LLVMTypeRef *vecto return false; } +gb_internal LLVMValueRef OdinLLVMBuildLoad(lbProcedure *p, LLVMTypeRef type, LLVMValueRef value) { + LLVMValueRef result = LLVMBuildLoad2(p->builder, type, value, ""); + + // If it is not an instruction it isn't a GEP, so we don't need to track alignment in the metadata, + // which is not possible anyway (only LLVM instructions can have metadata). + if (LLVMIsAInstruction(value)) { + u64 is_packed = lb_get_metadata_custom_u64(p->module, value, ODIN_METADATA_IS_PACKED); + if (is_packed != 0) { + LLVMSetAlignment(result, 1); + } + } + + return result; +} + +gb_internal LLVMValueRef OdinLLVMBuildLoadAligned(lbProcedure *p, LLVMTypeRef type, LLVMValueRef value, i64 alignment) { + LLVMValueRef result = LLVMBuildLoad2(p->builder, type, value, ""); + + LLVMSetAlignment(result, cast(unsigned)alignment); + + if (LLVMIsAInstruction(value)) { + u64 is_packed = lb_get_metadata_custom_u64(p->module, value, ODIN_METADATA_IS_PACKED); + if (is_packed != 0) { + LLVMSetAlignment(result, 1); + } + } + + return result; +} + gb_internal void lb_addr_store(lbProcedure *p, lbAddr addr, lbValue value) { if (addr.addr.value == nullptr) { return; @@ -1119,7 +1149,7 @@ gb_internal lbValue lb_emit_load(lbProcedure *p, lbValue value) { Type *vt = base_type(value.type); GB_ASSERT(vt->kind == Type_MultiPointer); Type *t = vt->MultiPointer.elem; - LLVMValueRef v = LLVMBuildLoad2(p->builder, lb_type(p->module, t), value.value, ""); + LLVMValueRef v = OdinLLVMBuildLoad(p, lb_type(p->module, t), value.value); return lbValue{v, t}; } else if (is_type_soa_pointer(value.type)) { lbValue ptr = lb_emit_struct_ev(p, value, 0); @@ -1130,16 +1160,7 @@ gb_internal lbValue lb_emit_load(lbProcedure *p, lbValue value) { GB_ASSERT_MSG(is_type_pointer(value.type), "%s", type_to_string(value.type)); Type *t = type_deref(value.type); - LLVMValueRef v = LLVMBuildLoad2(p->builder, lb_type(p->module, t), value.value, ""); - - // If it is not an instruction it isn't a GEP, so we don't need to track alignment in the metadata, - // which is not possible anyway (only LLVM instructions can have metadata). - if (LLVMIsAInstruction(value.value)) { - u64 is_packed = lb_get_metadata_custom_u64(p->module, value.value, ODIN_METADATA_IS_PACKED); - if (is_packed != 0) { - LLVMSetAlignment(v, 1); - } - } + LLVMValueRef v = OdinLLVMBuildLoad(p, lb_type(p->module, t), value.value); return lbValue{v, t}; } @@ -1413,7 +1434,7 @@ gb_internal lbValue lb_addr_load(lbProcedure *p, lbAddr const &addr) { LLVMTypeRef vector_type = nullptr; if (lb_try_vector_cast(p->module, addr.addr, &vector_type)) { LLVMValueRef vp = LLVMBuildPointerCast(p->builder, addr.addr.value, LLVMPointerType(vector_type, 0), ""); - LLVMValueRef v = LLVMBuildLoad2(p->builder, vector_type, vp, ""); + LLVMValueRef v = OdinLLVMBuildLoad(p, vector_type, vp); LLVMValueRef scalars[4] = {}; for (u8 i = 0; i < addr.swizzle.count; i++) { scalars[i] = LLVMConstInt(lb_type(p->module, t_u32), addr.swizzle.indices[i], false); @@ -2710,7 +2731,7 @@ general_end:; if (LLVMIsALoadInst(val) && (src_size >= dst_size && src_align >= dst_align)) { LLVMValueRef val_ptr = LLVMGetOperand(val, 0); val_ptr = LLVMBuildPointerCast(p->builder, val_ptr, LLVMPointerType(dst_type, 0), ""); - LLVMValueRef loaded_val = LLVMBuildLoad2(p->builder, dst_type, val_ptr, ""); + LLVMValueRef loaded_val = OdinLLVMBuildLoad(p, dst_type, val_ptr); // LLVMSetAlignment(loaded_val, gb_min(src_align, dst_align)); @@ -2726,7 +2747,7 @@ general_end:; LLVMValueRef nptr = LLVMBuildPointerCast(p->builder, ptr, LLVMPointerType(src_type, 0), ""); LLVMBuildStore(p->builder, val, nptr); - return LLVMBuildLoad2(p->builder, dst_type, ptr, ""); + return OdinLLVMBuildLoad(p, dst_type, ptr); } } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index d84599eb0..5aee5b639 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -2568,7 +2568,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu case BuiltinProc_atomic_load_explicit: { lbValue dst = lb_build_expr(p, ce->args[0]); - LLVMValueRef instr = LLVMBuildLoad2(p->builder, lb_type(p->module, type_deref(dst.type)), dst.value, ""); + LLVMValueRef instr = OdinLLVMBuildLoad(p, lb_type(p->module, type_deref(dst.type)), dst.value); switch (id) { case BuiltinProc_non_temporal_load: { @@ -2621,8 +2621,7 @@ gb_internal lbValue lb_build_builtin_proc(lbProcedure *p, Ast *expr, TypeAndValu if (is_type_simd_vector(t)) { lbValue res = {}; res.type = t; - res.value = LLVMBuildLoad2(p->builder, lb_type(p->module, t), src.value, ""); - LLVMSetAlignment(res.value, 1); + res.value = OdinLLVMBuildLoadAligned(p, lb_type(p->module, t), src.value, 1); return res; } else { lbAddr dst = lb_add_local_generated(p, t, false); diff --git a/src/llvm_backend_stmt.cpp b/src/llvm_backend_stmt.cpp index df3d4bc03..288e7206a 100644 --- a/src/llvm_backend_stmt.cpp +++ b/src/llvm_backend_stmt.cpp @@ -2001,7 +2001,7 @@ gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res) { LLVMValueRef ptr = p->temp_callee_return_struct_memory; LLVMValueRef nptr = LLVMBuildPointerCast(p->builder, ptr, LLVMPointerType(src_type, 0), ""); LLVMBuildStore(p->builder, ret_val, nptr); - ret_val = LLVMBuildLoad2(p->builder, ret_type, ptr, ""); + ret_val = OdinLLVMBuildLoad(p, ret_type, ptr); } else { ret_val = OdinLLVMBuildTransmute(p, ret_val, ret_type); } diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index f63c42ab9..6367d0118 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -269,7 +269,7 @@ gb_internal lbValue lb_emit_transmute(lbProcedure *p, lbValue value, Type *t) { if (lb_try_update_alignment(ptr, align)) { LLVMTypeRef result_type = lb_type(p->module, t); res.value = LLVMBuildPointerCast(p->builder, ptr.value, LLVMPointerType(result_type, 0), ""); - res.value = LLVMBuildLoad2(p->builder, result_type, res.value, ""); + res.value = OdinLLVMBuildLoad(p, result_type, res.value); return res; } lbAddr addr = lb_add_local_generated(p, t, false); -- cgit v1.2.3 From b36a81ef535b55afa3630eda6ff0b94f77f6c11e Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 5 Dec 2024 10:44:53 +0000 Subject: ABI change: for indirect parameters size_of <= 16, do callee stack copy --- src/build_settings.cpp | 1 + src/llvm_backend_proc.cpp | 19 +++++++++++++++++++ src/main.cpp | 5 +++++ 3 files changed, 25 insertions(+) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 50fae93b8..15cc4f71d 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -467,6 +467,7 @@ struct BuildContext { BuildCacheData build_cache_data; bool internal_no_inline; + bool internal_by_value; bool no_threaded_checker; diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 5aee5b639..fee825a2f 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -579,6 +579,8 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) { p->raw_input_parameters = array_make(permanent_allocator(), raw_input_parameters_count); LLVMGetParams(p->value, p->raw_input_parameters.data); + bool is_odin_cc = is_calling_convention_odin(ft->calling_convention); + unsigned param_index = 0; for_array(i, params->variables) { Entity *e = params->variables[i]; @@ -613,9 +615,26 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) { } } else if (arg_type->kind == lbArg_Indirect) { if (e->token.string.len != 0 && !is_blank_ident(e->token.string)) { + i64 sz = type_size_of(e->type); + bool do_callee_copy = false; + + if (is_odin_cc) { + do_callee_copy = sz <= 16; + if (build_context.internal_by_value) { + do_callee_copy = true; + } + } + lbValue ptr = {}; ptr.value = LLVMGetParam(p->value, param_offset+param_index); ptr.type = alloc_type_pointer(e->type); + + if (do_callee_copy) { + lbValue new_ptr = lb_add_local_generated(p, e->type, false).addr; + lb_mem_copy_non_overlapping(p, new_ptr, ptr, lb_const_int(p->module, t_uint, sz)); + ptr = new_ptr; + } + lb_add_entity(p->module, e, ptr); lb_add_debug_param_variable(p, ptr.value, e->type, e->token, param_index+1, p->decl_block); } diff --git a/src/main.cpp b/src/main.cpp index 015269438..4d85a9e72 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -401,6 +401,7 @@ enum BuildFlagKind { BuildFlag_InternalModulePerFile, BuildFlag_InternalCached, BuildFlag_InternalNoInline, + BuildFlag_InternalByValue, BuildFlag_Tilde, @@ -612,6 +613,7 @@ gb_internal bool parse_build_flags(Array args) { add_flag(&build_flags, BuildFlag_InternalModulePerFile, str_lit("internal-module-per-file"), BuildFlagParam_None, Command_all); add_flag(&build_flags, BuildFlag_InternalCached, str_lit("internal-cached"), BuildFlagParam_None, Command_all); add_flag(&build_flags, BuildFlag_InternalNoInline, str_lit("internal-no-inline"), BuildFlagParam_None, Command_all); + add_flag(&build_flags, BuildFlag_InternalByValue, str_lit("internal-by-value"), BuildFlagParam_None, Command_all); #if ALLOW_TILDE add_flag(&build_flags, BuildFlag_Tilde, str_lit("tilde"), BuildFlagParam_None, Command__does_build); @@ -1508,6 +1510,9 @@ gb_internal bool parse_build_flags(Array args) { case BuildFlag_InternalNoInline: build_context.internal_no_inline = true; break; + case BuildFlag_InternalByValue: + build_context.internal_by_value = true; + break; case BuildFlag_Tilde: build_context.tilde_backend = true; -- cgit v1.2.3 From c8f0d27ceecaaa4d9803ae7fd0fea69ca5aff227 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Mon, 9 Dec 2024 19:12:42 +0100 Subject: compiler: remove viral `#force(_no)_inline` If a procedure was marked `#force_no_inline`, any procedure calls within it would also implicitly be. This is not expected for multiple reasons: 1. `#force(_no)_inline` on a call expr works differently than on a procedure literal. 2. Adding the attribute on it and every called proc blows up the amount of work for the inliner pass and may increase the time it takes. 3. Putting `#force_no_inline` on a procedure to keep executable size down (like we do for some map procedures), benchmark it, or find it in asm/ir has the added effect of slowing those procedures down significantly and not representing truth. --- src/llvm_backend_proc.cpp | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index fee825a2f..712e13592 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1136,10 +1136,6 @@ gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array c } } - if (inlining == ProcInlining_none) { - inlining = p->inlining; - } - Type *rt = reduce_tuple_to_single_type(results); Type *original_rt = rt; if (split_returns) { -- cgit v1.2.3 From 7b334d2bd9e881b450fb19e394d6d71840a62cf9 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 1 Jan 2025 17:26:15 +0000 Subject: Add `#branch_location` --- src/check_expr.cpp | 12 +++++++ src/entity.cpp | 1 + src/llvm_backend.hpp | 7 +++- src/llvm_backend_expr.cpp | 12 +++++-- src/llvm_backend_proc.cpp | 6 +++- src/llvm_backend_stmt.cpp | 81 ++++++++++++++++++++++++++++---------------- src/llvm_backend_utility.cpp | 8 ++--- 7 files changed, 89 insertions(+), 38 deletions(-) (limited to 'src/llvm_backend_proc.cpp') diff --git a/src/check_expr.cpp b/src/check_expr.cpp index cc9483187..fba9b8dad 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -8725,6 +8725,18 @@ gb_internal ExprKind check_basic_directive_expr(CheckerContext *c, Operand *o, A error(node, "#caller_expression may only be used as a default argument parameter"); o->type = t_string; o->mode = Addressing_Value; + } else if (name == "branch_location") { + if (!c->in_defer) { + error(node, "#branch_location may only be used within a 'defer' statement"); + } else if (c->curr_proc_decl) { + Entity *e = c->curr_proc_decl->entity; + if (e != nullptr) { + GB_ASSERT(e->kind == Entity_Procedure); + e->Procedure.uses_branch_location = true; + } + } + o->type = t_source_code_location; + o->mode = Addressing_Value; } else { if (name == "location") { init_core_source_code_location(c->checker); diff --git a/src/entity.cpp b/src/entity.cpp index 0c4a20df4..802b381f9 100644 --- a/src/entity.cpp +++ b/src/entity.cpp @@ -256,6 +256,7 @@ struct Entity { bool entry_point_only : 1; bool has_instrumentation : 1; bool is_memcpy_like : 1; + bool uses_branch_location : 1; } Procedure; struct { Array entities; diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index e84ffd1cd..8ca11bf28 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -359,6 +359,10 @@ struct lbProcedure { bool in_multi_assignment; Array raw_input_parameters; + bool uses_branch_location; + TokenPos branch_location_pos; + TokenPos curr_token_pos; + Array variadic_reuses; lbAddr variadic_reuse_base_array_ptr; @@ -444,7 +448,8 @@ gb_internal lbValue lb_emit_matrix_ev(lbProcedure *p, lbValue s, isize row, isiz gb_internal lbValue lb_emit_arith(lbProcedure *p, TokenKind op, lbValue lhs, lbValue rhs, Type *type); gb_internal lbValue lb_emit_byte_swap(lbProcedure *p, lbValue value, Type *end_type); -gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block); +gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block, TokenPos pos); +gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block, Ast *node); gb_internal lbValue lb_emit_transmute(lbProcedure *p, lbValue value, Type *t); gb_internal lbValue lb_emit_comp(lbProcedure *p, TokenKind op_kind, lbValue left, lbValue right); gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array const &args, ProcInlining inlining = ProcInlining_none); diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index 9c325e088..3b238bcd8 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -3502,7 +3502,13 @@ gb_internal lbValue lb_build_expr_internal(lbProcedure *p, Ast *expr) { case_ast_node(bd, BasicDirective, expr); TokenPos pos = bd->token.pos; - GB_PANIC("Non-constant basic literal %s - %.*s", token_pos_to_string(pos), LIT(bd->name.string)); + String name = bd->name.string; + if (name == "branch_location") { + GB_ASSERT(p->uses_branch_location); + String proc_name = p->entity->token.string; + return lb_emit_source_code_location_as_global(p, proc_name, p->branch_location_pos); + } + GB_PANIC("Non-constant basic literal %s - %.*s", token_pos_to_string(pos), LIT(name)); case_end; case_ast_node(i, Implicit, expr); @@ -3668,7 +3674,7 @@ gb_internal lbValue lb_build_expr_internal(lbProcedure *p, Ast *expr) { lb_emit_if(p, lb_emit_try_has_value(p, rhs), then, else_); lb_start_block(p, else_); - lb_emit_defer_stmts(p, lbDeferExit_Branch, block); + lb_emit_defer_stmts(p, lbDeferExit_Branch, block, expr); lb_emit_jump(p, block); lb_start_block(p, then); @@ -5493,7 +5499,7 @@ gb_internal lbAddr lb_build_addr_internal(lbProcedure *p, Ast *expr) { lb_emit_if(p, lb_emit_try_has_value(p, rhs), then, else_); lb_start_block(p, else_); - lb_emit_defer_stmts(p, lbDeferExit_Branch, block); + lb_emit_defer_stmts(p, lbDeferExit_Branch, block, expr); lb_emit_jump(p, block); lb_start_block(p, then); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 712e13592..7e44a0046 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -125,6 +125,10 @@ gb_internal lbProcedure *lb_create_procedure(lbModule *m, Entity *entity, bool i // map_init(&p->selector_addr, 0); // map_init(&p->tuple_fix_map, 0); + if (p->entity != nullptr && p->entity->Procedure.uses_branch_location) { + p->uses_branch_location = true; + } + if (p->is_foreign) { lb_add_foreign_library_path(p->module, entity->Procedure.foreign_library); } @@ -757,7 +761,7 @@ gb_internal void lb_end_procedure_body(lbProcedure *p) { if (p->type->Proc.result_count == 0) { instr = LLVMGetLastInstruction(p->curr_block->block); if (!lb_is_instr_terminating(instr)) { - lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr); + lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr, p->body); lb_set_debug_position_to_procedure_end(p); LLVMBuildRetVoid(p->builder); } diff --git a/src/llvm_backend_stmt.cpp b/src/llvm_backend_stmt.cpp index 9a5f25712..a2f0d2f4a 100644 --- a/src/llvm_backend_stmt.cpp +++ b/src/llvm_backend_stmt.cpp @@ -208,8 +208,8 @@ gb_internal void lb_open_scope(lbProcedure *p, Scope *s) { } -gb_internal void lb_close_scope(lbProcedure *p, lbDeferExitKind kind, lbBlock *block, bool pop_stack=true) { - lb_emit_defer_stmts(p, kind, block); +gb_internal void lb_close_scope(lbProcedure *p, lbDeferExitKind kind, lbBlock *block, Ast *node, bool pop_stack=true) { + lb_emit_defer_stmts(p, kind, block, node); GB_ASSERT(p->scope_index > 0); // NOTE(bill): Remove `context`s made in that scope @@ -721,7 +721,7 @@ gb_internal void lb_build_range_interval(lbProcedure *p, AstBinaryExpr *node, lb_build_stmt(p, rs->body); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, node->left); lb_pop_target_list(p); if (check != nullptr) { @@ -854,7 +854,7 @@ gb_internal void lb_build_range_tuple(lbProcedure *p, AstRangeStmt *rs, Scope *s lb_build_stmt(p, rs->body); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, rs->body); lb_pop_target_list(p); lb_emit_jump(p, loop); lb_start_block(p, done); @@ -976,7 +976,7 @@ gb_internal void lb_build_range_stmt_struct_soa(lbProcedure *p, AstRangeStmt *rs lb_build_stmt(p, rs->body); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, rs->body); lb_pop_target_list(p); lb_emit_jump(p, loop); lb_start_block(p, done); @@ -1192,7 +1192,7 @@ gb_internal void lb_build_range_stmt(lbProcedure *p, AstRangeStmt *rs, Scope *sc lb_build_stmt(p, rs->body); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, rs->body); lb_pop_target_list(p); lb_emit_jump(p, loop); lb_start_block(p, done); @@ -1363,7 +1363,7 @@ gb_internal void lb_build_unroll_range_stmt(lbProcedure *p, AstUnrollRangeStmt * } - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, rs->body); } gb_internal bool lb_switch_stmt_can_be_trivial_jump_table(AstSwitchStmt *ss, bool *default_found_) { @@ -1433,6 +1433,7 @@ gb_internal void lb_build_switch_stmt(lbProcedure *p, AstSwitchStmt *ss, Scope * ast_node(body, BlockStmt, ss->body); isize case_count = body->stmts.count; + Ast *default_clause = nullptr; Slice default_stmts = {}; lbBlock *default_fall = nullptr; lbBlock *default_block = nullptr; @@ -1482,6 +1483,7 @@ gb_internal void lb_build_switch_stmt(lbProcedure *p, AstSwitchStmt *ss, Scope * if (cc->list.count == 0) { // default case + default_clause = clause; default_stmts = cc->stmts; default_fall = fall; if (switch_instr == nullptr) { @@ -1552,7 +1554,7 @@ gb_internal void lb_build_switch_stmt(lbProcedure *p, AstSwitchStmt *ss, Scope * lb_push_target_list(p, ss->label, done, nullptr, fall); lb_open_scope(p, body->scope); lb_build_stmt_list(p, cc->stmts); - lb_close_scope(p, lbDeferExit_Default, body); + lb_close_scope(p, lbDeferExit_Default, body, clause); lb_pop_target_list(p); lb_emit_jump(p, done); @@ -1570,13 +1572,13 @@ gb_internal void lb_build_switch_stmt(lbProcedure *p, AstSwitchStmt *ss, Scope * lb_push_target_list(p, ss->label, done, nullptr, default_fall); lb_open_scope(p, default_block->scope); lb_build_stmt_list(p, default_stmts); - lb_close_scope(p, lbDeferExit_Default, default_block); + lb_close_scope(p, lbDeferExit_Default, default_block, default_clause); lb_pop_target_list(p); } lb_emit_jump(p, done); lb_start_block(p, done); - lb_close_scope(p, lbDeferExit_Default, done); + lb_close_scope(p, lbDeferExit_Default, done, ss->body); } gb_internal void lb_store_type_case_implicit(lbProcedure *p, Ast *clause, lbValue value, bool is_default_case) { @@ -1627,7 +1629,7 @@ gb_internal void lb_type_case_body(lbProcedure *p, Ast *label, Ast *clause, lbBl lb_push_target_list(p, label, done, nullptr, nullptr); lb_build_stmt_list(p, cc->stmts); - lb_close_scope(p, lbDeferExit_Default, body); + lb_close_scope(p, lbDeferExit_Default, body, clause); lb_pop_target_list(p); lb_emit_jump(p, done); @@ -1835,7 +1837,7 @@ gb_internal void lb_build_type_switch_stmt(lbProcedure *p, AstTypeSwitchStmt *ss lb_emit_jump(p, done); lb_start_block(p, done); - lb_close_scope(p, lbDeferExit_Default, done); + lb_close_scope(p, lbDeferExit_Default, done, ss->body); } @@ -1959,7 +1961,7 @@ gb_internal void lb_build_assignment(lbProcedure *p, Array &lvals, Slice p->in_multi_assignment = prev_in_assignment; } -gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res) { +gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res, TokenPos pos) { lbFunctionType *ft = lb_get_function_type(p->module, p->type); bool return_by_pointer = ft->ret.kind == lbArg_Indirect; bool split_returns = ft->multiple_return_original_type != nullptr; @@ -1982,7 +1984,7 @@ gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res) { LLVMBuildStore(p->builder, LLVMConstNull(p->abi_function_type->ret.type), p->return_ptr.addr.value); } - lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr); + lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr, pos); // Check for terminator in the defer stmts LLVMValueRef instr = LLVMGetLastInstruction(p->curr_block->block); @@ -2012,7 +2014,7 @@ gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res) { ret_val = OdinLLVMBuildTransmute(p, ret_val, ret_type); } - lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr); + lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr, pos); // Check for terminator in the defer stmts LLVMValueRef instr = LLVMGetLastInstruction(p->curr_block->block); @@ -2021,7 +2023,7 @@ gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res) { } } } -gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return_results) { +gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return_results, TokenPos pos) { lb_ensure_abi_function_type(p->module, p); isize return_count = p->type->Proc.result_count; @@ -2029,7 +2031,7 @@ gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return if (return_count == 0) { // No return values - lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr); + lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr, pos); // Check for terminator in the defer stmts LLVMValueRef instr = LLVMGetLastInstruction(p->curr_block->block); @@ -2138,11 +2140,11 @@ gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return GB_ASSERT(result_values.count-1 == result_eps.count); lb_addr_store(p, p->return_ptr, result_values[result_values.count-1]); - lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr); + lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr, pos); LLVMBuildRetVoid(p->builder); return; } else { - return lb_build_return_stmt_internal(p, result_values[result_values.count-1]); + return lb_build_return_stmt_internal(p, result_values[result_values.count-1], pos); } } else { @@ -2169,7 +2171,7 @@ gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return } if (return_by_pointer) { - lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr); + lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr, pos); LLVMBuildRetVoid(p->builder); return; } @@ -2177,13 +2179,13 @@ gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return res = lb_emit_load(p, res); } } - lb_build_return_stmt_internal(p, res); + lb_build_return_stmt_internal(p, res, pos); } gb_internal void lb_build_if_stmt(lbProcedure *p, Ast *node) { ast_node(is, IfStmt, node); lb_open_scope(p, is->scope); // Scope #1 - defer (lb_close_scope(p, lbDeferExit_Default, nullptr)); + defer (lb_close_scope(p, lbDeferExit_Default, nullptr, node)); lbBlock *then = lb_create_block(p, "if.then"); lbBlock *done = lb_create_block(p, "if.done"); @@ -2234,7 +2236,7 @@ gb_internal void lb_build_if_stmt(lbProcedure *p, Ast *node) { lb_open_scope(p, scope_of_node(is->else_stmt)); lb_build_stmt(p, is->else_stmt); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, is->else_stmt); } lb_emit_jump(p, done); @@ -2251,7 +2253,7 @@ gb_internal void lb_build_if_stmt(lbProcedure *p, Ast *node) { lb_open_scope(p, scope_of_node(is->else_stmt)); lb_build_stmt(p, is->else_stmt); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, is->else_stmt); lb_emit_jump(p, done); } @@ -2322,7 +2324,7 @@ gb_internal void lb_build_for_stmt(lbProcedure *p, Ast *node) { } lb_start_block(p, done); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, node); } gb_internal void lb_build_assign_stmt_array(lbProcedure *p, TokenKind op, lbAddr const &lhs, lbValue const &value) { @@ -2588,7 +2590,7 @@ gb_internal void lb_build_stmt(lbProcedure *p, Ast *node) { lb_open_scope(p, bs->scope); lb_build_stmt_list(p, bs->stmts); - lb_close_scope(p, lbDeferExit_Default, nullptr); + lb_close_scope(p, lbDeferExit_Default, nullptr, node); if (done != nullptr) { lb_emit_jump(p, done); @@ -2702,7 +2704,7 @@ gb_internal void lb_build_stmt(lbProcedure *p, Ast *node) { case_end; case_ast_node(rs, ReturnStmt, node); - lb_build_return_stmt(p, rs->results); + lb_build_return_stmt(p, rs->results, ast_token(node).pos); case_end; case_ast_node(is, IfStmt, node); @@ -2755,7 +2757,7 @@ gb_internal void lb_build_stmt(lbProcedure *p, Ast *node) { } } if (block != nullptr) { - lb_emit_defer_stmts(p, lbDeferExit_Branch, block); + lb_emit_defer_stmts(p, lbDeferExit_Branch, block, node); } lb_emit_jump(p, block); lb_start_block(p, lb_create_block(p, "unreachable")); @@ -2795,7 +2797,13 @@ gb_internal void lb_build_defer_stmt(lbProcedure *p, lbDefer const &d) { } } -gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block) { +gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block, TokenPos pos) { + TokenPos prev_token_pos = p->branch_location_pos; + if (p->uses_branch_location) { + p->branch_location_pos = pos; + } + defer (p->branch_location_pos = prev_token_pos); + isize count = p->defer_stmts.count; isize i = count; while (i --> 0) { @@ -2822,6 +2830,21 @@ gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlo } } +gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block, Ast *node) { + TokenPos pos = {}; + if (node) { + if (node->kind == Ast_BlockStmt) { + pos = ast_end_token(node).pos; + } else if (node->kind == Ast_CaseClause) { + pos = ast_end_token(node).pos; + } else { + pos = ast_token(node).pos; + } + } + return lb_emit_defer_stmts(p, kind, block, pos); +} + + gb_internal void lb_add_defer_node(lbProcedure *p, isize scope_index, Ast *stmt) { Type *pt = base_type(p->type); GB_ASSERT(pt->kind == Type_Proc); diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index a2a0ba4cc..3e4393a8f 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -476,8 +476,8 @@ gb_internal lbValue lb_emit_or_else(lbProcedure *p, Ast *arg, Ast *else_expr, Ty } } -gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return_results); -gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res); +gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return_results, TokenPos pos); +gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res, TokenPos pos); gb_internal lbValue lb_emit_or_return(lbProcedure *p, Ast *arg, TypeAndValue const &tv) { lbValue lhs = {}; @@ -506,10 +506,10 @@ gb_internal lbValue lb_emit_or_return(lbProcedure *p, Ast *arg, TypeAndValue con lbValue found = map_must_get(&p->module->values, end_entity); lb_emit_store(p, found, rhs); - lb_build_return_stmt(p, {}); + lb_build_return_stmt(p, {}, ast_token(arg).pos); } else { GB_ASSERT(tuple->variables.count == 1); - lb_build_return_stmt_internal(p, rhs); + lb_build_return_stmt_internal(p, rhs, ast_token(arg).pos); } } lb_start_block(p, continue_block); -- cgit v1.2.3