aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorgingerBill <bill@gingerbill.org>2022-05-28 15:54:41 +0100
committergingerBill <bill@gingerbill.org>2022-05-28 15:54:41 +0100
commit910799cc5ff8032c1ff12709fced65d98b37902d (patch)
treeb05331e03a5a7c52dec310a27a40beb7c636d35f
parentc60d7842cd829371129f52064dd8722516e5d197 (diff)
Add `cpu_features` for `core:simd/x86`
-rw-r--r--core/simd/x86/cpu.odin94
1 files changed, 94 insertions, 0 deletions
diff --git a/core/simd/x86/cpu.odin b/core/simd/x86/cpu.odin
new file mode 100644
index 000000000..14e90c0f0
--- /dev/null
+++ b/core/simd/x86/cpu.odin
@@ -0,0 +1,94 @@
+//+build i386, amd64
+package simd_x86
+
+import "core:intrinsics"
+
+// cpuid :: proc(ax, cx: u32) -> (eax, ebc, ecx, edx: u32) ---
+cpuid :: intrinsics.x86_cpuid
+
+// xgetbv :: proc(cx: u32) -> (eax, edx: u32) ---
+xgetbv :: intrinsics.x86_xgetbv
+
+
+CPU_Feature :: enum u64 {
+ aes, // AES hardware implementation (AES NI)
+ adx, // Multi-precision add-carry instruction extensions
+ avx, // Advanced vector extension
+ avx2, // Advanced vector extension 2
+ bmi1, // Bit manipulation instruction set 1
+ bmi2, // Bit manipulation instruction set 2
+ erms, // Enhanced REP for MOVSB and STOSB
+ fma, // Fused-multiply-add instructions
+ os_xsave, // OS supports XSAVE/XRESTOR for saving/restoring XMM registers.
+ pclmulqdq, // PCLMULQDQ instruction - most often used for AES-GCM
+ popcnt, // Hamming weight instruction POPCNT.
+ rdrand, // RDRAND instruction (on-chip random number generator)
+ rdseed, // RDSEED instruction (on-chip random number generator)
+ sse2, // Streaming SIMD extension 2 (always available on amd64)
+ sse3, // Streaming SIMD extension 3
+ ssse3, // Supplemental streaming SIMD extension 3
+ sse41, // Streaming SIMD extension 4 and 4.1
+ sse42, // Streaming SIMD extension 4 and 4.2
+}
+
+CPU_Features :: distinct bit_set[CPU_Feature; u64]
+
+cpu_features: Maybe(CPU_Features)
+
+@(init, private)
+init_cpu_features :: proc "c" () {
+ is_set :: #force_inline proc "c" (hwc: u32, value: u32) -> bool {
+ return hwc&value != 0
+ }
+ try_set :: #force_inline proc "c" (set: ^CPU_Features, feature: CPU_Feature, hwc: u32, value: u32) {
+ if is_set(hwc, value) {
+ set^ += {feature}
+ }
+ }
+
+ max_id, _, _, _ := cpuid(0, 0)
+ if max_id < 1 {
+ return
+ }
+
+ set: CPU_Features
+
+ _, _, ecx1, edx1 := cpuid(1, 0)
+
+ try_set(&set, .sse2, 26, edx1)
+ try_set(&set, .sse3, 0, ecx1)
+ try_set(&set, .pclmulqdq, 1, ecx1)
+ try_set(&set, .ssse3, 9, ecx1)
+ try_set(&set, .fma, 12, ecx1)
+ try_set(&set, .sse41, 19, ecx1)
+ try_set(&set, .sse42, 20, ecx1)
+ try_set(&set, .popcnt, 23, ecx1)
+ try_set(&set, .aes, 25, ecx1)
+ try_set(&set, .os_xsave, 27, ecx1)
+ try_set(&set, .rdrand, 30, ecx1)
+
+ os_supports_avx := false
+ if .os_xsave in set {
+ eax, _ := xgetbv(0)
+ os_supports_avx = is_set(1, eax) && is_set(2, eax)
+ }
+ if os_supports_avx {
+ try_set(&set, .avx, 28, ecx1)
+ }
+
+ if max_id < 7 {
+ return
+ }
+
+ _, ebx7, _, _ := cpuid(7, 0)
+ try_set(&set, .bmi1, 3, ebx7)
+ if os_supports_avx {
+ try_set(&set, .avx2, 5, ebx7)
+ }
+ try_set(&set, .bmi2, 8, ebx7)
+ try_set(&set, .erms, 9, ebx7)
+ try_set(&set, .rdseed, 18, ebx7)
+ try_set(&set, .adx, 19, ebx7)
+
+ cpu_features = set
+}