1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
|
package simd
import "base:builtin"
import "base:intrinsics"
// 128-bit vector aliases
// Names follow the pattern <element type>x<lane count>. For every alias in
// this group, lane count * element size is 16 bytes.

// Integer lanes (unsigned / signed pairs).
u8x16 :: #simd[16]u8
i8x16 :: #simd[16]i8
u16x8 :: #simd[8]u16
i16x8 :: #simd[8]i16
u32x4 :: #simd[4]u32
i32x4 :: #simd[4]i32
u64x2 :: #simd[2]u64
i64x2 :: #simd[2]i64
// Floating-point lanes.
f32x4 :: #simd[4]f32
f64x2 :: #simd[2]f64
// Boolean lanes: `bool` plus the sized boolean types b8/b16/b32/b64.
boolx16 :: #simd[16]bool
b8x16 :: #simd[16]b8
b16x8 :: #simd[8]b16
b32x4 :: #simd[4]b32
b64x2 :: #simd[2]b64
// 256-bit vector aliases
// Names follow the pattern <element type>x<lane count>. For every alias in
// this group, lane count * element size is 32 bytes.

// Integer lanes (unsigned / signed pairs).
u8x32 :: #simd[32]u8
i8x32 :: #simd[32]i8
u16x16 :: #simd[16]u16
i16x16 :: #simd[16]i16
u32x8 :: #simd[8]u32
i32x8 :: #simd[8]i32
u64x4 :: #simd[4]u64
i64x4 :: #simd[4]i64
// Floating-point lanes.
f32x8 :: #simd[8]f32
f64x4 :: #simd[4]f64
// Boolean lanes: `bool` plus the sized boolean types b8/b16/b32/b64.
boolx32 :: #simd[32]bool
b8x32 :: #simd[32]b8
b16x16 :: #simd[16]b16
b32x8 :: #simd[8]b32
b64x4 :: #simd[4]b64
// 512-bit vector aliases
// Names follow the pattern <element type>x<lane count>. For every alias in
// this group, lane count * element size is 64 bytes.

// Integer lanes (unsigned / signed pairs).
u8x64 :: #simd[64]u8
i8x64 :: #simd[64]i8
u16x32 :: #simd[32]u16
i16x32 :: #simd[32]i16
u32x16 :: #simd[16]u32
i32x16 :: #simd[16]i32
u64x8 :: #simd[8]u64
i64x8 :: #simd[8]i64
// Floating-point lanes.
f32x16 :: #simd[16]f32
f64x8 :: #simd[8]f64
// Boolean lanes: `bool` plus the sized boolean types b8/b16/b32/b64.
boolx64 :: #simd[64]bool
b8x64 :: #simd[64]b8
b16x32 :: #simd[32]b16
b32x16 :: #simd[16]b32
b64x8 :: #simd[8]b64
// Arithmetic.
// Every name below is a direct alias of the `simd_`-prefixed compiler
// intrinsic of the same name; nothing is wrapped or re-implemented here.
add :: intrinsics.simd_add
sub :: intrinsics.simd_sub
mul :: intrinsics.simd_mul
div :: intrinsics.simd_div // floats only
// Shifts that keep Odin's behaviour:
// (x << y) if y <= mask else 0
// NOTE(review): `mask` is presumably the lane bit width minus one - confirm
// against the intrinsics documentation.
shl :: intrinsics.simd_shl
shr :: intrinsics.simd_shr
// Shifts similar to C's behaviour (the shift amount is masked first):
// x << (y & mask)
shl_masked :: intrinsics.simd_shl_masked
shr_masked :: intrinsics.simd_shr_masked
// Saturation Arithmetic
add_sat :: intrinsics.simd_add_sat
sub_sat :: intrinsics.simd_sub_sat
// Bitwise operations: direct aliases of the `simd_bit_*` intrinsics.
// `bit_not` has no intrinsic alias; it is defined as a procedure in this package.
bit_and :: intrinsics.simd_bit_and
bit_or :: intrinsics.simd_bit_or
bit_xor :: intrinsics.simd_bit_xor
// NOTE(review): operand order is presumably `a &~ b` - confirm before relying on it.
bit_and_not :: intrinsics.simd_bit_and_not
// Negation, absolute value and range helpers: direct aliases of the
// matching `simd_`-prefixed intrinsics.
neg :: intrinsics.simd_neg
abs :: intrinsics.simd_abs
min :: intrinsics.simd_min
max :: intrinsics.simd_max
clamp :: intrinsics.simd_clamp
// Lane comparisons.
// Each returns a vector of unsigned integers with the same size as the
// input type - NOT a vector of booleans.
// Element-wise result:
//     false => 0x00...00
//     true  => 0xff...ff
lanes_eq :: intrinsics.simd_lanes_eq
lanes_ne :: intrinsics.simd_lanes_ne
lanes_lt :: intrinsics.simd_lanes_lt
lanes_le :: intrinsics.simd_lanes_le
lanes_gt :: intrinsics.simd_lanes_gt
lanes_ge :: intrinsics.simd_lanes_ge
// Single-lane access. The pseudo-signatures in the comments describe the
// aliased intrinsics; they are not declarations.
// extract :: proc(a: #simd[N]T, idx: uint) -> T
extract :: intrinsics.simd_extract
// replace :: proc(a: #simd[N]T, idx: uint, elem: T) -> #simd[N]T
replace :: intrinsics.simd_replace
// Reductions: direct aliases of the `simd_reduce_*` intrinsics.
// NOTE(review): presumably each folds all lanes of a vector into one scalar,
// with the `_ordered` variants fixing the evaluation order - confirm against
// the intrinsics documentation.
reduce_add_ordered :: intrinsics.simd_reduce_add_ordered
reduce_mul_ordered :: intrinsics.simd_reduce_mul_ordered
reduce_min :: intrinsics.simd_reduce_min
reduce_max :: intrinsics.simd_reduce_max
reduce_and :: intrinsics.simd_reduce_and
reduce_or :: intrinsics.simd_reduce_or
reduce_xor :: intrinsics.simd_reduce_xor
// Lane selection. Note that `swizzle` aliases the `builtin` package's
// procedure rather than an intrinsic.
// swizzle :: proc(a: #simd[N]T, indices: ..int) -> #simd[len(indices)]T
swizzle :: builtin.swizzle
// shuffle :: proc(a, b: #simd[N]T, indices: #simd[max 2*N]u32) -> #simd[len(indices)]T
shuffle :: intrinsics.simd_shuffle
// select :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T
select :: intrinsics.simd_select
// Rounding and square root.
// `sqrt` aliases the intrinsic without a `simd_` prefix; the rounding
// helpers alias `simd_`-prefixed intrinsics.
sqrt :: intrinsics.sqrt
ceil :: intrinsics.simd_ceil
floor :: intrinsics.simd_floor
trunc :: intrinsics.simd_trunc
nearest :: intrinsics.simd_nearest
// Reinterpretation of a vector as its bit pattern. Within this file it is
// used by `copysign` to manipulate the sign bit of float lanes.
to_bits :: intrinsics.simd_to_bits
// Whole-lane permutations.
lanes_reverse :: intrinsics.simd_lanes_reverse
lanes_rotate_left :: intrinsics.simd_lanes_rotate_left
lanes_rotate_right :: intrinsics.simd_lanes_rotate_right
// Bit counting and bit reversal. These alias intrinsics that carry no
// `simd_` prefix.
// NOTE(review): presumably those intrinsics accept #simd operands as well as
// scalars - confirm against the intrinsics documentation.
count_ones :: intrinsics.count_ones
count_zeros :: intrinsics.count_zeros
count_trailing_zeros :: intrinsics.count_trailing_zeros
count_leading_zeros :: intrinsics.count_leading_zeros
reverse_bits :: intrinsics.reverse_bits
// Fused multiply-add. `fma` is a shorter spelling of `fused_mul_add`; both
// names alias the same intrinsic.
fused_mul_add :: intrinsics.fused_mul_add
fma :: intrinsics.fused_mul_add
// to_array_ptr reinterprets a pointer to a SIMD vector as a pointer to a
// fixed-size array with the same lane count and element type.
// Only the pointer type changes: the result refers to the same memory as `v`,
// so writes through it are visible in the vector.
to_array_ptr :: #force_inline proc "contextless" (v: ^#simd[$LANES]$E) -> ^[LANES]E {
	return cast(^[LANES]E)v
}
// to_array returns the lanes of `v` as a fixed-size array by value.
// The vector's bits are reinterpreted via `transmute`; element `i` of the
// result corresponds to lane `i` of `v`.
to_array :: #force_inline proc "contextless" (v: #simd[$LANES]$E) -> [LANES]E {
	lanes := transmute([LANES]E)v
	return lanes
}
// from_array builds a SIMD vector from a fixed-size array by value.
// The array's bits are reinterpreted via `transmute`; lane `i` of the result
// corresponds to element `i` of `v`.
from_array :: #force_inline proc "contextless" (v: $A/[$LANES]$E) -> #simd[LANES]E {
	vec := transmute(#simd[LANES]E)(v)
	return vec
}
// from_slice builds a vector of type `T` from the first `LANES` elements of
// `slice`.
//
// Inputs:
// - T: the `#simd[LANES]E` vector type to produce
// - slice: the source elements; it must hold at least `LANES` of them
//
// Returns:
// - a vector whose lane `i` is `slice[i]`; elements past `LANES` are ignored
//
// Asserts that `len(slice) >= LANES`.
from_slice :: proc($T: typeid/#simd[$LANES]$E, slice: []E) -> T {
	assert(len(slice) >= LANES, "slice length must be at least the number of lanes")
	array: [LANES]E
	// `copy` moves min(len(dst), len(src)) elements, i.e. exactly LANES once
	// the assertion holds. If assertions are compiled out and the slice is too
	// short, the remaining lanes stay zero instead of being read out of bounds.
	copy(array[:], slice)
	return transmute(T)array
}
// bit_not returns the bitwise complement of every lane of `v`.
// Only integer element types are accepted (enforced by the `where` clause).
//
// Implemented as `v XOR all-ones`, using this package's `bit_xor` alias.
// The previous body called `xor`, a name this file does not declare.
bit_not :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_integer(E) {
	// ~E(0) has every bit set; the conversion to T applies it to all lanes.
	return bit_xor(v, T(~E(0)))
}
// copysign returns, lane by lane, the magnitude of `v` combined with the sign
// of `sign`. Only float element types are accepted (enforced by the `where`
// clause).
copysign :: #force_inline proc "contextless" (v, sign: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_float(E) {
	// The bit pattern of -0.0 has only the sign bit set, so it serves as a
	// per-lane sign mask.
	sign_mask := to_bits(T(-0.0))
	// Everything except the sign bit comes from `v` ...
	magnitude_bits := to_bits(v) &~ sign_mask
	// ... and the sign bit alone comes from `sign`.
	sign_bits := to_bits(sign) & sign_mask
	return transmute(T)(magnitude_bits | sign_bits)
}
// signum returns, lane by lane, 1 carrying the sign of `v`. NaN lanes are
// passed through unchanged. Zero lanes are not special-cased, so they yield
// 1 with the sign bit of the zero. Only float element types are accepted
// (enforced by the `where` clause).
signum :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_float(E) {
	signed_one := copysign(T(1), v)
	// A lane compares unequal to itself only when it holds NaN.
	nan_lanes := lanes_ne(v, v)
	return select(nan_lanes, v, signed_one)
}
// recip returns the lane-wise reciprocal `1 / v`.
// Only float element types are accepted (enforced by the `where` clause).
recip :: #force_inline proc "contextless" (v: $T/#simd[$LANES]$E) -> T where intrinsics.type_is_float(E) {
	one := T(1)
	return one / v
}
|