aboutsummaryrefslogtreecommitdiff
path: root/core/simd/x86/sse42.odin
blob: 1a5cb3f504ecb11a00b42ec843647ae73fd255a7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#+build i386, amd64
package simd_x86

import "core:simd"

// Element-format selector: these four values occupy bits 0-1.
// Going by the names (which follow Intel's SSE4.2 `_SIDD_*` macros), they pick
// unsigned/signed bytes or words and are meant to be OR-ed into the `IMM8`
// argument of the `_mm_cmp?str?` procedures in this file — confirm against the
// Intel Intrinsics Guide.
_SIDD_UBYTE_OPS                :: 0x00
_SIDD_UWORD_OPS                :: 0x01
_SIDD_SBYTE_OPS                :: 0x02
_SIDD_SWORD_OPS                :: 0x03

// Comparison-mode selector: these four values occupy bits 2-3.
// Names follow Intel's SSE4.2 `_SIDD_CMP_*` macros (equal-any, ranges,
// equal-each, equal-ordered); see the Intel Intrinsics Guide for the exact
// aggregation each mode performs.
_SIDD_CMP_EQUAL_ANY            :: 0x00
_SIDD_CMP_RANGES               :: 0x04
_SIDD_CMP_EQUAL_EACH           :: 0x08
_SIDD_CMP_EQUAL_ORDERED        :: 0x0C

// Polarity selector: these four values occupy bits 4-5.
// Names follow Intel's SSE4.2 `_SIDD_*_POLARITY` macros; presumably they
// control whether (and for which elements) the intermediate comparison result
// is negated — confirm against the Intel Intrinsics Guide.
_SIDD_POSITIVE_POLARITY        :: 0x00
_SIDD_NEGATIVE_POLARITY        :: 0x10
_SIDD_MASKED_POSITIVE_POLARITY :: 0x20
_SIDD_MASKED_NEGATIVE_POLARITY :: 0x30

// Index-selection flag in bit 6.
// Presumably relevant to the index-returning procedures (`_mm_cmp?stri`),
// choosing which end of the result the reported index is taken from — confirm
// against the Intel Intrinsics Guide. Shares its bit with `_SIDD_UNIT_MASK`.
_SIDD_LEAST_SIGNIFICANT        :: 0x00
_SIDD_MOST_SIGNIFICANT         :: 0x40

// Mask-format flag in bit 6.
// Presumably relevant to the mask-returning procedures (`_mm_cmp?strm`),
// choosing between a packed bit mask and a per-element mask — confirm against
// the Intel Intrinsics Guide. Shares its bit with `_SIDD_MOST_SIGNIFICANT`.
_SIDD_BIT_MASK                 :: 0x00
_SIDD_UNIT_MASK                :: 0x40

// Counterpart of Intel's `_mm_cmpistrm`.
// Reinterprets `a` and `b` as `i8x16`, passes them together with the
// compile-time control value `IMM8` to `pcmpistrm128` (bound to
// `llvm.x86.sse42.pcmpistrm128`), and reinterprets the returned vector as
// `__m128i`.
@(require_results, enable_target_feature="sse4.2")
_mm_cmpistrm :: #force_inline proc "c" (a: __m128i, b: __m128i, $IMM8: i8) -> __m128i {
	lhs := transmute(i8x16)a
	rhs := transmute(i8x16)b
	mask := pcmpistrm128(lhs, rhs, IMM8)
	return transmute(__m128i)mask
}
// Counterpart of Intel's `_mm_cmpistri`.
// Reinterprets `a` and `b` as `i8x16` and returns the `i32` produced by
// `pcmpistri128` (bound to `llvm.x86.sse42.pcmpistri128`) for the compile-time
// control value `IMM8`.
@(require_results, enable_target_feature="sse4.2")
_mm_cmpistri :: #force_inline proc "c" (a: __m128i, b: __m128i, $IMM8: i8) -> i32 {
	lhs, rhs := transmute(i8x16)a, transmute(i8x16)b
	index := pcmpistri128(lhs, rhs, IMM8)
	return index
}
// Counterpart of Intel's `_mm_cmpistrz`.
// Reinterprets `a` and `b` as `i8x16` and returns the `i32` produced by
// `pcmpistriz128` (bound to `llvm.x86.sse42.pcmpistriz128`) for the
// compile-time control value `IMM8`.
// NOTE(review): per Intel this reports the instruction's Z flag — confirm.
@(require_results, enable_target_feature="sse4.2")
_mm_cmpistrz :: #force_inline proc "c" (a: __m128i, b: __m128i, $IMM8: i8) -> i32 {
	lhs, rhs := transmute(i8x16)a, transmute(i8x16)b
	flag := pcmpistriz128(lhs, rhs, IMM8)
	return flag
}
// Counterpart of Intel's `_mm_cmpistrc`.
// Reinterprets `a` and `b` as `i8x16` and returns the `i32` produced by
// `pcmpistric128` (bound to `llvm.x86.sse42.pcmpistric128`) for the
// compile-time control value `IMM8`.
// NOTE(review): per Intel this reports the instruction's C flag — confirm.
@(require_results, enable_target_feature="sse4.2")
_mm_cmpistrc :: #force_inline proc "c" (a: __m128i, b: __m128i, $IMM8: i8) -> i32 {
	lhs, rhs := transmute(i8x16)a, transmute(i8x16)b
	flag := pcmpistric128(lhs, rhs, IMM8)
	return flag
}
// Counterpart of Intel's `_mm_cmpistrs`.
// Reinterprets `a` and `b` as `i8x16` and returns the `i32` produced by
// `pcmpistris128` (bound to `llvm.x86.sse42.pcmpistris128`) for the
// compile-time control value `IMM8`.
// NOTE(review): per Intel this reports the instruction's S flag — confirm.
@(require_results, enable_target_feature="sse4.2")
_mm_cmpistrs :: #force_inline proc "c" (a: __m128i, b: __m128i, $IMM8: i8) -> i32 {
	lhs, rhs := transmute(i8x16)a, transmute(i8x16)b
	flag := pcmpistris128(lhs, rhs, IMM8)
	return flag
}
// Counterpart of Intel's `_mm_cmpistro`.
// Reinterprets `a` and `b` as `i8x16` and returns the `i32` produced by
// `pcmpistrio128` (bound to `llvm.x86.sse42.pcmpistrio128`) for the
// compile-time control value `IMM8`.
// NOTE(review): per Intel this reports the instruction's O flag — confirm.
@(require_results, enable_target_feature="sse4.2")
_mm_cmpistro :: #force_inline proc "c" (a: __m128i, b: __m128i, $IMM8: i8) -> i32 {
	lhs, rhs := transmute(i8x16)a, transmute(i8x16)b
	flag := pcmpistrio128(lhs, rhs, IMM8)
	return flag
}
// Counterpart of Intel's `_mm_cmpistra`.
// Reinterprets `a` and `b` as `i8x16` and returns the `i32` produced by
// `pcmpistria128` (bound to `llvm.x86.sse42.pcmpistria128`) for the
// compile-time control value `IMM8`.
// NOTE(review): per Intel this is the "above" condition (C and Z flags both
// clear) — confirm.
@(require_results, enable_target_feature="sse4.2")
_mm_cmpistra :: #force_inline proc "c" (a: __m128i, b: __m128i, $IMM8: i8) -> i32 {
	lhs, rhs := transmute(i8x16)a, transmute(i8x16)b
	flag := pcmpistria128(lhs, rhs, IMM8)
	return flag
}
// Counterpart of Intel's `_mm_cmpestrm`.
// Reinterprets `a` and `b` as `i8x16`, passes them with `la`, `lb` and the
// compile-time control value `IMM8` to `pcmpestrm128` (bound to
// `llvm.x86.sse42.pcmpestrm128`), and reinterprets the returned vector as
// `__m128i`. `la`/`lb` are presumably the explicit lengths of `a`/`b` — see the
// Intel Intrinsics Guide.
@(require_results, enable_target_feature="sse4.2")
_mm_cmpestrm :: #force_inline proc "c" (a: __m128i, la: i32, b: __m128i, lb: i32, $IMM8: i8) -> __m128i {
	lhs := transmute(i8x16)a
	rhs := transmute(i8x16)b
	mask := pcmpestrm128(lhs, la, rhs, lb, IMM8)
	return transmute(__m128i)mask
}
// Counterpart of Intel's `_mm_cmpestri`.
// Reinterprets `a` and `b` as `i8x16` and returns the `i32` produced by
// `pcmpestri128` (bound to `llvm.x86.sse42.pcmpestri128`) for `la`, `lb` and
// the compile-time control value `IMM8`.
@(require_results, enable_target_feature="sse4.2")
_mm_cmpestri :: #force_inline proc "c" (a: __m128i, la: i32, b: __m128i, lb: i32, $IMM8: i8) -> i32 {
	lhs, rhs := transmute(i8x16)a, transmute(i8x16)b
	index := pcmpestri128(lhs, la, rhs, lb, IMM8)
	return index
}
// Counterpart of Intel's `_mm_cmpestrz`.
// Reinterprets `a` and `b` as `i8x16` and returns the `i32` produced by
// `pcmpestriz128` (bound to `llvm.x86.sse42.pcmpestriz128`) for `la`, `lb` and
// the compile-time control value `IMM8`.
// NOTE(review): per Intel this reports the instruction's Z flag — confirm.
@(require_results, enable_target_feature="sse4.2")
_mm_cmpestrz :: #force_inline proc "c" (a: __m128i, la: i32, b: __m128i, lb: i32, $IMM8: i8) -> i32 {
	lhs, rhs := transmute(i8x16)a, transmute(i8x16)b
	flag := pcmpestriz128(lhs, la, rhs, lb, IMM8)
	return flag
}
// Counterpart of Intel's `_mm_cmpestrc`.
// Reinterprets `a` and `b` as `i8x16` and returns the `i32` produced by
// `pcmpestric128` (bound to `llvm.x86.sse42.pcmpestric128`) for `la`, `lb` and
// the compile-time control value `IMM8`.
// NOTE(review): per Intel this reports the instruction's C flag — confirm.
@(require_results, enable_target_feature="sse4.2")
_mm_cmpestrc :: #force_inline proc "c" (a: __m128i, la: i32, b: __m128i, lb: i32, $IMM8: i8) -> i32 {
	lhs, rhs := transmute(i8x16)a, transmute(i8x16)b
	flag := pcmpestric128(lhs, la, rhs, lb, IMM8)
	return flag
}
// Counterpart of Intel's `_mm_cmpestrs`.
// Reinterprets `a` and `b` as `i8x16` and returns the `i32` produced by
// `pcmpestris128` (bound to `llvm.x86.sse42.pcmpestris128`) for `la`, `lb` and
// the compile-time control value `IMM8`.
// NOTE(review): per Intel this reports the instruction's S flag — confirm.
@(require_results, enable_target_feature="sse4.2")
_mm_cmpestrs :: #force_inline proc "c" (a: __m128i, la: i32, b: __m128i, lb: i32, $IMM8: i8) -> i32 {
	lhs, rhs := transmute(i8x16)a, transmute(i8x16)b
	flag := pcmpestris128(lhs, la, rhs, lb, IMM8)
	return flag
}
// Counterpart of Intel's `_mm_cmpestro`.
// Reinterprets `a` and `b` as `i8x16` and returns the `i32` produced by
// `pcmpestrio128` (bound to `llvm.x86.sse42.pcmpestrio128`) for `la`, `lb` and
// the compile-time control value `IMM8`.
// NOTE(review): per Intel this reports the instruction's O flag — confirm.
@(require_results, enable_target_feature="sse4.2")
_mm_cmpestro :: #force_inline proc "c" (a: __m128i, la: i32, b: __m128i, lb: i32, $IMM8: i8) -> i32 {
	lhs, rhs := transmute(i8x16)a, transmute(i8x16)b
	flag := pcmpestrio128(lhs, la, rhs, lb, IMM8)
	return flag
}
// Counterpart of Intel's `_mm_cmpestra`.
// Reinterprets `a` and `b` as `i8x16` and returns the `i32` produced by
// `pcmpestria128` (bound to `llvm.x86.sse42.pcmpestria128`) for `la`, `lb` and
// the compile-time control value `IMM8`.
// NOTE(review): per Intel this is the "above" condition (C and Z flags both
// clear) — confirm.
@(require_results, enable_target_feature="sse4.2")
_mm_cmpestra :: #force_inline proc "c" (a: __m128i, la: i32, b: __m128i, lb: i32, $IMM8: i8) -> i32 {
	lhs, rhs := transmute(i8x16)a, transmute(i8x16)b
	flag := pcmpestria128(lhs, la, rhs, lb, IMM8)
	return flag
}
// Counterpart of Intel's `_mm_crc32_u8`.
// Passes the running value `crc` and the 8-bit input `v` to `crc32_32_8`
// (bound to `llvm.x86.sse42.crc32.32.8`) and returns its `u32` result.
// NOTE(review): the SSE4.2 CRC32 instruction is documented by Intel as
// CRC-32C (Castagnoli) — confirm before relying on the polynomial.
@(require_results, enable_target_feature="sse4.2")
_mm_crc32_u8 :: #force_inline proc "c" (crc: u32, v: u8) -> u32 {
	updated := crc32_32_8(crc, v)
	return updated
}
// Counterpart of Intel's `_mm_crc32_u16`.
// Passes the running value `crc` and the 16-bit input `v` to `crc32_32_16`
// (bound to `llvm.x86.sse42.crc32.32.16`) and returns its `u32` result.
@(require_results, enable_target_feature="sse4.2")
_mm_crc32_u16 :: #force_inline proc "c" (crc: u32, v: u16) -> u32 {
	updated := crc32_32_16(crc, v)
	return updated
}
// Counterpart of Intel's `_mm_crc32_u32`.
// Passes the running value `crc` and the 32-bit input `v` to `crc32_32_32`
// (bound to `llvm.x86.sse42.crc32.32.32`) and returns its `u32` result.
@(require_results, enable_target_feature="sse4.2")
_mm_crc32_u32 :: #force_inline proc "c" (crc: u32, v: u32) -> u32 {
	updated := crc32_32_32(crc, v)
	return updated
}
// Counterpart of Intel's `_mm_cmpgt_epi64`.
// Performs a lane-wise `a > b` comparison with `simd.lanes_gt` and
// reinterprets the comparison result as `__m128i`.
// NOTE(review): lane width follows the declaration of `__m128i`, which lives
// outside this file (presumably 2 x i64, matching the `epi64` suffix).
@(require_results, enable_target_feature="sse4.2")
_mm_cmpgt_epi64 :: #force_inline proc "c" (a: __m128i, b: __m128i) -> __m128i {
	greater := simd.lanes_gt(a, b)
	return transmute(__m128i)greater
}

// The 64-bit CRC wrapper is only compiled when targeting amd64.
when ODIN_ARCH == .amd64 {
	// Counterpart of Intel's `_mm_crc32_u64`.
	// Passes the running value `crc` and the 64-bit input `v` to `crc32_64_64`
	// (bound to `llvm.x86.sse42.crc32.64.64`) and returns its `u64` result.
	@(require_results, enable_target_feature="sse4.2")
	_mm_crc32_u64 :: #force_inline proc "c" (crc: u64, v: u64) -> u64 {
		updated := crc32_64_64(crc, v)
		return updated
	}
}

// Package-private bindings to the LLVM x86 SSE4.2 intrinsics used by the
// `_mm_*` wrappers in this file. Each procedure is resolved through its
// `link_name`; every `imm8` parameter is marked `#const`, so callers must pass
// a compile-time constant.
@(private, default_calling_convention="none")
foreign _ {
	// SSE 4.2 string and text comparison ops

	// "estr" family: takes `la` and `lb` alongside the two vectors
	// (presumably the explicit lengths of `a` and `b`).
	// NOTE(review): `pcmpestrm128` is declared as returning `u8x16` while
	// `pcmpistrm128` below returns `i8x16`. The wrappers in this file transmute
	// both results to `__m128i`, so the difference is not observable here —
	// confirm whether it is intentional.
	@(link_name="llvm.x86.sse42.pcmpestrm128")
	pcmpestrm128 :: proc(a: i8x16, la: i32, b: i8x16, lb: i32, #const imm8: i8) -> u8x16 ---
	@(link_name="llvm.x86.sse42.pcmpestri128")
	pcmpestri128 :: proc(a: i8x16, la: i32, b: i8x16, lb: i32, #const imm8: i8) -> i32 ---
	@(link_name="llvm.x86.sse42.pcmpestriz128")
	pcmpestriz128 :: proc(a: i8x16, la: i32, b: i8x16, lb: i32, #const imm8: i8) -> i32 ---
	@(link_name="llvm.x86.sse42.pcmpestric128")
	pcmpestric128 :: proc(a: i8x16, la: i32, b: i8x16, lb: i32, #const imm8: i8) -> i32 ---
	@(link_name="llvm.x86.sse42.pcmpestris128")
	pcmpestris128 :: proc(a: i8x16, la: i32, b: i8x16, lb: i32, #const imm8: i8) -> i32 ---
	@(link_name="llvm.x86.sse42.pcmpestrio128")
	pcmpestrio128 :: proc(a: i8x16, la: i32, b: i8x16, lb: i32, #const imm8: i8) -> i32 ---
	@(link_name="llvm.x86.sse42.pcmpestria128")
	pcmpestria128 :: proc(a: i8x16, la: i32, b: i8x16, lb: i32, #const imm8: i8) -> i32 ---

	// "istr" family: takes only the two vectors and the control byte.
	@(link_name="llvm.x86.sse42.pcmpistrm128")
	pcmpistrm128 :: proc(a, b: i8x16, #const imm8: i8) -> i8x16 ---
	@(link_name="llvm.x86.sse42.pcmpistri128")
	pcmpistri128 :: proc(a, b: i8x16, #const imm8: i8) -> i32 ---
	@(link_name="llvm.x86.sse42.pcmpistriz128")
	pcmpistriz128 :: proc(a, b: i8x16, #const imm8: i8) -> i32 ---
	@(link_name="llvm.x86.sse42.pcmpistric128")
	pcmpistric128 :: proc(a, b: i8x16, #const imm8: i8) -> i32 ---
	@(link_name="llvm.x86.sse42.pcmpistris128")
	pcmpistris128 :: proc(a, b: i8x16, #const imm8: i8) -> i32 ---
	@(link_name="llvm.x86.sse42.pcmpistrio128")
	pcmpistrio128 :: proc(a, b: i8x16, #const imm8: i8) -> i32 ---
	@(link_name="llvm.x86.sse42.pcmpistria128")
	pcmpistria128 :: proc(a, b: i8x16, #const imm8: i8) -> i32 ---

	// SSE 4.2 CRC instructions
	// Each takes the running `crc` value plus an 8/16/32-bit input `v` and
	// returns the updated `u32`.
	@(link_name="llvm.x86.sse42.crc32.32.8")
	crc32_32_8 :: proc(crc: u32, v: u8) -> u32 ---
	@(link_name="llvm.x86.sse42.crc32.32.16")
	crc32_32_16 :: proc(crc: u32, v: u16) -> u32 ---
	@(link_name="llvm.x86.sse42.crc32.32.32")
	crc32_32_32 :: proc(crc: u32, v: u32) -> u32 ---

	// AMD64 only: the sole wrapper in this file (`_mm_crc32_u64`) is compiled
	// under `when ODIN_ARCH == .amd64`.
	// NOTE(review): this declaration itself is not guarded, so it is also
	// visible on i386 builds — confirm that an unused declaration is harmless
	// there.
	@(link_name="llvm.x86.sse42.crc32.64.64")
	crc32_64_64 :: proc(crc: u64, v: u64) -> u64 ---
}