diff options
| author | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2024-07-09 16:50:55 +0200 |
|---|---|---|
| committer | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2024-07-09 16:50:55 +0200 |
| commit | 7b31acd2d70d766fd31d8e7e65d0192be1daa45c (patch) | |
| tree | e347e1958b22f60999e1cd09c8ea8fa53d70d099 /core/simd | |
| parent | b83822fd354835e596ef7fb1dca85fd7ce721851 (diff) | |
Let simd/x86 pass new transmute/cast vet.
Diffstat (limited to 'core/simd')
| -rw-r--r-- | core/simd/x86/sse2.odin | 18 | ||||
| -rw-r--r-- | core/simd/x86/sse41.odin | 22 | ||||
| -rw-r--r-- | core/simd/x86/sse42.odin | 2 |
3 files changed, 21 insertions, 21 deletions
diff --git a/core/simd/x86/sse2.odin b/core/simd/x86/sse2.odin index fc2fec300..52286cbb8 100644 --- a/core/simd/x86/sse2.odin +++ b/core/simd/x86/sse2.odin @@ -35,7 +35,7 @@ _mm_add_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { } @(require_results, enable_target_feature="sse2") _mm_add_epi64 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { - return transmute(__m128i)simd.add(transmute(i64x2)a, transmute(i64x2)b) + return simd.add(a, b) } @(require_results, enable_target_feature="sse2") _mm_adds_epi8 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { @@ -118,7 +118,7 @@ _mm_sub_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { } @(require_results, enable_target_feature="sse2") _mm_sub_epi64 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { - return transmute(__m128i)simd.sub(transmute(i64x2)a, transmute(i64x2)b) + return simd.sub(a, b) } @(require_results, enable_target_feature="sse2") _mm_subs_epi8 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { @@ -229,7 +229,7 @@ _mm_slli_epi64 :: #force_inline proc "c" (a: __m128i, $IMM8: u32) -> __m128i { } @(require_results, enable_target_feature="sse2") _mm_sll_epi64 :: #force_inline proc "c" (a, count: __m128i) -> __m128i { - return transmute(__m128i)psllq(transmute(i64x2)a, transmute(i64x2)count) + return psllq(a, count) } @(require_results, enable_target_feature="sse2") _mm_srai_epi16 :: #force_inline proc "c" (a: __m128i, $IMM8: u32) -> __m128i { @@ -275,7 +275,7 @@ _mm_srli_epi64 :: #force_inline proc "c" (a: __m128i, $IMM8: u32) -> __m128i { } @(require_results, enable_target_feature="sse2") _mm_srl_epi64 :: #force_inline proc "c" (a, count: __m128i) -> __m128i { - return transmute(__m128i)psrlq(transmute(i64x2)a, transmute(i64x2)count) + return psrlq(a, count) } @@ -363,7 +363,7 @@ _mm_cvtsi128_si32 :: #force_inline proc "c" (a: __m128i) -> i32 { @(require_results, enable_target_feature="sse2") _mm_set_epi64x :: #force_inline proc "c" (e1, e0: i64) -> __m128i { - return transmute(__m128i)i64x2{e0, e1} + return i64x2{e0, e1} } @(require_results, enable_target_feature="sse2") _mm_set_epi32 :: #force_inline proc "c" (e3, e2, e1, e0: i32) -> __m128i { @@ -453,7 +453,7 @@ _mm_stream_si32 :: #force_inline proc "c" (mem_addr: ^i32, a: i32) { @(require_results, enable_target_feature="sse2") _mm_move_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i { zero := _mm_setzero_si128() - return transmute(__m128i)simd.shuffle(transmute(i64x2)a, transmute(i64x2)zero, 0, 2) + return simd.shuffle(a, zero, 0, 2) } @@ -545,7 +545,7 @@ _mm_unpackhi_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { } @(require_results, enable_target_feature="sse2") _mm_unpackhi_epi64 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { - return transmute(__m128i)simd.shuffle(transmute(i64x2)a, transmute(i64x2)b, 1, 3) + return simd.shuffle(a, b, 1, 3) } @(require_results, enable_target_feature="sse2") _mm_unpacklo_epi8 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { @@ -565,7 +565,7 @@ _mm_unpacklo_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { } @(require_results, enable_target_feature="sse2") _mm_unpacklo_epi64 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { - return transmute(__m128i)simd.shuffle(transmute(i64x2)a, transmute(i64x2)b, 0, 2) + return simd.shuffle(a, b, 0, 2) } @@ -1023,7 +1023,7 @@ when ODIN_ARCH == .amd64 { } @(require_results, enable_target_feature="sse2") _mm_cvtsi128_si64 :: #force_inline proc "c" (a: __m128i) -> i64 { - return simd.extract(transmute(i64x2)a, 0) + return simd.extract(a, 0) } @(require_results, enable_target_feature="sse2") _mm_cvtsi128_si64x :: #force_inline proc "c" (a: __m128i) -> i64 { diff --git a/core/simd/x86/sse41.odin b/core/simd/x86/sse41.odin index 0b9c5986f..c2c1abc2d 100644 --- a/core/simd/x86/sse41.odin +++ b/core/simd/x86/sse41.odin @@ -106,7 +106,7 @@ _mm_packus_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { } @(require_results, enable_target_feature="sse4.1") _mm_cmpeq_epi64 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { - return transmute(__m128i)simd.lanes_eq(transmute(i64x2)a, transmute(i64x2)b) + return transmute(__m128i)simd.lanes_eq(a, b) } @(require_results, enable_target_feature="sse4.1") _mm_cvtepi8_epi16 :: #force_inline proc "c" (a: __m128i) -> __m128i { @@ -124,7 +124,7 @@ _mm_cvtepi8_epi32 :: #force_inline proc "c" (a: __m128i) -> __m128i { _mm_cvtepi8_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i { x := transmute(i8x16)a y := simd.shuffle(x, x, 0, 1) - return transmute(__m128i)i64x2(y) + return i64x2(y) } @(require_results, enable_target_feature="sse4.1") _mm_cvtepi16_epi32 :: #force_inline proc "c" (a: __m128i) -> __m128i { @@ -136,13 +136,13 @@ _mm_cvtepi16_epi32 :: #force_inline proc "c" (a: __m128i) -> __m128i { _mm_cvtepi16_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i { x := transmute(i16x8)a y := simd.shuffle(x, x, 0, 1) - return transmute(__m128i)i64x2(y) + return i64x2(y) } @(require_results, enable_target_feature="sse4.1") _mm_cvtepi32_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i { x := transmute(i32x4)a y := simd.shuffle(x, x, 0, 1) - return transmute(__m128i)i64x2(y) + return i64x2(y) } @(require_results, enable_target_feature="sse4.1") _mm_cvtepu8_epi16 :: #force_inline proc "c" (a: __m128i) -> __m128i { @@ -160,7 +160,7 @@ _mm_cvtepu8_epi32 :: #force_inline proc "c" (a: __m128i) -> __m128i { _mm_cvtepu8_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i { x := transmute(u8x16)a y := simd.shuffle(x, x, 0, 1) - return transmute(__m128i)i64x2(y) + return i64x2(y) } @(require_results, enable_target_feature="sse4.1") _mm_cvtepu16_epi32 :: #force_inline proc "c" (a: __m128i) -> __m128i { @@ -172,13 +172,13 @@ _mm_cvtepu16_epi32 :: #force_inline proc "c" (a: __m128i) -> __m128i { _mm_cvtepu16_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i { x := transmute(u16x8)a y := simd.shuffle(x, x, 0, 1) - return transmute(__m128i)i64x2(y) + return i64x2(y) } @(require_results, enable_target_feature="sse4.1") _mm_cvtepu32_epi64 :: #force_inline proc "c" (a: __m128i) -> __m128i { x := transmute(u32x4)a y := simd.shuffle(x, x, 0, 1) - return transmute(__m128i)i64x2(y) + return i64x2(y) } @(require_results, enable_target_feature="sse4.1") _mm_dp_pd :: #force_inline proc "c" (a, b: __m128d, $IMM8: u8) -> __m128d { @@ -242,7 +242,7 @@ _mm_minpos_epu16 :: #force_inline proc "c" (a: __m128i) -> __m128i { } @(require_results, enable_target_feature="sse4.1") _mm_mul_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { - return transmute(__m128i)pmuldq(transmute(i32x4)a, transmute(i32x4)b) + return pmuldq(transmute(i32x4)a, transmute(i32x4)b) } @(require_results, enable_target_feature="sse4.1") _mm_mullo_epi32 :: #force_inline proc "c" (a, b: __m128i) -> __m128i { @@ -254,15 +254,15 @@ _mm_mpsadbw_epu8 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u8) -> __m128i } @(require_results, enable_target_feature="sse4.1") _mm_testz_si128 :: #force_inline proc "c" (a: __m128i, mask: __m128i) -> i32 { - return ptestz(transmute(i64x2)a, transmute(i64x2)mask) + return ptestz(a, mask) } @(require_results, enable_target_feature="sse4.1") _mm_testc_si128 :: #force_inline proc "c" (a: __m128i, mask: __m128i) -> i32 { - return ptestc(transmute(i64x2)a, transmute(i64x2)mask) + return ptestc(a, mask) } @(require_results, enable_target_feature="sse4.1") _mm_testnzc_si128 :: #force_inline proc "c" (a: __m128i, mask: __m128i) -> i32 { - return ptestnzc(transmute(i64x2)a, transmute(i64x2)mask) + return ptestnzc(a, mask) } @(require_results, enable_target_feature="sse4.1") _mm_test_all_zeros :: #force_inline proc "c" (a: __m128i, mask: __m128i) -> i32 { diff --git a/core/simd/x86/sse42.odin b/core/simd/x86/sse42.odin index 621346342..7a674176b 100644 --- a/core/simd/x86/sse42.odin +++ b/core/simd/x86/sse42.odin @@ -94,7 +94,7 @@ _mm_crc32_u32 :: #force_inline proc "c" (crc: u32, v: u32) -> u32 { } @(require_results, enable_target_feature="sse4.2") _mm_cmpgt_epi64 :: #force_inline proc "c" (a: __m128i, b: __m128i) -> __m128i { - return transmute(__m128i)simd.lanes_gt(transmute(i64x2)a, transmute(i64x2)b) + return transmute(__m128i)simd.lanes_gt(a, b) } when ODIN_ARCH == .amd64 { |