author     Yawning Angel <yawning@schwanenlied.me>    2024-05-20 21:46:18 +0900
committer  Yawning Angel <yawning@schwanenlied.me>    2024-06-01 22:55:42 +0900
commit     cba58924a895822d8160c957b0e859a358a29391 (patch)
tree       c881b3ce40af916ce0e501be43aa465fbe0a4ad4
parent     f49575f1fbb0009b57d98cdacb90f2fed6b2c075 (diff)
core/crypto/_aes: 64-bit portable implementation
-rw-r--r--  core/crypto/_aes/aes.odin                  22
-rw-r--r--  core/crypto/_aes/ct64/api.odin             96
-rw-r--r--  core/crypto/_aes/ct64/ct64.odin           265
-rw-r--r--  core/crypto/_aes/ct64/ct64_dec.odin       135
-rw-r--r--  core/crypto/_aes/ct64/ct64_enc.odin        95
-rw-r--r--  core/crypto/_aes/ct64/ct64_keysched.odin  179
-rw-r--r--  core/crypto/_aes/ct64/helpers.odin         75
7 files changed, 867 insertions, 0 deletions
diff --git a/core/crypto/_aes/aes.odin b/core/crypto/_aes/aes.odin
new file mode 100644
index 000000000..74906fcd4
--- /dev/null
+++ b/core/crypto/_aes/aes.odin
@@ -0,0 +1,22 @@
+package _aes
+
+// KEY_SIZE_128 is the AES-128 key size in bytes.
+KEY_SIZE_128 :: 16
+// KEY_SIZE_192 is the AES-192 key size in bytes.
+KEY_SIZE_192 :: 24
+// KEY_SIZE_256 is the AES-256 key size in bytes.
+KEY_SIZE_256 :: 32
+
+// BLOCK_SIZE is the AES block size in bytes.
+BLOCK_SIZE :: 16
+
+
+// ROUNDS_128 is the number of rounds for AES-128.
+ROUNDS_128 :: 10
+// ROUNDS_192 is the number of rounds for AES-192.
+ROUNDS_192 :: 12
+// ROUNDS_256 is the number of rounds for AES-256.
+ROUNDS_256 :: 14
+
+// RCON holds the AES key schedule round constants.
+RCON := [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36}
diff --git a/core/crypto/_aes/ct64/api.odin b/core/crypto/_aes/ct64/api.odin
new file mode 100644
index 000000000..ae624971c
--- /dev/null
+++ b/core/crypto/_aes/ct64/api.odin
@@ -0,0 +1,96 @@
+package aes_ct64
+
+import "base:intrinsics"
+import "core:mem"
+
+STRIDE :: 4
+
+// Context is a keyed AES (ECB) instance.
+Context :: struct {
+ _sk_exp: [120]u64,
+ _num_rounds: int,
+ _is_initialized: bool,
+}
+
+// init initializes a context for AES with the provided key.
+init :: proc(ctx: ^Context, key: []byte) {
+ skey: [30]u64 = ---
+
+ ctx._num_rounds = keysched(skey[:], key)
+ skey_expand(ctx._sk_exp[:], skey[:], ctx._num_rounds)
+ ctx._is_initialized = true
+}
+
+// encrypt_block sets `dst` to `AES-ECB-Encrypt(src)`.
+encrypt_block :: proc(ctx: ^Context, dst, src: []byte) {
+ assert(ctx._is_initialized)
+
+ q: [8]u64
+ load_blockx1(&q, src)
+ _encrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
+ store_blockx1(dst, &q)
+}
+
+// decrypt_block sets `dst` to `AES-ECB-Decrypt(src)`.
+decrypt_block :: proc(ctx: ^Context, dst, src: []byte) {
+ assert(ctx._is_initialized)
+
+ q: [8]u64
+ load_blockx1(&q, src)
+ _decrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
+ store_blockx1(dst, &q)
+}
+
+// encrypt_blocks sets `dst` to `AES-ECB-Encrypt(src[0], .. src[n])`.
+encrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) {
+ assert(ctx._is_initialized)
+
+ q: [8]u64 = ---
+ src, dst := src, dst
+
+ n := len(src)
+ for n > 4 {
+ load_blocks(&q, src[0:4])
+ _encrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
+ store_blocks(dst[0:4], &q)
+
+ src = src[4:]
+ dst = dst[4:]
+ n -= 4
+ }
+ if n > 0 {
+ load_blocks(&q, src)
+ _encrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
+ store_blocks(dst, &q)
+ }
+}
+
+// decrypt_blocks sets `dst` to `AES-ECB-Decrypt(src[0], .. src[n])`.
+decrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) {
+ assert(ctx._is_initialized)
+
+ q: [8]u64 = ---
+ src, dst := src, dst
+
+ n := len(src)
+ for n > 4 {
+ load_blocks(&q, src[0:4])
+ _decrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
+ store_blocks(dst[0:4], &q)
+
+ src = src[4:]
+ dst = dst[4:]
+ n -= 4
+ }
+ if n > 0 {
+ load_blocks(&q, src)
+ _decrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
+ store_blocks(dst, &q)
+ }
+}
+
+// reset sanitizes the Context. The Context must be re-initialized to
+// be used again.
+reset :: proc(ctx: ^Context) {
+ mem.zero_explicit(ctx, size_of(ctx^))
+}
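
For context, a minimal usage sketch of the new API (illustrative only, not part of the patch; the _aes packages are internal building blocks, normally reached through the higher-level core:crypto/aes interface, so the direct import below is assumed purely for demonstration):

package aes_ct64_example

import aes_ct64 "core:crypto/_aes/ct64"

main :: proc() {
	key: [16]byte // AES-128 key, all zero here purely for illustration
	pt:  [16]byte // one plaintext block
	ct:  [16]byte // one ciphertext block

	ctx: aes_ct64.Context
	aes_ct64.init(&ctx, key[:])
	defer aes_ct64.reset(&ctx) // sanitize the expanded key when done

	aes_ct64.encrypt_block(&ctx, ct[:], pt[:])
	aes_ct64.decrypt_block(&ctx, pt[:], ct[:]) // round-trips back to the original block

	// Batch interface: up to STRIDE (4) blocks are processed per pass.
	blocks: [4][16]byte
	srcs, dsts: [4][]byte
	for i in 0 ..< 4 {
		srcs[i] = blocks[i][:]
		dsts[i] = blocks[i][:] // in-place is fine; all blocks are loaded before any are stored
	}
	aes_ct64.encrypt_blocks(&ctx, dsts[:], srcs[:])
}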
diff --git a/core/crypto/_aes/ct64/ct64.odin b/core/crypto/_aes/ct64/ct64.odin
new file mode 100644
index 000000000..f198cab81
--- /dev/null
+++ b/core/crypto/_aes/ct64/ct64.odin
@@ -0,0 +1,265 @@
+// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS "AS IS" AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package aes_ct64
+
+import "base:intrinsics"
+
+// Bitsliced AES for 64-bit general-purpose (integer) registers. Each
+// invocation will process up to 4 blocks at a time. This implementation
+// is derived from the BearSSL ct64 code, and distributed under a 1-clause
+// BSD license with permission from the original author.
+//
+// WARNING: "hic sunt dracones"
+//
+// This package also deliberately exposes enough internals to be able to
+// function as a replacement for `AESENC` and `AESDEC` from AES-NI, to
+// allow the implementation of non-AES primitives that use the AES round
+// function such as AEGIS and Deoxys-II. This should ONLY be done when
+// implementing something other than AES itself.
+
+sub_bytes :: proc "contextless" (q: ^[8]u64) {
+ // This S-box implementation is a straightforward translation of
+ // the circuit described by Boyar and Peralta in "A new
+ // combinational logic minimization technique with applications
+ // to cryptology" (https://eprint.iacr.org/2009/191.pdf).
+ //
+ // Note that variables x* (input) and s* (output) are numbered
+ // in "reverse" order (x0 is the high bit, x7 is the low bit).
+
+ x0 := q[7]
+ x1 := q[6]
+ x2 := q[5]
+ x3 := q[4]
+ x4 := q[3]
+ x5 := q[2]
+ x6 := q[1]
+ x7 := q[0]
+
+ // Top linear transformation.
+ y14 := x3 ~ x5
+ y13 := x0 ~ x6
+ y9 := x0 ~ x3
+ y8 := x0 ~ x5
+ t0 := x1 ~ x2
+ y1 := t0 ~ x7
+ y4 := y1 ~ x3
+ y12 := y13 ~ y14
+ y2 := y1 ~ x0
+ y5 := y1 ~ x6
+ y3 := y5 ~ y8
+ t1 := x4 ~ y12
+ y15 := t1 ~ x5
+ y20 := t1 ~ x1
+ y6 := y15 ~ x7
+ y10 := y15 ~ t0
+ y11 := y20 ~ y9
+ y7 := x7 ~ y11
+ y17 := y10 ~ y11
+ y19 := y10 ~ y8
+ y16 := t0 ~ y11
+ y21 := y13 ~ y16
+ y18 := x0 ~ y16
+
+ // Non-linear section.
+ t2 := y12 & y15
+ t3 := y3 & y6
+ t4 := t3 ~ t2
+ t5 := y4 & x7
+ t6 := t5 ~ t2
+ t7 := y13 & y16
+ t8 := y5 & y1
+ t9 := t8 ~ t7
+ t10 := y2 & y7
+ t11 := t10 ~ t7
+ t12 := y9 & y11
+ t13 := y14 & y17
+ t14 := t13 ~ t12
+ t15 := y8 & y10
+ t16 := t15 ~ t12
+ t17 := t4 ~ t14
+ t18 := t6 ~ t16
+ t19 := t9 ~ t14
+ t20 := t11 ~ t16
+ t21 := t17 ~ y20
+ t22 := t18 ~ y19
+ t23 := t19 ~ y21
+ t24 := t20 ~ y18
+
+ t25 := t21 ~ t22
+ t26 := t21 & t23
+ t27 := t24 ~ t26
+ t28 := t25 & t27
+ t29 := t28 ~ t22
+ t30 := t23 ~ t24
+ t31 := t22 ~ t26
+ t32 := t31 & t30
+ t33 := t32 ~ t24
+ t34 := t23 ~ t33
+ t35 := t27 ~ t33
+ t36 := t24 & t35
+ t37 := t36 ~ t34
+ t38 := t27 ~ t36
+ t39 := t29 & t38
+ t40 := t25 ~ t39
+
+ t41 := t40 ~ t37
+ t42 := t29 ~ t33
+ t43 := t29 ~ t40
+ t44 := t33 ~ t37
+ t45 := t42 ~ t41
+ z0 := t44 & y15
+ z1 := t37 & y6
+ z2 := t33 & x7
+ z3 := t43 & y16
+ z4 := t40 & y1
+ z5 := t29 & y7
+ z6 := t42 & y11
+ z7 := t45 & y17
+ z8 := t41 & y10
+ z9 := t44 & y12
+ z10 := t37 & y3
+ z11 := t33 & y4
+ z12 := t43 & y13
+ z13 := t40 & y5
+ z14 := t29 & y2
+ z15 := t42 & y9
+ z16 := t45 & y14
+ z17 := t41 & y8
+
+ // Bottom linear transformation.
+ t46 := z15 ~ z16
+ t47 := z10 ~ z11
+ t48 := z5 ~ z13
+ t49 := z9 ~ z10
+ t50 := z2 ~ z12
+ t51 := z2 ~ z5
+ t52 := z7 ~ z8
+ t53 := z0 ~ z3
+ t54 := z6 ~ z7
+ t55 := z16 ~ z17
+ t56 := z12 ~ t48
+ t57 := t50 ~ t53
+ t58 := z4 ~ t46
+ t59 := z3 ~ t54
+ t60 := t46 ~ t57
+ t61 := z14 ~ t57
+ t62 := t52 ~ t58
+ t63 := t49 ~ t58
+ t64 := z4 ~ t59
+ t65 := t61 ~ t62
+ t66 := z1 ~ t63
+ s0 := t59 ~ t63
+ s6 := t56 ~ ~t62
+ s7 := t48 ~ ~t60
+ t67 := t64 ~ t65
+ s3 := t53 ~ t66
+ s4 := t51 ~ t66
+ s5 := t47 ~ t65
+ s1 := t64 ~ ~s3
+ s2 := t55 ~ ~t67
+
+ q[7] = s0
+ q[6] = s1
+ q[5] = s2
+ q[4] = s3
+ q[3] = s4
+ q[2] = s5
+ q[1] = s6
+ q[0] = s7
+}
+
+orthogonalize :: proc "contextless" (q: ^[8]u64) {
+ CL2 :: 0x5555555555555555
+ CH2 :: 0xAAAAAAAAAAAAAAAA
+ q[0], q[1] = (q[0] & CL2) | ((q[1] & CL2) << 1), ((q[0] & CH2) >> 1) | (q[1] & CH2)
+ q[2], q[3] = (q[2] & CL2) | ((q[3] & CL2) << 1), ((q[2] & CH2) >> 1) | (q[3] & CH2)
+ q[4], q[5] = (q[4] & CL2) | ((q[5] & CL2) << 1), ((q[4] & CH2) >> 1) | (q[5] & CH2)
+ q[6], q[7] = (q[6] & CL2) | ((q[7] & CL2) << 1), ((q[6] & CH2) >> 1) | (q[7] & CH2)
+
+ CL4 :: 0x3333333333333333
+ CH4 :: 0xCCCCCCCCCCCCCCCC
+ q[0], q[2] = (q[0] & CL4) | ((q[2] & CL4) << 2), ((q[0] & CH4) >> 2) | (q[2] & CH4)
+ q[1], q[3] = (q[1] & CL4) | ((q[3] & CL4) << 2), ((q[1] & CH4) >> 2) | (q[3] & CH4)
+ q[4], q[6] = (q[4] & CL4) | ((q[6] & CL4) << 2), ((q[4] & CH4) >> 2) | (q[6] & CH4)
+ q[5], q[7] = (q[5] & CL4) | ((q[7] & CL4) << 2), ((q[5] & CH4) >> 2) | (q[7] & CH4)
+
+ CL8 :: 0x0F0F0F0F0F0F0F0F
+ CH8 :: 0xF0F0F0F0F0F0F0F0
+ q[0], q[4] = (q[0] & CL8) | ((q[4] & CL8) << 4), ((q[0] & CH8) >> 4) | (q[4] & CH8)
+ q[1], q[5] = (q[1] & CL8) | ((q[5] & CL8) << 4), ((q[1] & CH8) >> 4) | (q[5] & CH8)
+ q[2], q[6] = (q[2] & CL8) | ((q[6] & CL8) << 4), ((q[2] & CH8) >> 4) | (q[6] & CH8)
+ q[3], q[7] = (q[3] & CL8) | ((q[7] & CL8) << 4), ((q[3] & CH8) >> 4) | (q[7] & CH8)
+}
+
+@(require_results)
+interleave_in :: proc "contextless" (w: []u32) -> (q0, q1: u64) #no_bounds_check {
+ if len(w) < 4 {
+ intrinsics.trap()
+ }
+ x0, x1, x2, x3 := u64(w[0]), u64(w[1]), u64(w[2]), u64(w[3])
+ x0 |= (x0 << 16)
+ x1 |= (x1 << 16)
+ x2 |= (x2 << 16)
+ x3 |= (x3 << 16)
+ x0 &= 0x0000FFFF0000FFFF
+ x1 &= 0x0000FFFF0000FFFF
+ x2 &= 0x0000FFFF0000FFFF
+ x3 &= 0x0000FFFF0000FFFF
+ x0 |= (x0 << 8)
+ x1 |= (x1 << 8)
+ x2 |= (x2 << 8)
+ x3 |= (x3 << 8)
+ x0 &= 0x00FF00FF00FF00FF
+ x1 &= 0x00FF00FF00FF00FF
+ x2 &= 0x00FF00FF00FF00FF
+ x3 &= 0x00FF00FF00FF00FF
+ q0 = x0 | (x2 << 8)
+ q1 = x1 | (x3 << 8)
+ return
+}
+
+@(require_results)
+interleave_out :: proc "contextless" (q0, q1: u64) -> (w0, w1, w2, w3: u32) {
+ x0 := q0 & 0x00FF00FF00FF00FF
+ x1 := q1 & 0x00FF00FF00FF00FF
+ x2 := (q0 >> 8) & 0x00FF00FF00FF00FF
+ x3 := (q1 >> 8) & 0x00FF00FF00FF00FF
+ x0 |= (x0 >> 8)
+ x1 |= (x1 >> 8)
+ x2 |= (x2 >> 8)
+ x3 |= (x3 >> 8)
+ x0 &= 0x0000FFFF0000FFFF
+ x1 &= 0x0000FFFF0000FFFF
+ x2 &= 0x0000FFFF0000FFFF
+ x3 &= 0x0000FFFF0000FFFF
+ w0 = u32(x0) | u32(x0 >> 16)
+ w1 = u32(x1) | u32(x1 >> 16)
+ w2 = u32(x2) | u32(x2 >> 16)
+ w3 = u32(x3) | u32(x3 >> 16)
+ return
+}
+
+@(private)
+rotr32 :: #force_inline proc "contextless" (x: u64) -> u64 {
+ return (x << 32) | (x >> 32)
+}
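
To make the bitsliced layout concrete, a small illustrative sketch (not from the patch) that pushes a single 32-bit word through orthogonalize and the S-box circuit; the key schedule later in this patch does the same thing in its sub_word helper. Note that orthogonalize is an involution, which is why the same procedure converts both into and out of the bitsliced representation:

package aes_ct64_sbox_example

import aes_ct64 "core:crypto/_aes/ct64"

// Applies the AES S-box to each of the four bytes of x.
sbox_word :: proc(x: u32) -> u32 {
	q := [8]u64{u64(x), 0, 0, 0, 0, 0, 0, 0}

	aes_ct64.orthogonalize(&q) // into bitsliced form
	aes_ct64.sub_bytes(&q)     // Boyar-Peralta S-box circuit
	aes_ct64.orthogonalize(&q) // back out of bitsliced form

	return u32(q[0])
}

For example, sbox_word(0x00000000) evaluates to 0x63636363, since S(0x00) = 0x63.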
diff --git a/core/crypto/_aes/ct64/ct64_dec.odin b/core/crypto/_aes/ct64/ct64_dec.odin
new file mode 100644
index 000000000..408ee6002
--- /dev/null
+++ b/core/crypto/_aes/ct64/ct64_dec.odin
@@ -0,0 +1,135 @@
+// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS "AS IS" AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package aes_ct64
+
+import "base:intrinsics"
+
+inv_sub_bytes :: proc "contextless" (q: ^[8]u64) {
+ // AES S-box is:
+ // S(x) = A(I(x)) ^ 0x63
+ // where I() is inversion in GF(256), and A() is a linear
+ // transform (0 is formally defined to be its own inverse).
+ // Since inversion is an involution, the inverse S-box can be
+ // computed from the S-box as:
+ // iS(x) = B(S(B(x ^ 0x63)) ^ 0x63)
+ // where B() is the inverse of A(). Indeed, for any y in GF(256):
+ // iS(S(y)) = B(A(I(B(A(I(y)) ^ 0x63 ^ 0x63))) ^ 0x63 ^ 0x63) = y
+ //
+ // Note: we reuse the implementation of the forward S-box,
+ // instead of duplicating it here, so that total code size is
+ // lower. By merging the B() transforms into the S-box circuit
+ // we could make CBC decryption faster, but CBC decryption is
+ // already considerably faster than CBC encryption because we can
+ // process four blocks in parallel.
+
+ q0 := ~q[0]
+ q1 := ~q[1]
+ q2 := q[2]
+ q3 := q[3]
+ q4 := q[4]
+ q5 := ~q[5]
+ q6 := ~q[6]
+ q7 := q[7]
+ q[7] = q1 ~ q4 ~ q6
+ q[6] = q0 ~ q3 ~ q5
+ q[5] = q7 ~ q2 ~ q4
+ q[4] = q6 ~ q1 ~ q3
+ q[3] = q5 ~ q0 ~ q2
+ q[2] = q4 ~ q7 ~ q1
+ q[1] = q3 ~ q6 ~ q0
+ q[0] = q2 ~ q5 ~ q7
+
+ sub_bytes(q)
+
+ q0 = ~q[0]
+ q1 = ~q[1]
+ q2 = q[2]
+ q3 = q[3]
+ q4 = q[4]
+ q5 = ~q[5]
+ q6 = ~q[6]
+ q7 = q[7]
+ q[7] = q1 ~ q4 ~ q6
+ q[6] = q0 ~ q3 ~ q5
+ q[5] = q7 ~ q2 ~ q4
+ q[4] = q6 ~ q1 ~ q3
+ q[3] = q5 ~ q0 ~ q2
+ q[2] = q4 ~ q7 ~ q1
+ q[1] = q3 ~ q6 ~ q0
+ q[0] = q2 ~ q5 ~ q7
+}
+
+inv_shift_rows :: proc "contextless" (q: ^[8]u64) {
+ for x, i in q {
+ q[i] =
+ (x & 0x000000000000FFFF) |
+ ((x & 0x000000000FFF0000) << 4) |
+ ((x & 0x00000000F0000000) >> 12) |
+ ((x & 0x000000FF00000000) << 8) |
+ ((x & 0x0000FF0000000000) >> 8) |
+ ((x & 0x000F000000000000) << 12) |
+ ((x & 0xFFF0000000000000) >> 4)
+ }
+}
+
+inv_mix_columns :: proc "contextless" (q: ^[8]u64) {
+ q0 := q[0]
+ q1 := q[1]
+ q2 := q[2]
+ q3 := q[3]
+ q4 := q[4]
+ q5 := q[5]
+ q6 := q[6]
+ q7 := q[7]
+ r0 := (q0 >> 16) | (q0 << 48)
+ r1 := (q1 >> 16) | (q1 << 48)
+ r2 := (q2 >> 16) | (q2 << 48)
+ r3 := (q3 >> 16) | (q3 << 48)
+ r4 := (q4 >> 16) | (q4 << 48)
+ r5 := (q5 >> 16) | (q5 << 48)
+ r6 := (q6 >> 16) | (q6 << 48)
+ r7 := (q7 >> 16) | (q7 << 48)
+
+ q[0] = q5 ~ q6 ~ q7 ~ r0 ~ r5 ~ r7 ~ rotr32(q0 ~ q5 ~ q6 ~ r0 ~ r5)
+ q[1] = q0 ~ q5 ~ r0 ~ r1 ~ r5 ~ r6 ~ r7 ~ rotr32(q1 ~ q5 ~ q7 ~ r1 ~ r5 ~ r6)
+ q[2] = q0 ~ q1 ~ q6 ~ r1 ~ r2 ~ r6 ~ r7 ~ rotr32(q0 ~ q2 ~ q6 ~ r2 ~ r6 ~ r7)
+ q[3] = q0 ~ q1 ~ q2 ~ q5 ~ q6 ~ r0 ~ r2 ~ r3 ~ r5 ~ rotr32(q0 ~ q1 ~ q3 ~ q5 ~ q6 ~ q7 ~ r0 ~ r3 ~ r5 ~ r7)
+ q[4] = q1 ~ q2 ~ q3 ~ q5 ~ r1 ~ r3 ~ r4 ~ r5 ~ r6 ~ r7 ~ rotr32(q1 ~ q2 ~ q4 ~ q5 ~ q7 ~ r1 ~ r4 ~ r5 ~ r6)
+ q[5] = q2 ~ q3 ~ q4 ~ q6 ~ r2 ~ r4 ~ r5 ~ r6 ~ r7 ~ rotr32(q2 ~ q3 ~ q5 ~ q6 ~ r2 ~ r5 ~ r6 ~ r7)
+ q[6] = q3 ~ q4 ~ q5 ~ q7 ~ r3 ~ r5 ~ r6 ~ r7 ~ rotr32(q3 ~ q4 ~ q6 ~ q7 ~ r3 ~ r6 ~ r7)
+ q[7] = q4 ~ q5 ~ q6 ~ r4 ~ r6 ~ r7 ~ rotr32(q4 ~ q5 ~ q7 ~ r4 ~ r7)
+}
+
+@(private)
+_decrypt :: proc "contextless" (q: ^[8]u64, skey: []u64, num_rounds: int) {
+ add_round_key(q, skey[num_rounds << 3:])
+ for u := num_rounds - 1; u > 0; u -= 1 {
+ inv_shift_rows(q)
+ inv_sub_bytes(q)
+ add_round_key(q, skey[u << 3:])
+ inv_mix_columns(q)
+ }
+ inv_shift_rows(q)
+ inv_sub_bytes(q)
+ add_round_key(q, skey)
+}
diff --git a/core/crypto/_aes/ct64/ct64_enc.odin b/core/crypto/_aes/ct64/ct64_enc.odin
new file mode 100644
index 000000000..36d4aebc8
--- /dev/null
+++ b/core/crypto/_aes/ct64/ct64_enc.odin
@@ -0,0 +1,95 @@
+// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS "AS IS" AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package aes_ct64
+
+import "base:intrinsics"
+
+add_round_key :: proc "contextless" (q: ^[8]u64, sk: []u64) #no_bounds_check {
+ if len(sk) < 8 {
+ intrinsics.trap()
+ }
+
+ q[0] ~= sk[0]
+ q[1] ~= sk[1]
+ q[2] ~= sk[2]
+ q[3] ~= sk[3]
+ q[4] ~= sk[4]
+ q[5] ~= sk[5]
+ q[6] ~= sk[6]
+ q[7] ~= sk[7]
+}
+
+shift_rows :: proc "contextless" (q: ^[8]u64) {
+ for x, i in q {
+ q[i] =
+ (x & 0x000000000000FFFF) |
+ ((x & 0x00000000FFF00000) >> 4) |
+ ((x & 0x00000000000F0000) << 12) |
+ ((x & 0x0000FF0000000000) >> 8) |
+ ((x & 0x000000FF00000000) << 8) |
+ ((x & 0xF000000000000000) >> 12) |
+ ((x & 0x0FFF000000000000) << 4)
+ }
+}
+
+mix_columns :: proc "contextless" (q: ^[8]u64) {
+ q0 := q[0]
+ q1 := q[1]
+ q2 := q[2]
+ q3 := q[3]
+ q4 := q[4]
+ q5 := q[5]
+ q6 := q[6]
+ q7 := q[7]
+ r0 := (q0 >> 16) | (q0 << 48)
+ r1 := (q1 >> 16) | (q1 << 48)
+ r2 := (q2 >> 16) | (q2 << 48)
+ r3 := (q3 >> 16) | (q3 << 48)
+ r4 := (q4 >> 16) | (q4 << 48)
+ r5 := (q5 >> 16) | (q5 << 48)
+ r6 := (q6 >> 16) | (q6 << 48)
+ r7 := (q7 >> 16) | (q7 << 48)
+
+ q[0] = q7 ~ r7 ~ r0 ~ rotr32(q0 ~ r0)
+ q[1] = q0 ~ r0 ~ q7 ~ r7 ~ r1 ~ rotr32(q1 ~ r1)
+ q[2] = q1 ~ r1 ~ r2 ~ rotr32(q2 ~ r2)
+ q[3] = q2 ~ r2 ~ q7 ~ r7 ~ r3 ~ rotr32(q3 ~ r3)
+ q[4] = q3 ~ r3 ~ q7 ~ r7 ~ r4 ~ rotr32(q4 ~ r4)
+ q[5] = q4 ~ r4 ~ r5 ~ rotr32(q5 ~ r5)
+ q[6] = q5 ~ r5 ~ r6 ~ rotr32(q6 ~ r6)
+ q[7] = q6 ~ r6 ~ r7 ~ rotr32(q7 ~ r7)
+}
+
+@(private)
+_encrypt :: proc "contextless" (q: ^[8]u64, skey: []u64, num_rounds: int) {
+ add_round_key(q, skey)
+ for u in 1 ..< num_rounds {
+ sub_bytes(q)
+ shift_rows(q)
+ mix_columns(q)
+ add_round_key(q, skey[u << 3:])
+ }
+ sub_bytes(q)
+ shift_rows(q)
+ add_round_key(q, skey[num_rounds << 3:])
+}
diff --git a/core/crypto/_aes/ct64/ct64_keysched.odin b/core/crypto/_aes/ct64/ct64_keysched.odin
new file mode 100644
index 000000000..060a2c03e
--- /dev/null
+++ b/core/crypto/_aes/ct64/ct64_keysched.odin
@@ -0,0 +1,179 @@
+// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHORS "AS IS" AND ANY EXPRESS OR
+// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
+// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package aes_ct64
+
+import "base:intrinsics"
+import "core:crypto/_aes"
+import "core:encoding/endian"
+import "core:mem"
+
+@(private, require_results)
+sub_word :: proc "contextless" (x: u32) -> u32 {
+ q := [8]u64{u64(x), 0, 0, 0, 0, 0, 0, 0}
+
+ orthogonalize(&q)
+ sub_bytes(&q)
+ orthogonalize(&q)
+ ret := u32(q[0])
+
+ mem.zero_explicit(&q[0], size_of(u64))
+
+ return ret
+}
+
+@(private, require_results)
+keysched :: proc(comp_skey: []u64, key: []byte) -> int {
+ num_rounds, key_len := 0, len(key)
+ switch key_len {
+ case _aes.KEY_SIZE_128:
+ num_rounds = _aes.ROUNDS_128
+ case _aes.KEY_SIZE_192:
+ num_rounds = _aes.ROUNDS_192
+ case _aes.KEY_SIZE_256:
+ num_rounds = _aes.ROUNDS_256
+ case:
+ panic("crypto/aes: invalid AES key size")
+ }
+
+ skey: [60]u32 = ---
+ nk, nkf := key_len >> 2, (num_rounds + 1) << 2
+ for i in 0 ..< nk {
+ skey[i] = endian.unchecked_get_u32le(key[i << 2:])
+ }
+ tmp := skey[(key_len >> 2) - 1]
+ for i, j, k := nk, 0, 0; i < nkf; i += 1 {
+ if j == 0 {
+ tmp = (tmp << 24) | (tmp >> 8)
+ tmp = sub_word(tmp) ~ u32(_aes.RCON[k])
+ } else if nk > 6 && j == 4 {
+ tmp = sub_word(tmp)
+ }
+ tmp ~= skey[i - nk]
+ skey[i] = tmp
+ if j += 1; j == nk {
+ j = 0
+ k += 1
+ }
+ }
+
+ q: [8]u64 = ---
+ for i, j := 0, 0; i < nkf; i, j = i + 4, j + 2 {
+ q[0], q[4] = interleave_in(skey[i:])
+ q[1] = q[0]
+ q[2] = q[0]
+ q[3] = q[0]
+ q[5] = q[4]
+ q[6] = q[4]
+ q[7] = q[4]
+ orthogonalize(&q)
+ comp_skey[j + 0] =
+ (q[0] & 0x1111111111111111) |
+ (q[1] & 0x2222222222222222) |
+ (q[2] & 0x4444444444444444) |
+ (q[3] & 0x8888888888888888)
+ comp_skey[j + 1] =
+ (q[4] & 0x1111111111111111) |
+ (q[5] & 0x2222222222222222) |
+ (q[6] & 0x4444444444444444) |
+ (q[7] & 0x8888888888888888)
+ }
+
+ mem.zero_explicit(&skey, size_of(skey))
+ mem.zero_explicit(&q, size_of(q))
+
+ return num_rounds
+}
+
+@(private)
+skey_expand :: proc "contextless" (skey, comp_skey: []u64, num_rounds: int) {
+ n := (num_rounds + 1) << 1
+ for u, v := 0, 0; u < n; u, v = u + 1, v + 4 {
+ x0 := comp_skey[u]
+ x1, x2, x3 := x0, x0, x0
+ x0 &= 0x1111111111111111
+ x1 &= 0x2222222222222222
+ x2 &= 0x4444444444444444
+ x3 &= 0x8888888888888888
+ x1 >>= 1
+ x2 >>= 2
+ x3 >>= 3
+ skey[v + 0] = (x0 << 4) - x0
+ skey[v + 1] = (x1 << 4) - x1
+ skey[v + 2] = (x2 << 4) - x2
+ skey[v + 3] = (x3 << 4) - x3
+ }
+}
+
+orthogonalize_roundkey :: proc "contextless" (qq: []u64, key: []byte) {
+ if len(qq) < 8 || len(key) != 16 {
+ intrinsics.trap()
+ }
+
+ skey: [4]u32 = ---
+ skey[0] = endian.unchecked_get_u32le(key[0:])
+ skey[1] = endian.unchecked_get_u32le(key[4:])
+ skey[2] = endian.unchecked_get_u32le(key[8:])
+ skey[3] = endian.unchecked_get_u32le(key[12:])
+
+ q: [8]u64 = ---
+ q[0], q[4] = interleave_in(skey[:])
+ q[1] = q[0]
+ q[2] = q[0]
+ q[3] = q[0]
+ q[5] = q[4]
+ q[6] = q[4]
+ q[7] = q[4]
+ orthogonalize(&q)
+
+ comp_skey: [2]u64 = ---
+ comp_skey[0] =
+ (q[0] & 0x1111111111111111) |
+ (q[1] & 0x2222222222222222) |
+ (q[2] & 0x4444444444444444) |
+ (q[3] & 0x8888888888888888)
+ comp_skey[1] =
+ (q[4] & 0x1111111111111111) |
+ (q[5] & 0x2222222222222222) |
+ (q[6] & 0x4444444444444444) |
+ (q[7] & 0x8888888888888888)
+
+ for x, u in comp_skey {
+ x0 := x
+ x1, x2, x3 := x0, x0, x0
+ x0 &= 0x1111111111111111
+ x1 &= 0x2222222222222222
+ x2 &= 0x4444444444444444
+ x3 &= 0x8888888888888888
+ x1 >>= 1
+ x2 >>= 2
+ x3 >>= 3
+ qq[u * 4 + 0] = (x0 << 4) - x0
+ qq[u * 4 + 1] = (x1 << 4) - x1
+ qq[u * 4 + 2] = (x2 << 4) - x2
+ qq[u * 4 + 3] = (x3 << 4) - x3
+ }
+
+ mem.zero_explicit(&skey, size_of(skey))
+ mem.zero_explicit(&q, size_of(q))
+ mem.zero_explicit(&comp_skey, size_of(comp_skey))
+}
diff --git a/core/crypto/_aes/ct64/helpers.odin b/core/crypto/_aes/ct64/helpers.odin
new file mode 100644
index 000000000..169271f6d
--- /dev/null
+++ b/core/crypto/_aes/ct64/helpers.odin
@@ -0,0 +1,75 @@
+package aes_ct64
+
+import "base:intrinsics"
+import "core:crypto/_aes"
+import "core:encoding/endian"
+
+load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) {
+ if len(src) != _aes.BLOCK_SIZE {
+ intrinsics.trap()
+ }
+
+ w: [4]u32 = ---
+ w[0] = endian.unchecked_get_u32le(src[0:])
+ w[1] = endian.unchecked_get_u32le(src[4:])
+ w[2] = endian.unchecked_get_u32le(src[8:])
+ w[3] = endian.unchecked_get_u32le(src[12:])
+ q[0], q[4] = interleave_in(w[:])
+ orthogonalize(q)
+}
+
+store_blockx1 :: proc "contextless" (dst: []byte, q: ^[8]u64) {
+ if len(dst) != _aes.BLOCK_SIZE {
+ intrinsics.trap()
+ }
+
+ orthogonalize(q)
+ w0, w1, w2, w3 := interleave_out(q[0], q[4])
+ endian.unchecked_put_u32le(dst[0:], w0)
+ endian.unchecked_put_u32le(dst[4:], w1)
+ endian.unchecked_put_u32le(dst[8:], w2)
+ endian.unchecked_put_u32le(dst[12:], w3)
+}
+
+load_blocks :: proc "contextless" (q: ^[8]u64, src: [][]byte) {
+ if n := len(src); n > STRIDE || n == 0 {
+ intrinsics.trap()
+ }
+
+ w: [4]u32 = ---
+ for s, i in src {
+ if len(s) != _aes.BLOCK_SIZE {
+ intrinsics.trap()
+ }
+
+ w[0] = endian.unchecked_get_u32le(s[0:])
+ w[1] = endian.unchecked_get_u32le(s[4:])
+ w[2] = endian.unchecked_get_u32le(s[8:])
+ w[3] = endian.unchecked_get_u32le(s[12:])
+ q[i], q[i + 4] = interleave_in(w[:])
+ }
+ orthogonalize(q)
+}
+
+store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) {
+ if n := len(dst); n > STRIDE || n == 0 {
+ intrinsics.trap()
+ }
+
+ orthogonalize(q)
+ for d, i in dst {
+ // Allow storing fewer blocks than were loaded; a nil slice marks the end.
+ if d == nil {
+ break
+ }
+ if len(d) != _aes.BLOCK_SIZE {
+ intrinsics.trap()
+ }
+
+ w0, w1, w2, w3 := interleave_out(q[i], q[i + 4])
+ endian.unchecked_put_u32le(d[0:], w0)
+ endian.unchecked_put_u32le(d[4:], w1)
+ endian.unchecked_put_u32le(d[8:], w2)
+ endian.unchecked_put_u32le(d[12:], w3)
+ }
+}
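
Finally, an illustrative sketch (not part of the patch) of the AESENC-style usage hinted at in the ct64 package comment: one forward AES round applied to a single block, with the round key brought into bitsliced form via orthogonalize_roundkey. The proc name and surrounding setup are hypothetical:

package aes_ct64_round_example

import aes_ct64 "core:crypto/_aes/ct64"

// One bytes-in/bytes-out forward AES round (SubBytes, ShiftRows, MixColumns,
// AddRoundKey), roughly what the AES-NI AESENC instruction computes.
// block and round_key must both be 16 bytes.
aesenc_x1 :: proc(block: []byte, round_key: []byte) {
	q:  [8]u64
	rk: [8]u64

	aes_ct64.orthogonalize_roundkey(rk[:], round_key)
	aes_ct64.load_blockx1(&q, block) // orthogonalizes internally

	aes_ct64.sub_bytes(&q)
	aes_ct64.shift_rows(&q)
	aes_ct64.mix_columns(&q)
	aes_ct64.add_round_key(&q, rk[:])

	aes_ct64.store_blockx1(block, &q)
}

Constructions such as AEGIS and Deoxys-II build their state updates out of exactly this kind of round application, which is why these internals are exported rather than kept private to the package.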