diff options
| author | Yawning Angel <yawning@schwanenlied.me> | 2024-05-20 21:46:18 +0900 |
|---|---|---|
| committer | Yawning Angel <yawning@schwanenlied.me> | 2024-06-01 22:55:42 +0900 |
| commit | cba58924a895822d8160c957b0e859a358a29391 (patch) | |
| tree | c881b3ce40af916ce0e501be43aa465fbe0a4ad4 | |
| parent | f49575f1fbb0009b57d98cdacb90f2fed6b2c075 (diff) | |
core/crypto/_aes: 64-bit portable implementation
| -rw-r--r-- | core/crypto/_aes/aes.odin | 22 | ||||
| -rw-r--r-- | core/crypto/_aes/ct64/api.odin | 96 | ||||
| -rw-r--r-- | core/crypto/_aes/ct64/ct64.odin | 265 | ||||
| -rw-r--r-- | core/crypto/_aes/ct64/ct64_dec.odin | 135 | ||||
| -rw-r--r-- | core/crypto/_aes/ct64/ct64_enc.odin | 95 | ||||
| -rw-r--r-- | core/crypto/_aes/ct64/ct64_keysched.odin | 179 | ||||
| -rw-r--r-- | core/crypto/_aes/ct64/helpers.odin | 75 |
7 files changed, 867 insertions, 0 deletions
diff --git a/core/crypto/_aes/aes.odin b/core/crypto/_aes/aes.odin new file mode 100644 index 000000000..74906fcd4 --- /dev/null +++ b/core/crypto/_aes/aes.odin @@ -0,0 +1,22 @@ +package _aes + +// KEY_SIZE_128 is the AES-128 key size in bytes. +KEY_SIZE_128 :: 16 +// KEY_SIZE_192 is the AES-192 key size in bytes. +KEY_SIZE_192 :: 24 +// KEY_SIZE_256 is the AES-256 key size in bytes. +KEY_SIZE_256 :: 32 + +// BLOCK_SIZE is the AES block size in bytes. +BLOCK_SIZE :: 16 + + +// ROUNDS_128 is the number of rounds for AES-128. +ROUNDS_128 :: 10 +// ROUNDS_192 is the number of rounds for AES-192. +ROUNDS_192 :: 12 +// ROUNDS_256 is the number of rounds for AES-256. +ROUNDS_256 :: 14 + +// RCON is the AES keyschedule round constants. +RCON := [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36} diff --git a/core/crypto/_aes/ct64/api.odin b/core/crypto/_aes/ct64/api.odin new file mode 100644 index 000000000..ae624971c --- /dev/null +++ b/core/crypto/_aes/ct64/api.odin @@ -0,0 +1,96 @@ +package aes_ct64 + +import "base:intrinsics" +import "core:mem" + +STRIDE :: 4 + +// Context is a keyed AES (ECB) instance. +Context :: struct { + _sk_exp: [120]u64, + _num_rounds: int, + _is_initialized: bool, +} + +// init initializes a context for AES with the provided key. +init :: proc(ctx: ^Context, key: []byte) { + skey: [30]u64 = --- + + ctx._num_rounds = keysched(skey[:], key) + skey_expand(ctx._sk_exp[:], skey[:], ctx._num_rounds) + ctx._is_initialized = true +} + +// encrypt_block sets `dst` to `AES-ECB-Encrypt(src)`. +encrypt_block :: proc(ctx: ^Context, dst, src: []byte) { + assert(ctx._is_initialized) + + q: [8]u64 + load_blockx1(&q, src) + _encrypt(&q, ctx._sk_exp[:], ctx._num_rounds) + store_blockx1(dst, &q) +} + +// encrypt_block sets `dst` to `AES-ECB-Decrypt(src)`. +decrypt_block :: proc(ctx: ^Context, dst, src: []byte) { + assert(ctx._is_initialized) + + q: [8]u64 + load_blockx1(&q, src) + _decrypt(&q, ctx._sk_exp[:], ctx._num_rounds) + store_blockx1(dst, &q) +} + +// encrypt_blocks sets `dst` to `AES-ECB-Encrypt(src[0], .. src[n])`. +encrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) { + assert(ctx._is_initialized) + + q: [8]u64 = --- + src, dst := src, dst + + n := len(src) + for n > 4 { + load_blocks(&q, src[0:4]) + _encrypt(&q, ctx._sk_exp[:], ctx._num_rounds) + store_blocks(dst[0:4], &q) + + src = src[4:] + dst = dst[4:] + n -= 4 + } + if n > 0 { + load_blocks(&q, src) + _encrypt(&q, ctx._sk_exp[:], ctx._num_rounds) + store_blocks(dst, &q) + } +} + +// decrypt_blocks sets dst to `AES-ECB-Decrypt(src[0], .. src[n])`. +decrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) { + assert(ctx._is_initialized) + + q: [8]u64 = --- + src, dst := src, dst + + n := len(src) + for n > 4 { + load_blocks(&q, src[0:4]) + _decrypt(&q, ctx._sk_exp[:], ctx._num_rounds) + store_blocks(dst[0:4], &q) + + src = src[4:] + dst = dst[4:] + n -= 4 + } + if n > 0 { + load_blocks(&q, src) + _decrypt(&q, ctx._sk_exp[:], ctx._num_rounds) + store_blocks(dst, &q) + } +} + +// reset sanitizes the Context. The Context must be re-initialized to +// be used again. +reset :: proc(ctx: ^Context) { + mem.zero_explicit(ctx, size_of(ctx)) +} diff --git a/core/crypto/_aes/ct64/ct64.odin b/core/crypto/_aes/ct64/ct64.odin new file mode 100644 index 000000000..f198cab81 --- /dev/null +++ b/core/crypto/_aes/ct64/ct64.odin @@ -0,0 +1,265 @@ +// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package aes_ct64 + +import "base:intrinsics" + +// Bitsliced AES for 64-bit general purpose (integer) registers. Each +// invocation will process up to 4 blocks at a time. This implementation +// is derived from the BearSSL ct64 code, and distributed under a 1-clause +// BSD license with permission from the original author. +// +// WARNING: "hic sunt dracones" +// +// This package also deliberately exposes enough internals to be able to +// function as a replacement for `AESENC` and `AESDEC` from AES-NI, to +// allow the implementation of non-AES primitives that use the AES round +// function such as AEGIS and Deoxys-II. This should ONLY be done when +// implementing something other than AES itself. + +sub_bytes :: proc "contextless" (q: ^[8]u64) { + // This S-box implementation is a straightforward translation of + // the circuit described by Boyar and Peralta in "A new + // combinational logic minimization technique with applications + // to cryptology" (https://eprint.iacr.org/2009/191.pdf). + // + // Note that variables x* (input) and s* (output) are numbered + // in "reverse" order (x0 is the high bit, x7 is the low bit). + + x0 := q[7] + x1 := q[6] + x2 := q[5] + x3 := q[4] + x4 := q[3] + x5 := q[2] + x6 := q[1] + x7 := q[0] + + // Top linear transformation. + y14 := x3 ~ x5 + y13 := x0 ~ x6 + y9 := x0 ~ x3 + y8 := x0 ~ x5 + t0 := x1 ~ x2 + y1 := t0 ~ x7 + y4 := y1 ~ x3 + y12 := y13 ~ y14 + y2 := y1 ~ x0 + y5 := y1 ~ x6 + y3 := y5 ~ y8 + t1 := x4 ~ y12 + y15 := t1 ~ x5 + y20 := t1 ~ x1 + y6 := y15 ~ x7 + y10 := y15 ~ t0 + y11 := y20 ~ y9 + y7 := x7 ~ y11 + y17 := y10 ~ y11 + y19 := y10 ~ y8 + y16 := t0 ~ y11 + y21 := y13 ~ y16 + y18 := x0 ~ y16 + + // Non-linear section. + t2 := y12 & y15 + t3 := y3 & y6 + t4 := t3 ~ t2 + t5 := y4 & x7 + t6 := t5 ~ t2 + t7 := y13 & y16 + t8 := y5 & y1 + t9 := t8 ~ t7 + t10 := y2 & y7 + t11 := t10 ~ t7 + t12 := y9 & y11 + t13 := y14 & y17 + t14 := t13 ~ t12 + t15 := y8 & y10 + t16 := t15 ~ t12 + t17 := t4 ~ t14 + t18 := t6 ~ t16 + t19 := t9 ~ t14 + t20 := t11 ~ t16 + t21 := t17 ~ y20 + t22 := t18 ~ y19 + t23 := t19 ~ y21 + t24 := t20 ~ y18 + + t25 := t21 ~ t22 + t26 := t21 & t23 + t27 := t24 ~ t26 + t28 := t25 & t27 + t29 := t28 ~ t22 + t30 := t23 ~ t24 + t31 := t22 ~ t26 + t32 := t31 & t30 + t33 := t32 ~ t24 + t34 := t23 ~ t33 + t35 := t27 ~ t33 + t36 := t24 & t35 + t37 := t36 ~ t34 + t38 := t27 ~ t36 + t39 := t29 & t38 + t40 := t25 ~ t39 + + t41 := t40 ~ t37 + t42 := t29 ~ t33 + t43 := t29 ~ t40 + t44 := t33 ~ t37 + t45 := t42 ~ t41 + z0 := t44 & y15 + z1 := t37 & y6 + z2 := t33 & x7 + z3 := t43 & y16 + z4 := t40 & y1 + z5 := t29 & y7 + z6 := t42 & y11 + z7 := t45 & y17 + z8 := t41 & y10 + z9 := t44 & y12 + z10 := t37 & y3 + z11 := t33 & y4 + z12 := t43 & y13 + z13 := t40 & y5 + z14 := t29 & y2 + z15 := t42 & y9 + z16 := t45 & y14 + z17 := t41 & y8 + + // Bottom linear transformation. + t46 := z15 ~ z16 + t47 := z10 ~ z11 + t48 := z5 ~ z13 + t49 := z9 ~ z10 + t50 := z2 ~ z12 + t51 := z2 ~ z5 + t52 := z7 ~ z8 + t53 := z0 ~ z3 + t54 := z6 ~ z7 + t55 := z16 ~ z17 + t56 := z12 ~ t48 + t57 := t50 ~ t53 + t58 := z4 ~ t46 + t59 := z3 ~ t54 + t60 := t46 ~ t57 + t61 := z14 ~ t57 + t62 := t52 ~ t58 + t63 := t49 ~ t58 + t64 := z4 ~ t59 + t65 := t61 ~ t62 + t66 := z1 ~ t63 + s0 := t59 ~ t63 + s6 := t56 ~ ~t62 + s7 := t48 ~ ~t60 + t67 := t64 ~ t65 + s3 := t53 ~ t66 + s4 := t51 ~ t66 + s5 := t47 ~ t65 + s1 := t64 ~ ~s3 + s2 := t55 ~ ~t67 + + q[7] = s0 + q[6] = s1 + q[5] = s2 + q[4] = s3 + q[3] = s4 + q[2] = s5 + q[1] = s6 + q[0] = s7 +} + +orthogonalize :: proc "contextless" (q: ^[8]u64) { + CL2 :: 0x5555555555555555 + CH2 :: 0xAAAAAAAAAAAAAAAA + q[0], q[1] = (q[0] & CL2) | ((q[1] & CL2) << 1), ((q[0] & CH2) >> 1) | (q[1] & CH2) + q[2], q[3] = (q[2] & CL2) | ((q[3] & CL2) << 1), ((q[2] & CH2) >> 1) | (q[3] & CH2) + q[4], q[5] = (q[4] & CL2) | ((q[5] & CL2) << 1), ((q[4] & CH2) >> 1) | (q[5] & CH2) + q[6], q[7] = (q[6] & CL2) | ((q[7] & CL2) << 1), ((q[6] & CH2) >> 1) | (q[7] & CH2) + + CL4 :: 0x3333333333333333 + CH4 :: 0xCCCCCCCCCCCCCCCC + q[0], q[2] = (q[0] & CL4) | ((q[2] & CL4) << 2), ((q[0] & CH4) >> 2) | (q[2] & CH4) + q[1], q[3] = (q[1] & CL4) | ((q[3] & CL4) << 2), ((q[1] & CH4) >> 2) | (q[3] & CH4) + q[4], q[6] = (q[4] & CL4) | ((q[6] & CL4) << 2), ((q[4] & CH4) >> 2) | (q[6] & CH4) + q[5], q[7] = (q[5] & CL4) | ((q[7] & CL4) << 2), ((q[5] & CH4) >> 2) | (q[7] & CH4) + + CL8 :: 0x0F0F0F0F0F0F0F0F + CH8 :: 0xF0F0F0F0F0F0F0F0 + q[0], q[4] = (q[0] & CL8) | ((q[4] & CL8) << 4), ((q[0] & CH8) >> 4) | (q[4] & CH8) + q[1], q[5] = (q[1] & CL8) | ((q[5] & CL8) << 4), ((q[1] & CH8) >> 4) | (q[5] & CH8) + q[2], q[6] = (q[2] & CL8) | ((q[6] & CL8) << 4), ((q[2] & CH8) >> 4) | (q[6] & CH8) + q[3], q[7] = (q[3] & CL8) | ((q[7] & CL8) << 4), ((q[3] & CH8) >> 4) | (q[7] & CH8) +} + +@(require_results) +interleave_in :: proc "contextless" (w: []u32) -> (q0, q1: u64) #no_bounds_check { + if len(w) < 4 { + intrinsics.trap() + } + x0, x1, x2, x3 := u64(w[0]), u64(w[1]), u64(w[2]), u64(w[3]) + x0 |= (x0 << 16) + x1 |= (x1 << 16) + x2 |= (x2 << 16) + x3 |= (x3 << 16) + x0 &= 0x0000FFFF0000FFFF + x1 &= 0x0000FFFF0000FFFF + x2 &= 0x0000FFFF0000FFFF + x3 &= 0x0000FFFF0000FFFF + x0 |= (x0 << 8) + x1 |= (x1 << 8) + x2 |= (x2 << 8) + x3 |= (x3 << 8) + x0 &= 0x00FF00FF00FF00FF + x1 &= 0x00FF00FF00FF00FF + x2 &= 0x00FF00FF00FF00FF + x3 &= 0x00FF00FF00FF00FF + q0 = x0 | (x2 << 8) + q1 = x1 | (x3 << 8) + return +} + +@(require_results) +interleave_out :: proc "contextless" (q0, q1: u64) -> (w0, w1, w2, w3: u32) { + x0 := q0 & 0x00FF00FF00FF00FF + x1 := q1 & 0x00FF00FF00FF00FF + x2 := (q0 >> 8) & 0x00FF00FF00FF00FF + x3 := (q1 >> 8) & 0x00FF00FF00FF00FF + x0 |= (x0 >> 8) + x1 |= (x1 >> 8) + x2 |= (x2 >> 8) + x3 |= (x3 >> 8) + x0 &= 0x0000FFFF0000FFFF + x1 &= 0x0000FFFF0000FFFF + x2 &= 0x0000FFFF0000FFFF + x3 &= 0x0000FFFF0000FFFF + w0 = u32(x0) | u32(x0 >> 16) + w1 = u32(x1) | u32(x1 >> 16) + w2 = u32(x2) | u32(x2 >> 16) + w3 = u32(x3) | u32(x3 >> 16) + return +} + +@(private) +rotr32 :: #force_inline proc "contextless" (x: u64) -> u64 { + return (x << 32) | (x >> 32) +} diff --git a/core/crypto/_aes/ct64/ct64_dec.odin b/core/crypto/_aes/ct64/ct64_dec.odin new file mode 100644 index 000000000..408ee6002 --- /dev/null +++ b/core/crypto/_aes/ct64/ct64_dec.odin @@ -0,0 +1,135 @@ +// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package aes_ct64 + +import "base:intrinsics" + +inv_sub_bytes :: proc "contextless" (q: ^[8]u64) { + // AES S-box is: + // S(x) = A(I(x)) ^ 0x63 + // where I() is inversion in GF(256), and A() is a linear + // transform (0 is formally defined to be its own inverse). + // Since inversion is an involution, the inverse S-box can be + // computed from the S-box as: + // iS(x) = B(S(B(x ^ 0x63)) ^ 0x63) + // where B() is the inverse of A(). Indeed, for any y in GF(256): + // iS(S(y)) = B(A(I(B(A(I(y)) ^ 0x63 ^ 0x63))) ^ 0x63 ^ 0x63) = y + // + // Note: we reuse the implementation of the forward S-box, + // instead of duplicating it here, so that total code size is + // lower. By merging the B() transforms into the S-box circuit + // we could make faster CBC decryption, but CBC decryption is + // already quite faster than CBC encryption because we can + // process four blocks in parallel. + + q0 := ~q[0] + q1 := ~q[1] + q2 := q[2] + q3 := q[3] + q4 := q[4] + q5 := ~q[5] + q6 := ~q[6] + q7 := q[7] + q[7] = q1 ~ q4 ~ q6 + q[6] = q0 ~ q3 ~ q5 + q[5] = q7 ~ q2 ~ q4 + q[4] = q6 ~ q1 ~ q3 + q[3] = q5 ~ q0 ~ q2 + q[2] = q4 ~ q7 ~ q1 + q[1] = q3 ~ q6 ~ q0 + q[0] = q2 ~ q5 ~ q7 + + sub_bytes(q) + + q0 = ~q[0] + q1 = ~q[1] + q2 = q[2] + q3 = q[3] + q4 = q[4] + q5 = ~q[5] + q6 = ~q[6] + q7 = q[7] + q[7] = q1 ~ q4 ~ q6 + q[6] = q0 ~ q3 ~ q5 + q[5] = q7 ~ q2 ~ q4 + q[4] = q6 ~ q1 ~ q3 + q[3] = q5 ~ q0 ~ q2 + q[2] = q4 ~ q7 ~ q1 + q[1] = q3 ~ q6 ~ q0 + q[0] = q2 ~ q5 ~ q7 +} + +inv_shift_rows :: proc "contextless" (q: ^[8]u64) { + for x, i in q { + q[i] = + (x & 0x000000000000FFFF) | + ((x & 0x000000000FFF0000) << 4) | + ((x & 0x00000000F0000000) >> 12) | + ((x & 0x000000FF00000000) << 8) | + ((x & 0x0000FF0000000000) >> 8) | + ((x & 0x000F000000000000) << 12) | + ((x & 0xFFF0000000000000) >> 4) + } +} + +inv_mix_columns :: proc "contextless" (q: ^[8]u64) { + q0 := q[0] + q1 := q[1] + q2 := q[2] + q3 := q[3] + q4 := q[4] + q5 := q[5] + q6 := q[6] + q7 := q[7] + r0 := (q0 >> 16) | (q0 << 48) + r1 := (q1 >> 16) | (q1 << 48) + r2 := (q2 >> 16) | (q2 << 48) + r3 := (q3 >> 16) | (q3 << 48) + r4 := (q4 >> 16) | (q4 << 48) + r5 := (q5 >> 16) | (q5 << 48) + r6 := (q6 >> 16) | (q6 << 48) + r7 := (q7 >> 16) | (q7 << 48) + + q[0] = q5 ~ q6 ~ q7 ~ r0 ~ r5 ~ r7 ~ rotr32(q0 ~ q5 ~ q6 ~ r0 ~ r5) + q[1] = q0 ~ q5 ~ r0 ~ r1 ~ r5 ~ r6 ~ r7 ~ rotr32(q1 ~ q5 ~ q7 ~ r1 ~ r5 ~ r6) + q[2] = q0 ~ q1 ~ q6 ~ r1 ~ r2 ~ r6 ~ r7 ~ rotr32(q0 ~ q2 ~ q6 ~ r2 ~ r6 ~ r7) + q[3] = q0 ~ q1 ~ q2 ~ q5 ~ q6 ~ r0 ~ r2 ~ r3 ~ r5 ~ rotr32(q0 ~ q1 ~ q3 ~ q5 ~ q6 ~ q7 ~ r0 ~ r3 ~ r5 ~ r7) + q[4] = q1 ~ q2 ~ q3 ~ q5 ~ r1 ~ r3 ~ r4 ~ r5 ~ r6 ~ r7 ~ rotr32(q1 ~ q2 ~ q4 ~ q5 ~ q7 ~ r1 ~ r4 ~ r5 ~ r6) + q[5] = q2 ~ q3 ~ q4 ~ q6 ~ r2 ~ r4 ~ r5 ~ r6 ~ r7 ~ rotr32(q2 ~ q3 ~ q5 ~ q6 ~ r2 ~ r5 ~ r6 ~ r7) + q[6] = q3 ~ q4 ~ q5 ~ q7 ~ r3 ~ r5 ~ r6 ~ r7 ~ rotr32(q3 ~ q4 ~ q6 ~ q7 ~ r3 ~ r6 ~ r7) + q[7] = q4 ~ q5 ~ q6 ~ r4 ~ r6 ~ r7 ~ rotr32(q4 ~ q5 ~ q7 ~ r4 ~ r7) +} + +@(private) +_decrypt :: proc "contextless" (q: ^[8]u64, skey: []u64, num_rounds: int) { + add_round_key(q, skey[num_rounds << 3:]) + for u := num_rounds - 1; u > 0; u -= 1 { + inv_shift_rows(q) + inv_sub_bytes(q) + add_round_key(q, skey[u << 3:]) + inv_mix_columns(q) + } + inv_shift_rows(q) + inv_sub_bytes(q) + add_round_key(q, skey) +} diff --git a/core/crypto/_aes/ct64/ct64_enc.odin b/core/crypto/_aes/ct64/ct64_enc.odin new file mode 100644 index 000000000..36d4aebc8 --- /dev/null +++ b/core/crypto/_aes/ct64/ct64_enc.odin @@ -0,0 +1,95 @@ +// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package aes_ct64 + +import "base:intrinsics" + +add_round_key :: proc "contextless" (q: ^[8]u64, sk: []u64) #no_bounds_check { + if len(sk) < 8 { + intrinsics.trap() + } + + q[0] ~= sk[0] + q[1] ~= sk[1] + q[2] ~= sk[2] + q[3] ~= sk[3] + q[4] ~= sk[4] + q[5] ~= sk[5] + q[6] ~= sk[6] + q[7] ~= sk[7] +} + +shift_rows :: proc "contextless" (q: ^[8]u64) { + for x, i in q { + q[i] = + (x & 0x000000000000FFFF) | + ((x & 0x00000000FFF00000) >> 4) | + ((x & 0x00000000000F0000) << 12) | + ((x & 0x0000FF0000000000) >> 8) | + ((x & 0x000000FF00000000) << 8) | + ((x & 0xF000000000000000) >> 12) | + ((x & 0x0FFF000000000000) << 4) + } +} + +mix_columns :: proc "contextless" (q: ^[8]u64) { + q0 := q[0] + q1 := q[1] + q2 := q[2] + q3 := q[3] + q4 := q[4] + q5 := q[5] + q6 := q[6] + q7 := q[7] + r0 := (q0 >> 16) | (q0 << 48) + r1 := (q1 >> 16) | (q1 << 48) + r2 := (q2 >> 16) | (q2 << 48) + r3 := (q3 >> 16) | (q3 << 48) + r4 := (q4 >> 16) | (q4 << 48) + r5 := (q5 >> 16) | (q5 << 48) + r6 := (q6 >> 16) | (q6 << 48) + r7 := (q7 >> 16) | (q7 << 48) + + q[0] = q7 ~ r7 ~ r0 ~ rotr32(q0 ~ r0) + q[1] = q0 ~ r0 ~ q7 ~ r7 ~ r1 ~ rotr32(q1 ~ r1) + q[2] = q1 ~ r1 ~ r2 ~ rotr32(q2 ~ r2) + q[3] = q2 ~ r2 ~ q7 ~ r7 ~ r3 ~ rotr32(q3 ~ r3) + q[4] = q3 ~ r3 ~ q7 ~ r7 ~ r4 ~ rotr32(q4 ~ r4) + q[5] = q4 ~ r4 ~ r5 ~ rotr32(q5 ~ r5) + q[6] = q5 ~ r5 ~ r6 ~ rotr32(q6 ~ r6) + q[7] = q6 ~ r6 ~ r7 ~ rotr32(q7 ~ r7) +} + +@(private) +_encrypt :: proc "contextless" (q: ^[8]u64, skey: []u64, num_rounds: int) { + add_round_key(q, skey) + for u in 1 ..< num_rounds { + sub_bytes(q) + shift_rows(q) + mix_columns(q) + add_round_key(q, skey[u << 3:]) + } + sub_bytes(q) + shift_rows(q) + add_round_key(q, skey[num_rounds << 3:]) +} diff --git a/core/crypto/_aes/ct64/ct64_keysched.odin b/core/crypto/_aes/ct64/ct64_keysched.odin new file mode 100644 index 000000000..060a2c03e --- /dev/null +++ b/core/crypto/_aes/ct64/ct64_keysched.odin @@ -0,0 +1,179 @@ +// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package aes_ct64 + +import "base:intrinsics" +import "core:crypto/_aes" +import "core:encoding/endian" +import "core:mem" + +@(private, require_results) +sub_word :: proc "contextless" (x: u32) -> u32 { + q := [8]u64{u64(x), 0, 0, 0, 0, 0, 0, 0} + + orthogonalize(&q) + sub_bytes(&q) + orthogonalize(&q) + ret := u32(q[0]) + + mem.zero_explicit(&q[0], size_of(u64)) + + return ret +} + +@(private, require_results) +keysched :: proc(comp_skey: []u64, key: []byte) -> int { + num_rounds, key_len := 0, len(key) + switch key_len { + case _aes.KEY_SIZE_128: + num_rounds = _aes.ROUNDS_128 + case _aes.KEY_SIZE_192: + num_rounds = _aes.ROUNDS_192 + case _aes.KEY_SIZE_256: + num_rounds = _aes.ROUNDS_256 + case: + panic("crypto/aes: invalid AES key size") + } + + skey: [60]u32 = --- + nk, nkf := key_len >> 2, (num_rounds + 1) << 2 + for i in 0 ..< nk { + skey[i] = endian.unchecked_get_u32le(key[i << 2:]) + } + tmp := skey[(key_len >> 2) - 1] + for i, j, k := nk, 0, 0; i < nkf; i += 1 { + if j == 0 { + tmp = (tmp << 24) | (tmp >> 8) + tmp = sub_word(tmp) ~ u32(_aes.RCON[k]) + } else if nk > 6 && j == 4 { + tmp = sub_word(tmp) + } + tmp ~= skey[i - nk] + skey[i] = tmp + if j += 1; j == nk { + j = 0 + k += 1 + } + } + + q: [8]u64 = --- + for i, j := 0, 0; i < nkf; i, j = i + 4, j + 2 { + q[0], q[4] = interleave_in(skey[i:]) + q[1] = q[0] + q[2] = q[0] + q[3] = q[0] + q[5] = q[4] + q[6] = q[4] + q[7] = q[4] + orthogonalize(&q) + comp_skey[j + 0] = + (q[0] & 0x1111111111111111) | + (q[1] & 0x2222222222222222) | + (q[2] & 0x4444444444444444) | + (q[3] & 0x8888888888888888) + comp_skey[j + 1] = + (q[4] & 0x1111111111111111) | + (q[5] & 0x2222222222222222) | + (q[6] & 0x4444444444444444) | + (q[7] & 0x8888888888888888) + } + + mem.zero_explicit(&skey, size_of(skey)) + mem.zero_explicit(&q, size_of(q)) + + return num_rounds +} + +@(private) +skey_expand :: proc "contextless" (skey, comp_skey: []u64, num_rounds: int) { + n := (num_rounds + 1) << 1 + for u, v := 0, 0; u < n; u, v = u + 1, v + 4 { + x0 := comp_skey[u] + x1, x2, x3 := x0, x0, x0 + x0 &= 0x1111111111111111 + x1 &= 0x2222222222222222 + x2 &= 0x4444444444444444 + x3 &= 0x8888888888888888 + x1 >>= 1 + x2 >>= 2 + x3 >>= 3 + skey[v + 0] = (x0 << 4) - x0 + skey[v + 1] = (x1 << 4) - x1 + skey[v + 2] = (x2 << 4) - x2 + skey[v + 3] = (x3 << 4) - x3 + } +} + +orthogonalize_roundkey :: proc "contextless" (qq: []u64, key: []byte) { + if len(qq) < 8 || len(key) != 16 { + intrinsics.trap() + } + + skey: [4]u32 = --- + skey[0] = endian.unchecked_get_u32le(key[0:]) + skey[1] = endian.unchecked_get_u32le(key[4:]) + skey[2] = endian.unchecked_get_u32le(key[8:]) + skey[3] = endian.unchecked_get_u32le(key[12:]) + + q: [8]u64 = --- + q[0], q[4] = interleave_in(skey[:]) + q[1] = q[0] + q[2] = q[0] + q[3] = q[0] + q[5] = q[4] + q[6] = q[4] + q[7] = q[4] + orthogonalize(&q) + + comp_skey: [2]u64 = --- + comp_skey[0] = + (q[0] & 0x1111111111111111) | + (q[1] & 0x2222222222222222) | + (q[2] & 0x4444444444444444) | + (q[3] & 0x8888888888888888) + comp_skey[1] = + (q[4] & 0x1111111111111111) | + (q[5] & 0x2222222222222222) | + (q[6] & 0x4444444444444444) | + (q[7] & 0x8888888888888888) + + for x, u in comp_skey { + x0 := x + x1, x2, x3 := x0, x0, x0 + x0 &= 0x1111111111111111 + x1 &= 0x2222222222222222 + x2 &= 0x4444444444444444 + x3 &= 0x8888888888888888 + x1 >>= 1 + x2 >>= 2 + x3 >>= 3 + qq[u * 4 + 0] = (x0 << 4) - x0 + qq[u * 4 + 1] = (x1 << 4) - x1 + qq[u * 4 + 2] = (x2 << 4) - x2 + qq[u * 4 + 3] = (x3 << 4) - x3 + } + + mem.zero_explicit(&skey, size_of(skey)) + mem.zero_explicit(&q, size_of(q)) + mem.zero_explicit(&comp_skey, size_of(comp_skey)) +} diff --git a/core/crypto/_aes/ct64/helpers.odin b/core/crypto/_aes/ct64/helpers.odin new file mode 100644 index 000000000..169271f6d --- /dev/null +++ b/core/crypto/_aes/ct64/helpers.odin @@ -0,0 +1,75 @@ +package aes_ct64 + +import "base:intrinsics" +import "core:crypto/_aes" +import "core:encoding/endian" + +load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) { + if len(src) != _aes.BLOCK_SIZE { + intrinsics.trap() + } + + w: [4]u32 = --- + w[0] = endian.unchecked_get_u32le(src[0:]) + w[1] = endian.unchecked_get_u32le(src[4:]) + w[2] = endian.unchecked_get_u32le(src[8:]) + w[3] = endian.unchecked_get_u32le(src[12:]) + q[0], q[4] = interleave_in(w[:]) + orthogonalize(q) +} + +store_blockx1 :: proc "contextless" (dst: []byte, q: ^[8]u64) { + if len(dst) != _aes.BLOCK_SIZE { + intrinsics.trap() + } + + orthogonalize(q) + w0, w1, w2, w3 := interleave_out(q[0], q[4]) + endian.unchecked_put_u32le(dst[0:], w0) + endian.unchecked_put_u32le(dst[4:], w1) + endian.unchecked_put_u32le(dst[8:], w2) + endian.unchecked_put_u32le(dst[12:], w3) +} + +load_blocks :: proc "contextless" (q: ^[8]u64, src: [][]byte) { + if n := len(src); n > STRIDE || n == 0 { + intrinsics.trap() + } + + w: [4]u32 = --- + for s, i in src { + if len(s) != _aes.BLOCK_SIZE { + intrinsics.trap() + } + + w[0] = endian.unchecked_get_u32le(s[0:]) + w[1] = endian.unchecked_get_u32le(s[4:]) + w[2] = endian.unchecked_get_u32le(s[8:]) + w[3] = endian.unchecked_get_u32le(s[12:]) + q[i], q[i + 4] = interleave_in(w[:]) + } + orthogonalize(q) +} + +store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) { + if n := len(dst); n > STRIDE || n == 0 { + intrinsics.trap() + } + + orthogonalize(q) + for d, i in dst { + // Allow storing [0,4] blocks. + if d == nil { + break + } + if len(d) != _aes.BLOCK_SIZE { + intrinsics.trap() + } + + w0, w1, w2, w3 := interleave_out(q[i], q[i + 4]) + endian.unchecked_put_u32le(d[0:], w0) + endian.unchecked_put_u32le(d[4:], w1) + endian.unchecked_put_u32le(d[8:], w2) + endian.unchecked_put_u32le(d[12:], w3) + } +} |