aboutsummaryrefslogtreecommitdiff
path: root/core/crypto/_aes/ct64/ct64.odin
blob: af2b42c1effa682e19ddaaa48b414c8ce25688e0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
//   1. Redistributions of source code must retain the above copyright
//      notice, this list of conditions and the following disclaimer.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

package aes_ct64

// Bitsliced AES for 64-bit general purpose (integer) registers.  Each
// invocation will process up to 4 blocks at a time.  This implementation
// is derived from the BearSSL ct64 code, and distributed under a 1-clause
// BSD license with permission from the original author.
//
// WARNING: "hic sunt dracones"
//
// This package also deliberately exposes enough internals to be able to
// function as a replacement for `AESENC` and `AESDEC` from AES-NI, to
// allow the implementation of non-AES primitives that use the AES round
// function such as AEGIS and Deoxys-II.  This should ONLY be done when
// implementing something other than AES itself.

// sub_bytes applies the AES S-box to the bitsliced state `q`, in place.
//
// The eight words of `q` are read as the eight input bit-planes of the
// S-box (q[7] carries the high bit, q[0] the low bit), run through a
// fixed Boolean circuit, and the eight output bit-planes are written
// back to the same slots.  Because every operation is a bitwise
// XOR/AND/NOT on a full u64, all 64 bit positions of each word are
// processed in parallel, and there are no branches or table lookups.
sub_bytes :: proc "contextless" (q: ^[8]u64) {
	// This S-box implementation is a straightforward translation of
	// the circuit described by Boyar and Peralta in "A new
	// combinational logic minimization technique with applications
	// to cryptology" (https://eprint.iacr.org/2009/191.pdf).
	//
	// Note that variables x* (input) and s* (output) are numbered
	// in "reverse" order (x0 is the high bit, x7 is the low bit).
	//
	// Reminder on Odin operators: binary `~` is XOR, unary `~` is
	// bitwise NOT, and `&` is AND.

	// Load the input bit-planes, reversing the index so that x0 is
	// the most significant bit.
	x0 := q[7]
	x1 := q[6]
	x2 := q[5]
	x3 := q[4]
	x4 := q[3]
	x5 := q[2]
	x6 := q[1]
	x7 := q[0]

	// Top linear transformation.
	//
	// XOR-only layer that derives the y* signals (plus the shared
	// intermediates t0 and t1) from the inputs.
	y14 := x3 ~ x5
	y13 := x0 ~ x6
	y9 := x0 ~ x3
	y8 := x0 ~ x5
	t0 := x1 ~ x2
	y1 := t0 ~ x7
	y4 := y1 ~ x3
	y12 := y13 ~ y14
	y2 := y1 ~ x0
	y5 := y1 ~ x6
	y3 := y5 ~ y8
	t1 := x4 ~ y12
	y15 := t1 ~ x5
	y20 := t1 ~ x1
	y6 := y15 ~ x7
	y10 := y15 ~ t0
	y11 := y20 ~ y9
	y7 := x7 ~ y11
	y17 := y10 ~ y11
	y19 := y10 ~ y8
	y16 := t0 ~ y11
	y21 := y13 ~ y16
	y18 := x0 ~ y16

	// Non-linear section.
	//
	// This is the only part of the circuit that contains AND gates.
	// First stage: products of the y* signals, folded down to the
	// four values t21..t24.
	t2 := y12 & y15
	t3 := y3 & y6
	t4 := t3 ~ t2
	t5 := y4 & x7
	t6 := t5 ~ t2
	t7 := y13 & y16
	t8 := y5 & y1
	t9 := t8 ~ t7
	t10 := y2 & y7
	t11 := t10 ~ t7
	t12 := y9 & y11
	t13 := y14 & y17
	t14 := t13 ~ t12
	t15 := y8 & y10
	t16 := t15 ~ t12
	t17 := t4 ~ t14
	t18 := t6 ~ t16
	t19 := t9 ~ t14
	t20 := t11 ~ t16
	t21 := t17 ~ y20
	t22 := t18 ~ y19
	t23 := t19 ~ y21
	t24 := t20 ~ y18

	// Second stage: combines t21..t24 into t29, t33, t37 and t40.
	t25 := t21 ~ t22
	t26 := t21 & t23
	t27 := t24 ~ t26
	t28 := t25 & t27
	t29 := t28 ~ t22
	t30 := t23 ~ t24
	t31 := t22 ~ t26
	t32 := t31 & t30
	t33 := t32 ~ t24
	t34 := t23 ~ t33
	t35 := t27 ~ t33
	t36 := t24 & t35
	t37 := t36 ~ t34
	t38 := t27 ~ t36
	t39 := t29 & t38
	t40 := t25 ~ t39

	// Third stage: the second-stage results (and their XOR
	// combinations t41..t45) are ANDed with the y* signals and x7,
	// giving the 18 products z0..z17.
	t41 := t40 ~ t37
	t42 := t29 ~ t33
	t43 := t29 ~ t40
	t44 := t33 ~ t37
	t45 := t42 ~ t41
	z0 := t44 & y15
	z1 := t37 & y6
	z2 := t33 & x7
	z3 := t43 & y16
	z4 := t40 & y1
	z5 := t29 & y7
	z6 := t42 & y11
	z7 := t45 & y17
	z8 := t41 & y10
	z9 := t44 & y12
	z10 := t37 & y3
	z11 := t33 & y4
	z12 := t43 & y13
	z13 := t40 & y5
	z14 := t29 & y2
	z15 := t42 & y9
	z16 := t45 & y14
	z17 := t41 & y8

	// Bottom linear transformation.
	//
	// XOR layer that folds z0..z17 into the output bits s0..s7.
	// The outputs s1, s2, s6 and s7 are additionally complemented
	// (`a ~ ~b` is an XNOR); with s0 as the high bit these are bit
	// positions 6, 5, 1 and 0, i.e. the set bits of 0x63.
	t46 := z15 ~ z16
	t47 := z10 ~ z11
	t48 := z5 ~ z13
	t49 := z9 ~ z10
	t50 := z2 ~ z12
	t51 := z2 ~ z5
	t52 := z7 ~ z8
	t53 := z0 ~ z3
	t54 := z6 ~ z7
	t55 := z16 ~ z17
	t56 := z12 ~ t48
	t57 := t50 ~ t53
	t58 := z4 ~ t46
	t59 := z3 ~ t54
	t60 := t46 ~ t57
	t61 := z14 ~ t57
	t62 := t52 ~ t58
	t63 := t49 ~ t58
	t64 := z4 ~ t59
	t65 := t61 ~ t62
	t66 := z1 ~ t63
	s0 := t59 ~ t63
	s6 := t56 ~ ~t62
	s7 := t48 ~ ~t60
	t67 := t64 ~ t65
	s3 := t53 ~ t66
	s4 := t51 ~ t66
	s5 := t47 ~ t65
	s1 := t64 ~ ~s3
	s2 := t55 ~ ~t67

	// Store the output bit-planes using the same reversed indexing
	// as the loads above (s0, the high bit, goes back to q[7]).
	q[7] = s0
	q[6] = s1
	q[5] = s2
	q[4] = s3
	q[3] = s4
	q[2] = s5
	q[1] = s6
	q[0] = s7
}

// orthogonalize transposes `q` in place, treating it as a set of 8x8
// bit matrices: within every aligned group of 8 bits, bit `j` of word
// `i` is exchanged with bit `i` of word `j`.  The operation is its own
// inverse, so the same routine converts to and from bitsliced form.
orthogonalize :: proc "contextless" (q: ^[8]u64) {
	// Exchange the bits of `a` selected by `hi` with the bits of `b`
	// selected by `lo` (`hi` must equal `lo << shift`).
	exchange :: #force_inline proc "contextless" (a, b, lo, hi: u64, shift: uint) -> (u64, u64) {
		new_a := (a & lo) | ((b & lo) << shift)
		new_b := ((a & hi) >> shift) | (b & hi)
		return new_a, new_b
	}

	// Pass 1: single bits, between adjacent words.
	LO_1 :: 0x5555555555555555
	HI_1 :: 0xAAAAAAAAAAAAAAAA
	q[0], q[1] = exchange(q[0], q[1], LO_1, HI_1, 1)
	q[2], q[3] = exchange(q[2], q[3], LO_1, HI_1, 1)
	q[4], q[5] = exchange(q[4], q[5], LO_1, HI_1, 1)
	q[6], q[7] = exchange(q[6], q[7], LO_1, HI_1, 1)

	// Pass 2: bit pairs, between words two apart.
	LO_2 :: 0x3333333333333333
	HI_2 :: 0xCCCCCCCCCCCCCCCC
	q[0], q[2] = exchange(q[0], q[2], LO_2, HI_2, 2)
	q[1], q[3] = exchange(q[1], q[3], LO_2, HI_2, 2)
	q[4], q[6] = exchange(q[4], q[6], LO_2, HI_2, 2)
	q[5], q[7] = exchange(q[5], q[7], LO_2, HI_2, 2)

	// Pass 3: nibbles, between words four apart.
	LO_4 :: 0x0F0F0F0F0F0F0F0F
	HI_4 :: 0xF0F0F0F0F0F0F0F0
	q[0], q[4] = exchange(q[0], q[4], LO_4, HI_4, 4)
	q[1], q[5] = exchange(q[1], q[5], LO_4, HI_4, 4)
	q[2], q[6] = exchange(q[2], q[6], LO_4, HI_4, 4)
	q[3], q[7] = exchange(q[3], q[7], LO_4, HI_4, 4)
}

// interleave_in packs four 32-bit words into two 64-bit words with
// their bytes interleaved: the bytes of `w0` land in the even byte
// positions of `q0` and the bytes of `w2` in the odd ones; `q1` is
// built the same way from `w1` and `w3`.
@(require_results)
interleave_in :: proc "contextless" (w0, w1, w2, w3: u32) -> (q0, q1: u64) #no_bounds_check {
	// Widen a 32-bit word so that each of its bytes occupies the low
	// half of its own 16-bit lane (0xDDCCBBAA -> 0x00DD00CC00BB00AA).
	spread :: #force_inline proc "contextless" (w: u32) -> u64 {
		x := u64(w)
		x = (x | (x << 16)) & 0x0000FFFF0000FFFF
		x = (x | (x << 8)) & 0x00FF00FF00FF00FF
		return x
	}

	even0, even1 := spread(w0), spread(w1)
	odd0, odd1 := spread(w2), spread(w3)

	q0 = even0 | (odd0 << 8)
	q1 = even1 | (odd1 << 8)
	return
}

// interleave_out splits two byte-interleaved 64-bit words back into
// four 32-bit words: the even bytes of `q0` form `w0` and its odd
// bytes form `w2`; likewise `q1` yields `w1` (even) and `w3` (odd).
@(require_results)
interleave_out :: proc "contextless" (q0, q1: u64) -> (w0, w1, w2, w3: u32) {
	// Gather the low byte of each 16-bit lane of `v` into a 32-bit
	// word (0x??DD??CC??BB??AA -> 0xDDCCBBAA).
	squeeze :: #force_inline proc "contextless" (v: u64) -> u32 {
		x := v & 0x00FF00FF00FF00FF
		x = (x | (x >> 8)) & 0x0000FFFF0000FFFF
		return u32(x) | u32(x >> 16)
	}

	// Even bytes.
	w0 = squeeze(q0)
	w1 = squeeze(q1)

	// Odd bytes: shift them down into the even positions first.
	w2 = squeeze(q0 >> 8)
	w3 = squeeze(q1 >> 8)
	return
}

// rotr32 rotates a 64-bit word by 32 bits, i.e. it exchanges the upper
// and lower 32-bit halves of `x`.
@(private)
rotr32 :: #force_inline proc "contextless" (x: u64) -> u64 {
	upper_to_lower := x >> 32
	lower_to_upper := x << 32
	return lower_to_upper | upper_to_lower
}