1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
|
package runtime
import "base:intrinsics"
// This is an implementation of the Chacha8Rand DRBG, as specified
// in https://github.com/C2SP/C2SP/blob/main/chacha8rand.md
//
// There is a tradeoff to be made between state-size and performance,
// in terms of the amount of rng output buffered.
//
// The sensible buffer sizes are:
// - 256-bytes: 128-bit SIMD with 16x vector registers (SSE2)
// - 512-bytes: 128-bit SIMD with 32x vector registers (ARMv8), or
//   256-bit SIMD with 16x vector registers (AVX2)
// - 1024-bytes: AVX-512
//
// Notes:
// - Smaller than 256-bytes is possible but would require redundant
// calls to the ChaCha8 function, which is prohibitively expensive.
// - Larger than 1024-bytes is possible but pointless as the construct
// is defined around 992-bytes of RNG output and 32-bytes of input
// per iteration.
//
// This implementation opts for a 1024-byte buffer for simplicity,
// under the rationale that modern extremely memory constrained targets
// provide suitable functionality in hardware, and the language makes
// supporting the various SIMD flavors easy.
// Size in bytes of the seed (key) that is carried over from one refill
// to the next. It lives in the last RNG_SEED_SIZE bytes of the state
// buffer.
@(private = "file")
RNG_SEED_SIZE :: 32
// Number of leading bytes of the 1024-byte state buffer that may be
// handed out as RNG output per refill (1024 - 32 = 992). Bytes at or
// past this offset are the next seed and must never be returned to
// callers.
@(private)
RNG_OUTPUT_PER_ITER :: 1024 - RNG_SEED_SIZE
// The four ChaCha "sigma" constant words: the ASCII string
// "expand 32-byte k" read as little-endian 32-bit integers.
@(private)
CHACHA_SIGMA_0: u32 : 0x61707865 // "expa"
@(private)
CHACHA_SIGMA_1: u32 : 0x3320646e // "nd 3"
@(private)
CHACHA_SIGMA_2: u32 : 0x79622d32 // "2-by"
@(private)
CHACHA_SIGMA_3: u32 : 0x6b206574 // "te k"
// Number of ChaCha rounds (the "8" in ChaCha8).
// NOTE(review): not referenced in this part of the file; presumably
// consumed by the chacha8rand_refill_* backends - confirm.
@(private)
CHACHA_ROUNDS :: 8
// State of one ChaCha8Rand generator instance, as used by
// default_random_generator_proc. The zero value is a valid, unseeded
// state: the first .Read seeds it via rand_bytes.
//
// Fields are prefixed with `_` and are only manipulated by the generator
// procedures in this file.
Default_Random_State :: struct {
// Bytes [0, RNG_OUTPUT_PER_ITER) hold buffered RNG output; the final
// RNG_SEED_SIZE bytes hold the seed used by the next refill.
_buf: [1024]byte,
// Offset into _buf of the next unread output byte. The generator keeps
// it a multiple of 8 and never above RNG_OUTPUT_PER_ITER; a value of
// RNG_OUTPUT_PER_ITER means "buffer exhausted, refill before reading".
_off: int,
// True once a seed has been installed (either from rand_bytes on first
// .Read, or from a caller-supplied seed via .Reset).
_seeded: bool,
}
// Builds a Random_Generator handle backed by the ChaCha8Rand procedure.
//
// Inputs:
// - state: optional generator state to operate on. When nil (the
//   default), default_random_generator_proc falls back to its own
//   thread-local state.
//
// Returns:
// - A Random_Generator whose `procedure` is default_random_generator_proc
//   and whose `data` pointer is `state`.
@(require_results)
default_random_generator :: proc "contextless" (state: ^Default_Random_State = nil) -> Random_Generator {
	gen: Random_Generator
	gen.data = state
	gen.procedure = default_random_generator_proc
	return gen
}
// Random_Generator procedure implementing the ChaCha8Rand generator.
//
// Inputs:
// - data: optional pointer to a Default_Random_State. When nil, a
//   thread-local state owned by this procedure is used instead.
// - mode: the operation to perform.
// - p:    meaning depends on `mode`:
//   - .Read:       destination; all len(p) bytes are filled with output.
//                  An unseeded state is first seeded via rand_bytes.
//   - .Reset:      seed material. An empty slice marks the state as
//                  unseeded so the next .Read reseeds via rand_bytes.
//                  Otherwise the seed area is zeroed and at most
//                  RNG_SEED_SIZE bytes of `p` are copied in (shorter
//                  seeds are zero-padded, longer ones are cut off by
//                  `copy`), and a refill is forced on the next .Read.
//   - .Query_Info: if len(p) equals size_of(Random_Generator_Query_Info),
//                  the flags .Uniform, .Cryptographic and .Resettable are
//                  added to the value stored in `p`; any other length is
//                  silently ignored.
//
// Invariants maintained on the state (checked by the asserts below):
// - _off never exceeds RNG_OUTPUT_PER_ITER, so seed bytes are never
//   handed out as output.
// - _off is always a multiple of 8; every read consumes output in whole
//   8-byte units, discarding any unused tail of the last unit.
// - Output bytes are zeroed once consumed (or the whole buffer is
//   regenerated), so past output cannot be recovered from the state.
default_random_generator_proc :: proc(data: rawptr, mode: Random_Generator_Mode, p: []byte) {
// Fallback state used when the caller does not supply one; one instance
// per thread.
@(thread_local)
state: Default_Random_State
r: ^Default_Random_State = &state
if data != nil {
r = cast(^Default_Random_State)data
}
// The tail of the buffer (RNG_SEED_SIZE bytes) holds the seed for the
// next refill.
next_seed := r._buf[RNG_OUTPUT_PER_ITER:]
switch mode {
case .Read:
if !r._seeded { // Unlikely.
rand_bytes(next_seed)
r._off = RNG_OUTPUT_PER_ITER // Force refill.
r._seeded = true
}
assert(r._off <= RNG_OUTPUT_PER_ITER, "chacha8rand/BUG: outputed key material")
if r._off >= RNG_OUTPUT_PER_ITER { // Unlikely.
chacha8rand_refill(r)
}
// We are guaranteed to have at least some RNG output buffered.
//
// As an invariant each read will consume a multiple of 8-bytes
// of output at a time.
assert(r._off <= RNG_OUTPUT_PER_ITER - 8, "chacha8rand/BUG: less than 8-bytes of output available")
assert(r._off % 8 == 0, "chacha8rand/BUG: buffered output is not a multiple of 8-bytes")
p_len := len(p)
if p_len == size_of(u64) {
#no_bounds_check {
// Fast path for a 64-bit destination.
// The buffer is read as a whole u64 at the current offset (a
// multiple of 8, asserted above); the destination is written with
// an unaligned store since `p` is an arbitrary byte slice.
src := (^u64)(raw_data(r._buf[r._off:]))
intrinsics.unaligned_store((^u64)(raw_data(p)), src^)
src^ = 0 // Erasure (backtrack resistance)
// If this reaches RNG_OUTPUT_PER_ITER, the next .Read refills
// before consuming anything.
r._off += 8
}
return
}
// General path: drain the buffer into `p`, refilling as often as
// needed until every requested byte has been produced.
p_ := p
for remaining := p_len; remaining > 0; {
// Take as much as is still wanted, capped by what is left in the
// output region of the buffer.
sz := min(remaining, RNG_OUTPUT_PER_ITER - r._off)
#no_bounds_check {
copy(p_[:sz], r._buf[r._off:])
p_ = p_[sz:]
remaining -= sz
}
// Round the consumed amount up to a whole number of 8-byte units so
// that _off stays a multiple of 8; the extra bytes are discarded.
rounded_sz := ((sz + 7) / 8) * 8
new_off := r._off + rounded_sz
#no_bounds_check if new_off < RNG_OUTPUT_PER_ITER {
// Erasure (backtrack resistance)
intrinsics.mem_zero(raw_data(r._buf[r._off:]), rounded_sz)
r._off = new_off
} else {
// Can omit erasure since we are overwriting the entire
// buffer.
chacha8rand_refill(r)
}
}
case .Reset:
// If no seed is passed, the next call to .Read will attempt to
// reseed from the system entropy source.
if len(p) == 0 {
r._seeded = false
return
}
// The cryptographic security of the output depends entirely
// on the quality of the entropy in the seed, we will allow
// re-seeding (as it makes testing easier), but callers that
// decide to provide arbitrary seeds are on their own as far
// as ensuring high-quality entropy.
// Zero first so that a seed shorter than RNG_SEED_SIZE is zero-padded
// rather than mixed with leftover bytes of the previous seed.
intrinsics.mem_zero(raw_data(next_seed), RNG_SEED_SIZE)
copy(next_seed, p)
r._seeded = true
r._off = RNG_OUTPUT_PER_ITER // Force a refill.
case .Query_Info:
// The caller passes the storage of a Random_Generator_Query_Info as
// raw bytes; anything of a different size is ignored.
if len(p) != size_of(Random_Generator_Query_Info) {
return
}
info := (^Random_Generator_Query_Info)(raw_data(p))
// Flags are added to whatever the caller already stored there.
info^ += {.Uniform, .Cryptographic, .Resettable}
}
}
// Runs the compile-time-selected ChaCha8 backend on `r` and rewinds the
// read offset to the start of the buffer.
//
// Backend selection:
// - amd64 built with the "avx2" target feature: chacha8rand_refill_simd256
// - no hardware SIMD, or i386:                  chacha8rand_refill_ref
// - every other target with hardware SIMD:      chacha8rand_refill_simd128
//
// The backends are defined elsewhere; callers in this file rely on them
// to regenerate the buffer contents from the current seed.
//
// Asserts that `r` has been seeded.
@(private = "file")
chacha8rand_refill :: proc(r: ^Default_Random_State) {
	assert(r._seeded, "chacha8rand/BUG: unseeded refill")

	when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
		chacha8rand_refill_simd256(r)
	} else when !HAS_HARDWARE_SIMD || ODIN_ARCH == .i386 {
		// i386 currently has too few vector registers for the accelerated
		// path, so it shares the portable implementation with targets that
		// have no hardware SIMD at all.
		chacha8rand_refill_ref(r)
	} else {
		chacha8rand_refill_simd128(r)
	}

	// Fresh output starts at the beginning of the buffer.
	r._off = 0
}
|