Merge pull request #3229 from Yawning/feature/moar-crypto

core/crypto: More improvements/additions
author: gingerBill <gingerBill@users.noreply.github.com> 2024-03-06 14:49:15 +0000
committer: GitHub <noreply@github.com> 2024-03-06 14:49:15 +0000
commit: 703eab2f15b959797dfa1d81c4e9bc37ec00ff80 (patch)
tree: 2086dfbe6da4f4f502a87c63f81b91ec1ff905a7 /core/crypto
parent: 04bfc926eedbdee2276748365056e8fa44be8184 (diff)
parent: c044e295ce4a36eea5a58b21a16d3c2a8a792d26 (diff)
14 files changed, 730 insertions, 86 deletions
diff --git a/core/crypto/_sha3/sha3.odin b/core/crypto/_sha3/sha3.odin
index 6779c9770..2db76fce0 100644
--- a/core/crypto/_sha3/sha3.odin
+++ b/core/crypto/_sha3/sha3.odin
@@ -7,8 +7,12 @@ package _sha3
     List of contributors:
         zhibog, dotbmp:  Initial implementation.
 
-    Implementation of the Keccak hashing algorithm, standardized as SHA3 in <https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf>
-    To use the original Keccak padding, set the is_keccak bool to true, otherwise it will use SHA3 padding.
+    Implementation of the Keccak hashing algorithm, standardized as SHA3
+    in <https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf>.
+
+    As the only difference between the legacy Keccak and SHA3 is the domain
+    separation byte, set dsbyte to the appropriate value to pick the desired
+    algorithm.
 */
 
 import "core:math/bits"
@@ -16,47 +20,56 @@ import "core:mem"
 
 ROUNDS :: 24
 
+RATE_128 :: 1344 / 8 // ONLY for SHAKE128.
 RATE_224 :: 1152 / 8
 RATE_256 :: 1088 / 8
 RATE_384 :: 832 / 8
 RATE_512 :: 576 / 8
 
+DS_KECCAK :: 0x01
+DS_SHA3 :: 0x06
+DS_SHAKE :: 0x1f
+DS_CSHAKE :: 0x04
+
 Context :: struct {
-	st:        struct #raw_union {
+	st:             struct #raw_union {
 		b: [200]u8,
 		q: [25]u64,
 	},
-	pt:        int,
-	rsiz:      int,
-	mdlen:     int,
-	is_keccak: bool,
-
+	pt:             int,
+	rsiz:           int,
+	mdlen:          int,
+	dsbyte:         byte,
 	is_initialized: bool,
 	is_finalized:   bool, // For SHAKE (unlimited squeeze is allowed)
 }
 
-keccakf :: proc "contextless" (st: ^[25]u64) {
-	keccakf_rndc := [?]u64 {
-		0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
-		0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
-		0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
-		0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
-		0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
-		0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
-		0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
-		0x8000000000008080, 0x0000000080000001, 0x8000000080008008,
-	}
+@(private)
+keccakf_rndc := [?]u64 {
+	0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
+	0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
+	0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
+	0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
+	0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
+	0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
+	0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
+	0x8000000000008080, 0x0000000080000001, 0x8000000080008008,
+}
 
-	keccakf_rotc := [?]int {
-		1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
-		27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
-	}
+@(private)
+keccakf_rotc := [?]int {
+	1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
+	27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
+}
 
-	keccakf_piln := [?]i32 {
-		10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
-		15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
-	}
+@(private)
+keccakf_piln := [?]i32 {
+	10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
+	15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
+}
 
+@(private)
+keccakf :: proc "contextless" (st: ^[25]u64) {
 	i, j, r: i32 = ---, ---, ---
 	t: u64 = ---
 	bc: [5]u64 = ---
@@ -140,9 +153,6 @@ final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
 	assert(ctx.is_initialized)
 
 	if len(hash) < ctx.mdlen {
-		if ctx.is_keccak {
-			panic("crypto/keccac: invalid destination digest size")
-		}
 		panic("crypto/sha3: invalid destination digest size")
 	}
 
@@ -152,13 +162,9 @@ final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
 		clone(&tmp_ctx, ctx)
 		ctx = &tmp_ctx
 	}
-	defer(reset(ctx))
+	defer (reset(ctx))
 
-	if ctx.is_keccak {
-		ctx.st.b[ctx.pt] ~= 0x01
-	} else {
-		ctx.st.b[ctx.pt] ~= 0x06
-	}
+	ctx.st.b[ctx.pt] ~= ctx.dsbyte
 
 	ctx.st.b[ctx.rsiz - 1] ~= 0x80
 	keccakf(&ctx.st.q)
@@ -183,7 +189,7 @@ shake_xof :: proc(ctx: ^Context) {
 	assert(ctx.is_initialized)
 	assert(!ctx.is_finalized)
 
-	ctx.st.b[ctx.pt] ~= 0x1F
+	ctx.st.b[ctx.pt] ~= ctx.dsbyte
 	ctx.st.b[ctx.rsiz - 1] ~= 0x80
 	keccakf(&ctx.st.q)
 	ctx.pt = 0
diff --git a/core/crypto/_sha3/sp800_185.odin b/core/crypto/_sha3/sp800_185.odin
new file mode 100644
index 000000000..f32398d5c
--- /dev/null
+++ b/core/crypto/_sha3/sp800_185.odin
@@ -0,0 +1,145 @@
+package _sha3
+
+import "core:encoding/endian"
+import "core:math/bits"
+
+init_cshake :: proc(ctx: ^Context, n, s: []byte, sec_strength: int) {
+	// Empty n and s means plain SHAKE: nothing is absorbed up front.
+	is_shake := len(n) == 0 && len(s) == 0
+	ctx.mdlen = sec_strength / 8
+	if is_shake {
+		ctx.dsbyte = DS_SHAKE
+	} else {
+		ctx.dsbyte = DS_CSHAKE
+	}
+	init(ctx)
+	if !is_shake {
+		bytepad(ctx, [][]byte{n, s}, rate_cshake(sec_strength))
+	}
+}
+
+final_cshake :: proc(ctx: ^Context, dst: []byte, finalize_clone: bool = false) {
+	ctx := ctx // Shadow the parameter so it can be re-pointed at a clone.
+	if finalize_clone {
+		tmp_ctx: Context
+		clone(&tmp_ctx, ctx)
+		ctx = &tmp_ctx // Finalize the copy instead of the caller's Context.
+	}
+	defer reset(ctx) // Resets whichever Context was finalized.
+
+	encode_byte_len(ctx, len(dst), false) // right_encode(len(dst) * 8)
+	shake_xof(ctx)
+	shake_out(ctx, dst)
+}
+
+rate_cshake :: #force_inline proc(sec_strength: int) -> int {
+	// cSHAKE is only defined for 128 and 256 bit security strengths.
+	if sec_strength == 128 {
+		return RATE_128
+	}
+	if sec_strength == 256 {
+		return RATE_256
+	}
+	panic("crypto/sha3: invalid security strength")
+}
+
+// right_encode and left_encode are defined to support 0 <= x < 2^2040
+// however, the largest value we will ever need to encode is `max(int) * 8`.
+//
+// This is unfortunate as the extreme upper edge is larger than
+// `max(u64)`.  While such values are impractical at present,
+// they are possible (ie: https://arxiv.org/pdf/quant-ph/9908043.pdf).
+//
+// Thus we support 0 <= x < 2^128.
+
+@(private)
+_PAD: [RATE_128]byte // Biggest possible value of w per spec.
+
+// bytepad absorbs the SP 800-185 `bytepad(X, w)` of the encode_string of
+// each entry of x_strings into ctx: left_encode(w), then each string, then
+// zero padding up to a multiple of w bytes.  w must be in (0, RATE_128].
+bytepad :: proc(ctx: ^Context, x_strings: [][]byte, w: int) {
+	// 1. z = left_encode(w) || X.
+	z_hi: u64
+	z_lo := left_right_encode(ctx, 0, u64(w), true)
+	for x in x_strings {
+		// All uses of bytepad in SP 800-185 use the output from
+		// one or more encode_string values for `X`.
+		hi, lo := encode_string(ctx, x)
+
+		carry: u64
+		z_lo, carry = bits.add_u64(z_lo, lo, 0)
+		z_hi, carry = bits.add_u64(z_hi, hi, carry)
+
+		// This isn't actually possible, at least with the currently
+		// defined SP 800-185 routines.
+		if carry != 0 {
+			panic("crypto/sha3: bytepad input length overflow")
+		}
+	}
+
+	// 2. while len(z) mod 8 ≠ 0: z = z || 0
+	//    Skipped; this implementation is byte-oriented, not bit-oriented.
+	//
+	// 3. while (len(z)/8) mod w ≠ 0: z = z || 00000000
+	//    The `% w` matters: when z is already a multiple of w bytes the
+	//    loop above runs zero times, so no padding may be appended.
+	z_len := u128(z_hi) << 64 | u128(z_lo)
+	z_rem := int(z_len % u128(w))
+	pad := _PAD[:(w - z_rem) % w]
+
+	// 4. return z.
+	//    The padding is absorbed into the state instead of returning z.
+	update(ctx, pad)
+}
+
+encode_string :: #force_inline proc(ctx: ^Context, s: []byte) -> (u64, u64) {
+	// encode_string(S) = left_encode(len(S) in bits) || S.
+	prefix_len := encode_byte_len(ctx, len(s), true)
+	update(ctx, s)
+	// Bytes absorbed as (hi, lo); hi is the carry out of the 64-bit add.
+	total, carry := bits.add_u64(prefix_len, u64(len(s)), 0)
+	return carry, total
+}
+
+encode_byte_len :: #force_inline proc(ctx: ^Context, l: int, is_left: bool) -> u64 {
+	hi, lo := bits.mul_u64(u64(l), 8) // Byte count -> 128-bit bit count.
+	return left_right_encode(ctx, hi, lo, is_left) // Number of bytes absorbed.
+}
+
+@(private)
+left_right_encode :: proc(ctx: ^Context, hi, lo: u64, is_left: bool) -> u64 { // left_encode/right_encode of hi:lo into ctx; returns bytes absorbed.
+	HI_OFFSET :: 1 // buf[0] is reserved for the left_encode length prefix.
+	LO_OFFSET :: HI_OFFSET + 8
+	RIGHT_OFFSET :: LO_OFFSET + 8 // Index of the right_encode length postfix.
+	BUF_LEN :: RIGHT_OFFSET + 1
+
+	buf: [BUF_LEN]byte // prefix + largest uint + postfix
+	// 1. Serialize hi || lo as a single 128-bit big-endian integer.
+	endian.unchecked_put_u64be(buf[HI_OFFSET:], hi)
+	endian.unchecked_put_u64be(buf[LO_OFFSET:], lo)
+
+	// 2. Strip leading `0x00` bytes.
+	off: int
+	for off = HI_OFFSET; off < RIGHT_OFFSET - 1; off = off + 1 {// Note: Minimum size is 1, not 0.
+		if buf[off] != 0 {
+			break
+		}
+	}
+	n := byte(RIGHT_OFFSET - off) // Number of significant bytes, 1 to 16.
+
+	// 3. Prefix (left_encode) or postfix (right_encode) the length in bytes.
+	b: []byte
+	switch is_left {
+	case true:
+		buf[off - 1] = n // n | x
+		b = buf[off - 1:RIGHT_OFFSET]
+	case false:
+		buf[RIGHT_OFFSET] = n // x | n
+		b = buf[off:]
+	}
+
+	update(ctx, b)
+
+	return u64(len(b))
+}
diff --git a/core/crypto/chacha20/chacha20.odin b/core/crypto/chacha20/chacha20.odin
index 43b3303c2..7f0950d03 100644
--- a/core/crypto/chacha20/chacha20.odin
+++ b/core/crypto/chacha20/chacha20.odin
@@ -1,11 +1,21 @@
+/*
+package chacha20 implements the ChaCha20 and XChaCha20 stream ciphers.
+
+See:
+- https://datatracker.ietf.org/doc/html/rfc8439
+- https://datatracker.ietf.org/doc/draft-irtf-cfrg-xchacha/03/
+*/
 package chacha20
 
 import "core:encoding/endian"
 import "core:math/bits"
 import "core:mem"
 
+// KEY_SIZE is the (X)ChaCha20 key size in bytes.
 KEY_SIZE :: 32
+// NONCE_SIZE is the ChaCha20 nonce size in bytes.
 NONCE_SIZE :: 12
+// XNONCE_SIZE is the XChaCha20 nonce size in bytes.
 XNONCE_SIZE :: 24
 
 @(private)
@@ -19,25 +29,26 @@ _STATE_SIZE_U32 :: 16
 _ROUNDS :: 20
 
 @(private)
-_SIGMA_0 : u32 : 0x61707865
+_SIGMA_0: u32 : 0x61707865
 @(private)
-_SIGMA_1 : u32 : 0x3320646e
+_SIGMA_1: u32 : 0x3320646e
 @(private)
-_SIGMA_2 : u32 : 0x79622d32
+_SIGMA_2: u32 : 0x79622d32
 @(private)
-_SIGMA_3 : u32 : 0x6b206574
+_SIGMA_3: u32 : 0x6b206574
 
+// Context is a ChaCha20 or XChaCha20 instance.
 Context :: struct {
-	_s: [_STATE_SIZE_U32]u32,
-
-	_buffer: [_BLOCK_SIZE]byte,
-	_off: int,
-
+	_s:              [_STATE_SIZE_U32]u32,
+	_buffer:         [_BLOCK_SIZE]byte,
+	_off:            int,
 	_is_ietf_flavor: bool,
 	_is_initialized: bool,
 }
 
-init :: proc (ctx: ^Context, key, nonce: []byte) {
+// init initializes a Context for ChaCha20 or XChaCha20 with the provided
+// key and nonce.
+init :: proc(ctx: ^Context, key, nonce: []byte) {
 	if len(key) != KEY_SIZE {
 		panic("crypto/chacha20: invalid ChaCha20 key size")
 	}
@@ -89,7 +100,8 @@ init :: proc (ctx: ^Context, key, nonce: []byte) {
 	ctx._is_initialized = true
 }
 
-seek :: proc (ctx: ^Context, block_nr: u64) {
+// seek seeks the (X)ChaCha20 stream counter to the specified block.
+seek :: proc(ctx: ^Context, block_nr: u64) {
 	assert(ctx._is_initialized)
 
 	if ctx._is_ietf_flavor {
@@ -103,7 +115,10 @@ seek :: proc (ctx: ^Context, block_nr: u64) {
 	ctx._off = _BLOCK_SIZE
 }
 
-xor_bytes :: proc (ctx: ^Context, dst, src: []byte) {
+// xor_bytes XORs each byte in src with bytes taken from the (X)ChaCha20
+// keystream, and writes the resulting output to dst.  Dst and src MUST
+// alias exactly or not at all.
+xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
 	assert(ctx._is_initialized)
 
 	// TODO: Enforcing that dst and src alias exactly or not at all
@@ -147,7 +162,8 @@ xor_bytes :: proc (ctx: ^Context, dst, src: []byte) {
 	}
 }
 
-keystream_bytes :: proc (ctx: ^Context, dst: []byte) {
+// keystream_bytes fills dst with the raw (X)ChaCha20 keystream output.
+keystream_bytes :: proc(ctx: ^Context, dst: []byte) {
 	assert(ctx._is_initialized)
 
 	dst := dst
@@ -180,7 +196,9 @@ keystream_bytes :: proc (ctx: ^Context, dst: []byte) {
 	}
 }
 
-reset :: proc (ctx: ^Context) {
+// reset sanitizes the Context.  The Context must be re-initialized to
+// be used again.
+reset :: proc(ctx: ^Context) {
 	mem.zero_explicit(&ctx._s, size_of(ctx._s))
 	mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
 
@@ -188,7 +206,7 @@ reset :: proc (ctx: ^Context) {
 }
 
 @(private)
-_do_blocks :: proc (ctx: ^Context, dst, src: []byte, nr_blocks: int) {
+_do_blocks :: proc(ctx: ^Context, dst, src: []byte, nr_blocks: int) {
 	// Enforce the maximum consumed keystream per nonce.
 	//
 	// While all modern "standard" definitions of ChaCha20 use
diff --git a/core/crypto/chacha20poly1305/chacha20poly1305.odin b/core/crypto/chacha20poly1305/chacha20poly1305.odin
index 86fe54e79..7fc112d0d 100644
--- a/core/crypto/chacha20poly1305/chacha20poly1305.odin
+++ b/core/crypto/chacha20poly1305/chacha20poly1305.odin
@@ -1,3 +1,10 @@
+/*
+package chacha20poly1305 implements the AEAD_CHACHA20_POLY1305 Authenticated
+Encryption with Additional Data algorithm.
+
+See:
+- https://www.rfc-editor.org/rfc/rfc8439
+*/
 package chacha20poly1305
 
 import "core:crypto"
@@ -6,8 +13,11 @@ import "core:crypto/poly1305"
 import "core:encoding/endian"
 import "core:mem"
 
+// KEY_SIZE is the chacha20poly1305 key size in bytes.
 KEY_SIZE :: chacha20.KEY_SIZE
+// NONCE_SIZE is the chacha20poly1305 nonce size in bytes.
 NONCE_SIZE :: chacha20.NONCE_SIZE
+// TAG_SIZE is the chacha20poly1305 tag size in bytes.
 TAG_SIZE :: poly1305.TAG_SIZE
 
 @(private)
@@ -49,6 +59,8 @@ _update_mac_pad16 :: #force_inline proc (ctx: ^poly1305.Context, x_len: int) {
 	}
 }
 
+// encrypt encrypts the plaintext and authenticates the aad and ciphertext,
+// with the provided key and nonce, stores the output in ciphertext and tag.
 encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
 	_validate_common_slice_sizes(tag, key, nonce, aad, plaintext)
 	if len(ciphertext) != len(plaintext) {
@@ -95,6 +107,11 @@ encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
 	poly1305.final(&mac_ctx, tag) // Implicitly sanitizes context.
 }
 
+// decrypt authenticates the aad and ciphertext, and decrypts the ciphertext,
+// with the provided key, nonce, and tag, and stores the output in plaintext,
+// returning true iff the authentication was successful.
+//
+// If authentication fails, the destination plaintext buffer will be zeroed.
 decrypt :: proc (plaintext, tag, key, nonce, aad, ciphertext: []byte) -> bool {
 	_validate_common_slice_sizes(tag, key, nonce, aad, ciphertext)
 	if len(ciphertext) != len(plaintext) {
diff --git a/core/crypto/hkdf/hkdf.odin b/core/crypto/hkdf/hkdf.odin
new file mode 100644
index 000000000..2ac67476e
--- /dev/null
+++ b/core/crypto/hkdf/hkdf.odin
@@ -0,0 +1,103 @@
+/*
+package hkdf implements the HKDF HMAC-based Extract-and-Expand Key
+Derivation Function.
+
+See: https://www.rfc-editor.org/rfc/rfc5869
+*/
+package hkdf
+
+import "core:crypto/hash"
+import "core:crypto/hmac"
+import "core:mem"
+
+// extract_and_expand derives output keying material (OKM) via the
+// HKDF-Extract and HKDF-Expand algorithms, with the specified hash
+// function, salt, input keying material (IKM), and optional info.
+// The dst buffer must be at most 255 HMAC tags in length.
+extract_and_expand :: proc(algorithm: hash.Algorithm, salt, ikm, info, dst: []byte) {
+	h_len := hash.DIGEST_SIZES[algorithm]
+
+	tmp: [hash.MAX_DIGEST_SIZE]byte
+	prk := tmp[:h_len] // The PRK is exactly one digest long.
+	defer mem.zero_explicit(raw_data(prk), h_len) // Wipe the PRK on exit.
+
+	extract(algorithm, salt, ikm, prk)
+	expand(algorithm, prk, info, dst)
+}
+
+// extract derives a pseudorandom key (PRK) via the HKDF-Extract algorithm,
+// with the specified hash function, salt, and input keying material (IKM).
+// It requires that the dst buffer be the HMAC tag size for the specified
+// hash function.
+extract :: proc(algorithm: hash.Algorithm, salt, ikm, dst: []byte) {
+	// PRK = HMAC-Hash(salt, IKM)
+	hmac.sum(algorithm, dst, ikm, salt) // Argument order is (dst, msg, key).
+}
+
+// expand derives output keying material (OKM) via the HKDF-Expand algorithm,
+// with the specified hash function, pseudorandom key (PRK), and optional
+// info.  It panics if dst is longer than 255 HMAC tags.
+expand :: proc(algorithm: hash.Algorithm, prk, info, dst: []byte) {
+	h_len := hash.DIGEST_SIZES[algorithm]
+
+	// (<= 255*HashLen)
+	dk_len := len(dst)
+	switch {
+	case dk_len == 0:
+		return
+	case dk_len > h_len * 255:
+		panic("crypto/hkdf: derived key too long")
+	case:
+	}
+
+	// The output OKM is calculated as follows:
+	//
+	// N = ceil(L/HashLen)
+	// T = T(1) | T(2) | T(3) | ... | T(N)
+	// OKM = first L octets of T
+	//
+	// where:
+	// T(0) = empty string (zero length)
+	// T(1) = HMAC-Hash(PRK, T(0) | info | 0x01)
+	// T(2) = HMAC-Hash(PRK, T(1) | info | 0x02)
+	// T(3) = HMAC-Hash(PRK, T(2) | info | 0x03)
+	// ...
+
+	n := dk_len / h_len // Whole blocks, written straight into dst.
+	r := dk_len % h_len // Bytes of the trailing partial block, if any.
+
+	base: hmac.Context
+	defer hmac.reset(&base)
+
+	hmac.init(&base, algorithm, prk) // Keyed once; _F clones it per block.
+
+	dst_blk := dst
+	prev: []byte // T(i-1); empty for the first block.
+
+	for i in 1 ..= n {
+		_F(&base, prev, info, i, dst_blk[:h_len])
+
+		prev = dst_blk[:h_len]
+		dst_blk = dst_blk[h_len:]
+	}
+
+	if r > 0 {
+		tmp: [hash.MAX_DIGEST_SIZE]byte
+		blk := tmp[:h_len]
+		defer mem.zero_explicit(raw_data(blk), h_len)
+
+		_F(&base, prev, info, n + 1, blk)
+		copy(dst_blk, blk) // Copies only the r bytes that remain in dst.
+	}
+}
+
+@(private)
+_F :: proc(base: ^hmac.Context, prev, info: []byte, i: int, dst_blk: []byte) {
+	prf: hmac.Context
+	// T(i) = HMAC-Hash(PRK, T(i-1) | info | i), starting from a clone of base.
+	hmac.clone(&prf, base)
+	hmac.update(&prf, prev)
+	hmac.update(&prf, info)
+	hmac.update(&prf, []byte{u8(i)}) // Single counter octet; expand caps i at 255.
+	hmac.final(&prf, dst_blk)
+}
diff --git a/core/crypto/hmac/hmac.odin b/core/crypto/hmac/hmac.odin
index f720d2181..6aac8fca7 100644
--- a/core/crypto/hmac/hmac.odin
+++ b/core/crypto/hmac/hmac.odin
@@ -11,7 +11,7 @@ import "core:crypto/hash"
 import "core:mem"
 
 // sum will compute the HMAC with the specified algorithm and key
-// over msg, and write the computed digest to dst.  It requires that
+// over msg, and write the computed tag to dst.  It requires that
 // the dst buffer is the tag size.
 sum :: proc(algorithm: hash.Algorithm, dst, msg, key: []byte) {
 	ctx: Context
@@ -78,6 +78,18 @@ final :: proc(ctx: ^Context, dst: []byte) {
 	hash.final(&ctx._o_hash, dst)
 }
 
+// clone copies the state of other into ctx.  Cloning a Context onto
+// itself does nothing.
+clone :: proc(ctx, other: ^Context) {
+	if ctx != other {
+		// Copy both hash states, then the bookkeeping fields.
+		hash.clone(&ctx._o_hash, &other._o_hash)
+		hash.clone(&ctx._i_hash, &other._i_hash)
+		ctx._tag_sz = other._tag_sz
+		ctx._is_initialized = other._is_initialized
+	}
+}
+
 // reset sanitizes the Context.  The Context must be re-initialized to
 // be used again.
 reset :: proc(ctx: ^Context) {
diff --git a/core/crypto/kmac/kmac.odin b/core/crypto/kmac/kmac.odin
new file mode 100644
index 000000000..e5be6f91b
--- /dev/null
+++ b/core/crypto/kmac/kmac.odin
@@ -0,0 +1,116 @@
+/*
+package kmac implements the KMAC MAC algorithm.
+
+See:
+- https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf
+*/
+package kmac
+
+import "../_sha3"
+import "core:crypto"
+import "core:crypto/shake"
+
+// MIN_KEY_SIZE_128 is the minimum key size for KMAC128 in bytes.
+MIN_KEY_SIZE_128 :: 128 / 8
+// MIN_KEY_SIZE_256 is the minimum key size for KMAC256 in bytes.
+MIN_KEY_SIZE_256 :: 256 / 8
+
+// MIN_TAG_SIZE is the absolute minimum tag size for KMAC in bytes (8.4.2).
+// Most callers SHOULD use at least 128-bits if not 256-bits for the tag
+// size.
+MIN_TAG_SIZE :: 32 / 8
+
+// sum will compute the KMAC with the specified security strength,
+// key, and domain separator over msg, and write the computed tag to
+// dst.  The tag length is len(dst), which must be at least MIN_TAG_SIZE.
+sum :: proc(sec_strength: int, dst, msg, key, domain_sep: []byte) {
+	ctx: Context
+
+	_init_kmac(&ctx, key, domain_sep, sec_strength)
+	update(&ctx, msg)
+	final(&ctx, dst)
+}
+
+// verify will verify the KMAC tag computed with the specified security
+// strength, key and domain separator over msg and return true iff the
+// tag is valid.  allocator is only used for a scratch copy of the tag.
+verify :: proc(sec_strength: int, tag, msg, key, domain_sep: []byte, allocator := context.temp_allocator) -> bool {
+	derived_tag := make([]byte, len(tag), allocator)
+	defer delete(derived_tag, allocator) // Do not leak the scratch buffer.
+	sum(sec_strength, derived_tag, msg, key, domain_sep)
+
+	return crypto.compare_constant_time(derived_tag, tag) == 1
+}
+
+// Context is a KMAC instance.
+Context :: distinct shake.Context
+
+// init_128 initializes a Context for KMAC128.  This routine will panic if
+// the key length is less than MIN_KEY_SIZE_128.
+init_128 :: proc(ctx: ^Context, key, domain_sep: []byte) {
+	_init_kmac(ctx, key, domain_sep, 128) // 128-bit security strength.
+}
+
+// init_256 initializes a Context for KMAC256.  This routine will panic if
+// the key length is less than MIN_KEY_SIZE_256.
+init_256 :: proc(ctx: ^Context, key, domain_sep: []byte) {
+	_init_kmac(ctx, key, domain_sep, 256) // 256-bit security strength.
+}
+
+// update adds more data to the Context.
+update :: proc(ctx: ^Context, data: []byte) {
+	assert(ctx.is_initialized)
+
+	shake.write(transmute(^shake.Context)(ctx), data) // Context is a distinct shake.Context.
+}
+
+// final finalizes the Context, writes the tag to dst, and calls reset
+// on the Context.  This routine will panic if the dst length is less than
+// MIN_TAG_SIZE.
+final :: proc(ctx: ^Context, dst: []byte) {
+	assert(ctx.is_initialized)
+	defer reset(ctx)
+
+	if len(dst) < MIN_TAG_SIZE {
+		panic("crypto/kmac: invalid KMAC tag_size, too short")
+	}
+
+	_sha3.final_cshake(transmute(^_sha3.Context)(ctx), dst) // Absorbs right_encode(len(dst) * 8), then squeezes the tag.
+}
+
+// clone clones the Context other into ctx.  Cloning onto itself is a no-op.
+clone :: proc(ctx, other: ^Context) {
+	if ctx == other {
+		return
+	}
+
+	shake.clone(transmute(^shake.Context)(ctx), transmute(^shake.Context)(other)) // Delegates to the underlying SHAKE state.
+}
+
+// reset sanitizes the Context.  The Context must be re-initialized to
+// be used again.
+reset :: proc(ctx: ^Context) {
+	if !ctx.is_initialized { // Skip Contexts that are not initialized.
+		return
+	}
+
+	shake.reset(transmute(^shake.Context)(ctx))
+}
+
+@(private)
+_init_kmac :: proc(ctx: ^Context, key, s: []byte, sec_strength: int) {
+	if ctx.is_initialized { // Re-initializing: sanitize the old state first.
+		reset(ctx)
+	}
+
+	if len(key) < sec_strength / 8 { // Key must be at least sec_strength bits.
+		panic("crypto/kmac: invalid KMAC key, too short")
+	}
+	// cSHAKE with N = "KMAC" and S = s, then absorb bytepad(encode_string(key), rate).
+	ctx_ := transmute(^_sha3.Context)(ctx)
+	_sha3.init_cshake(ctx_, N_KMAC, s, sec_strength)
+	_sha3.bytepad(ctx_, [][]byte{key}, _sha3.rate_cshake(sec_strength))
+}
+
+@(private)
+N_KMAC := []byte{'K', 'M', 'A', 'C'}
diff --git a/core/crypto/legacy/keccak/keccak.odin b/core/crypto/legacy/keccak/keccak.odin
index 00ad06ad9..7813a1ab4 100644
--- a/core/crypto/legacy/keccak/keccak.odin
+++ b/core/crypto/legacy/keccak/keccak.odin
@@ -65,7 +65,7 @@ init_512 :: proc(ctx: ^Context) {
 
 @(private)
 _init :: proc(ctx: ^Context) {
-	ctx.is_keccak = true
+	ctx.dsbyte = _sha3.DS_KECCAK
 	_sha3.init(transmute(^_sha3.Context)(ctx))
 }
 
diff --git a/core/crypto/pbkdf2/pbkdf2.odin b/core/crypto/pbkdf2/pbkdf2.odin
new file mode 100644
index 000000000..20e490135
--- /dev/null
+++ b/core/crypto/pbkdf2/pbkdf2.odin
@@ -0,0 +1,122 @@
+/*
+package pbkdf2 implements the PBKDF2 password-based key derivation function.
+
+See: https://www.rfc-editor.org/rfc/rfc2898
+*/
+package pbkdf2
+
+import "core:crypto/hash"
+import "core:crypto/hmac"
+import "core:encoding/endian"
+import "core:mem"
+
+// derive invokes PBKDF2-HMAC with the specified hash algorithm, password,
+// salt, and iteration count, and writes len(dst) bytes of key to dst.
+derive :: proc(
+	hmac_hash: hash.Algorithm,
+	password: []byte,
+	salt: []byte,
+	iterations: u32,
+	dst: []byte,
+) {
+	h_len := hash.DIGEST_SIZES[hmac_hash]
+
+	// 1. If dkLen > (2^32 - 1) * hLen, output "derived key too long"
+	// and stop.
+
+	dk_len := len(dst)
+	switch {
+	case dk_len == 0:
+		return
+	case u64(dk_len) > u64(max(u32)) * u64(h_len):
+		// This is so beyond anything that is practical or reasonable,
+		// so just panic instead of returning an error.
+		panic("crypto/pbkdf2: derived key too long")
+	case:
+	}
+
+	// 2. Let l be the number of hLen-octet blocks in the derived key,
+	// rounding up, and let r be the number of octets in the last block.
+
+	l := dk_len / h_len // Don't need to round up.
+	r := dk_len % h_len
+
+	// 3. For each block of the derived key apply the function F defined
+	// below to the password P, the salt S, the iteration count c, and
+	// the block index to compute the block.
+	//
+	// 4. Concatenate the blocks and extract the first dkLen octets to
+	// produce a derived key DK.
+	//
+	// 5. Output the derived key DK.
+
+	// Each iteration of F is always `PRF (P, ...)`, so instantiate the
+	// PRF, and clone since memcpy is faster than having to re-initialize
+	// HMAC repeatedly.
+
+	base: hmac.Context
+	defer hmac.reset(&base)
+
+	hmac.init(&base, hmac_hash, password)
+
+	// Process all of the blocks that will be written directly to dst.
+	dst_blk := dst
+	for i in 1 ..= l { 	// F expects i starting at 1.
+		_F(&base, salt, iterations, u32(i), dst_blk[:h_len])
+		dst_blk = dst_blk[h_len:]
+	}
+
+	// Instead of rounding l up, just process the one extra block iff
+	// r != 0.
+	if r > 0 {
+		tmp: [hash.MAX_DIGEST_SIZE]byte
+		blk := tmp[:h_len]
+		defer mem.zero_explicit(raw_data(blk), h_len)
+
+		_F(&base, salt, iterations, u32(l + 1), blk)
+		copy(dst_blk, blk) // Copies only the r bytes that remain in dst.
+	}
+}
+
+@(private)
+_F :: proc(base: ^hmac.Context, salt: []byte, c: u32, i: u32, dst_blk: []byte) {
+	h_len := len(dst_blk) // Callers always pass a block of one digest length.
+
+	tmp: [hash.MAX_DIGEST_SIZE]byte
+	u := tmp[:h_len]
+	defer mem.zero_explicit(raw_data(u), h_len)
+
+	// F (P, S, c, i) = U_1 \xor U_2 \xor ... \xor U_c
+	//
+	// where
+	//
+	// U_1 = PRF (P, S || INT (i)) ,
+	// U_2 = PRF (P, U_1) ,
+	// ...
+	// U_c = PRF (P, U_{c-1}) .
+	//
+	// Here, INT (i) is a four-octet encoding of the integer i, most
+	// significant octet first.
+
+	prf: hmac.Context
+
+	// U_1: PRF (P, S || INT (i))
+	hmac.clone(&prf, base)
+	hmac.update(&prf, salt)
+	endian.unchecked_put_u32be(u, i) // Use u as scratch space.
+	hmac.update(&prf, u[:4])
+	hmac.final(&prf, u)
+	copy(dst_blk, u)
+
+	// U_2 ... U_c: U_n = PRF (P, U_(n-1))
+	for _ in 1 ..< c { // Runs c - 1 times; no iterations when c is 0 or 1.
+		hmac.clone(&prf, base)
+		hmac.update(&prf, u)
+		hmac.final(&prf, u)
+
+		// XOR dst_blk and u.
+		for v, i in u {
+			dst_blk[i] ~= v
+		}
+	}
+}
diff --git a/core/crypto/poly1305/poly1305.odin b/core/crypto/poly1305/poly1305.odin
index a2fb3c223..fa57c6c06 100644
--- a/core/crypto/poly1305/poly1305.odin
+++ b/core/crypto/poly1305/poly1305.odin
@@ -1,3 +1,9 @@
+/*
+package poly1305 implements the Poly1305 one-time MAC algorithm.
+
+See:
+- https://datatracker.ietf.org/doc/html/rfc8439
+*/
 package poly1305
 
 import "core:crypto"
@@ -5,13 +11,20 @@ import field "core:crypto/_fiat/field_poly1305"
 import "core:encoding/endian"
 import "core:mem"
 
+// KEY_SIZE is the Poly1305 key size in bytes.
 KEY_SIZE :: 32
+// TAG_SIZE is the Poly1305 tag size in bytes.
 TAG_SIZE :: 16
 
 @(private)
 _BLOCK_SIZE :: 16
 
-sum :: proc (dst, msg, key: []byte) {
+// sum will compute the Poly1305 MAC with the key over msg, and write
+// the computed tag to dst.  It requires that the dst buffer is the tag
+// size.
+//
+// The key SHOULD be unique and MUST be unpredictable for each invocation.
+sum :: proc(dst, msg, key: []byte) {
 	ctx: Context = ---
 
 	init(&ctx, key)
@@ -19,9 +32,12 @@ sum :: proc (dst, msg, key: []byte) {
 	final(&ctx, dst)
 }
 
-verify :: proc (tag, msg, key: []byte) -> bool {
+// verify will verify the Poly1305 tag computed with the key over msg and
+// return true iff the tag is valid.  It requires that the tag is correctly
+// sized.
+verify :: proc(tag, msg, key: []byte) -> bool {
 	ctx: Context = ---
-	derived_tag: [16]byte = ---
+	derived_tag: [TAG_SIZE]byte = ---
 
 	init(&ctx, key)
 	update(&ctx, msg)
@@ -30,18 +46,19 @@ verify :: proc (tag, msg, key: []byte) -> bool {
 	return crypto.compare_constant_time(derived_tag[:], tag) == 1
 }
 
+// Context is a Poly1305 instance.
 Context :: struct {
-	_r: field.Tight_Field_Element,
-	_a: field.Tight_Field_Element,
-	_s: field.Tight_Field_Element,
-
-	_buffer: [_BLOCK_SIZE]byte,
-	_leftover: int,
-
+	_r:              field.Tight_Field_Element,
+	_a:              field.Tight_Field_Element,
+	_s:              field.Tight_Field_Element,
+	_buffer:         [_BLOCK_SIZE]byte,
+	_leftover:       int,
 	_is_initialized: bool,
 }
 
-init :: proc (ctx: ^Context, key: []byte) {
+// init initializes a Context with the specified key.  The key SHOULD be
+// unique and MUST be unpredictable for each invocation.
+init :: proc(ctx: ^Context, key: []byte) {
 	if len(key) != KEY_SIZE {
 		panic("crypto/poly1305: invalid key size")
 	}
@@ -64,7 +81,8 @@ init :: proc (ctx: ^Context, key: []byte) {
 	ctx._is_initialized = true
 }
 
-update :: proc (ctx: ^Context, data: []byte) {
+// update adds more data to the Context.
+update :: proc(ctx: ^Context, data: []byte) {
 	assert(ctx._is_initialized)
 
 	msg := data
@@ -101,8 +119,11 @@ update :: proc (ctx: ^Context, data: []byte) {
 	}
 }
 
-final :: proc (ctx: ^Context, dst: []byte) {
+// final finalizes the Context, writes the tag to dst, and calls
+// reset on the Context.
+final :: proc(ctx: ^Context, dst: []byte) {
 	assert(ctx._is_initialized)
+	defer reset(ctx)
 
 	if len(dst) != TAG_SIZE {
 		panic("poly1305: invalid destination tag size")
@@ -125,11 +146,11 @@ final :: proc (ctx: ^Context, dst: []byte) {
 	tmp: [32]byte = ---
 	field.fe_to_bytes(&tmp, &ctx._a)
 	copy_slice(dst, tmp[0:16])
-
-	reset(ctx)
 }
 
-reset :: proc (ctx: ^Context) {
+// reset sanitizes the Context.  The Context must be re-initialized to
+// be used again.
+reset :: proc(ctx: ^Context) {
 	mem.zero_explicit(&ctx._r, size_of(ctx._r))
 	mem.zero_explicit(&ctx._a, size_of(ctx._a))
 	mem.zero_explicit(&ctx._s, size_of(ctx._s))
@@ -139,7 +160,7 @@ reset :: proc (ctx: ^Context) {
 }
 
 @(private)
-_blocks :: proc (ctx: ^Context, msg: []byte, final := false) {
+_blocks :: proc(ctx: ^Context, msg: []byte, final := false) {
 	n: field.Tight_Field_Element = ---
 	final_byte := byte(!final)
 
diff --git a/core/crypto/sha3/sha3.odin b/core/crypto/sha3/sha3.odin
index 87ff9c9cb..bc3e6e846 100644
--- a/core/crypto/sha3/sha3.odin
+++ b/core/crypto/sha3/sha3.odin
@@ -67,6 +67,7 @@ init_512 :: proc(ctx: ^Context) {
 
 @(private)
 _init :: proc(ctx: ^Context) {
+	ctx.dsbyte = _sha3.DS_SHA3
 	_sha3.init(transmute(^_sha3.Context)(ctx))
 }
 
diff --git a/core/crypto/shake/shake.odin b/core/crypto/shake/shake.odin
index 072204800..7da427485 100644
--- a/core/crypto/shake/shake.odin
+++ b/core/crypto/shake/shake.odin
@@ -1,10 +1,11 @@
 /*
-package shake implements the SHAKE XOF algorithm family.
+package shake implements the SHAKE and cSHAKE XOF algorithm families.
 
 The SHA3 hash algorithm can be found in the crypto/sha3.
 
 See:
 - https://nvlpubs.nist.gov/nistpubs/fips/nist.fips.202.pdf
+- https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf
 */
 package shake
 
@@ -18,24 +19,27 @@ package shake
 
 import "../_sha3"
 
-// Context is a SHAKE128 or SHAKE256 instance.
+// Context is a SHAKE128, SHAKE256, cSHAKE128, or cSHAKE256 instance.
 Context :: distinct _sha3.Context
 
 // init_128 initializes a Context for SHAKE128.
 init_128 :: proc(ctx: ^Context) {
-	ctx.mdlen = 128 / 8
-	_init(ctx)
+	_sha3.init_cshake(transmute(^_sha3.Context)(ctx), nil, nil, 128)
 }
 
 // init_256 initializes a Context for SHAKE256.
 init_256 :: proc(ctx: ^Context) {
-	ctx.mdlen = 256 / 8
-	_init(ctx)
+	_sha3.init_cshake(transmute(^_sha3.Context)(ctx), nil, nil, 256)
 }
 
-@(private)
-_init :: proc(ctx: ^Context) {
-	_sha3.init(transmute(^_sha3.Context)(ctx))
+// init_cshake_128 initializes a Context for cSHAKE128.
+init_cshake_128 :: proc(ctx: ^Context, domain_sep: []byte) {
+	_sha3.init_cshake(transmute(^_sha3.Context)(ctx), nil, domain_sep, 128)
+}
+
+// init_cshake_256 initializes a Context for cSHAKE256.
+init_cshake_256 :: proc(ctx: ^Context, domain_sep: []byte) {
+	_sha3.init_cshake(transmute(^_sha3.Context)(ctx), nil, domain_sep, 256)
 }
 
 // write writes more data into the SHAKE instance.  This MUST not be called
diff --git a/core/crypto/tuplehash/tuplehash.odin b/core/crypto/tuplehash/tuplehash.odin
new file mode 100644
index 000000000..baba1ce59
--- /dev/null
+++ b/core/crypto/tuplehash/tuplehash.odin
@@ -0,0 +1,66 @@
+/*
+package tuplehash implements the TupleHash and TupleHashXOF algorithms.
+
+See:
+- https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-185.pdf
+*/
+package tuplehash
+
+import "../_sha3"
+
+// Context is a TupleHash or TupleHashXOF instance.
+Context :: distinct _sha3.Context
+
+// init_128 initializes a Context for TupleHash128 or TupleHashXOF128.
+init_128 :: proc(ctx: ^Context, domain_sep: []byte) {
+	_sha3.init_cshake(transmute(^_sha3.Context)(ctx), N_TUPLEHASH, domain_sep, 128)
+}
+
+// init_256 initializes a Context for TupleHash256 or TupleHashXOF256.
+init_256 :: proc(ctx: ^Context, domain_sep: []byte) {
+	_sha3.init_cshake(transmute(^_sha3.Context)(ctx), N_TUPLEHASH, domain_sep, 256)
+}
+
+// write_element writes a tuple element into the TupleHash or TupleHashXOF
+// instance.  This MUST not be called after any reads have been done, and
+// any attempts to do so will panic.
+write_element :: proc(ctx: ^Context, data: []byte) {
+	_, _ = _sha3.encode_string(transmute(^_sha3.Context)(ctx), data)
+}
+
+// final finalizes the Context, writes the digest to hash, and calls
+// reset on the Context.
+//
+// Iff finalize_clone is set, final will work on a copy of the Context,
+// which is useful for calculating rolling digests.
+final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
+	_sha3.final_cshake(transmute(^_sha3.Context)(ctx), hash, finalize_clone)
+}
+
+// read reads output from the TupleHashXOF instance.  There is no practical
+// upper limit to the amount of data that can be read from TupleHashXOF.
+// After read has been called one or more times, further calls to
+// write_element will panic.
+read :: proc(ctx: ^Context, dst: []byte) {
+	ctx_ := transmute(^_sha3.Context)(ctx)
+	if !ctx.is_finalized {
+		_sha3.encode_byte_len(ctx_, 0, false) // right_encode
+		_sha3.shake_xof(ctx_)
+	}
+
+	_sha3.shake_out(ctx_, dst)
+}
+
+// clone clones the Context other into ctx.
+clone :: proc(ctx, other: ^Context) {
+	_sha3.clone(transmute(^_sha3.Context)(ctx), transmute(^_sha3.Context)(other))
+}
+
+// reset sanitizes the Context.  The Context must be re-initialized to
+// be used again.
+reset :: proc(ctx: ^Context) {
+	_sha3.reset(transmute(^_sha3.Context)(ctx))
+}
+
+@(private)
+N_TUPLEHASH := []byte{'T', 'u', 'p', 'l', 'e', 'H', 'a', 's', 'h'}
diff --git a/core/crypto/x25519/x25519.odin b/core/crypto/x25519/x25519.odin
index fc446d25c..285666a32 100644
--- a/core/crypto/x25519/x25519.odin
+++ b/core/crypto/x25519/x25519.odin
@@ -1,9 +1,18 @@
+/*
+package x25519 implements the X25519 (aka curve25519) Elliptic-Curve
+Diffie-Hellman key exchange protocol.
+
+See:
+- https://www.rfc-editor.org/rfc/rfc7748
+*/
 package x25519
 
 import field "core:crypto/_fiat/field_curve25519"
 import "core:mem"
 
+// SCALAR_SIZE is the size of an X25519 scalar (private key) in bytes.
 SCALAR_SIZE :: 32
+// POINT_SIZE is the size of an X25519 point (public key/shared secret) in bytes.
 POINT_SIZE :: 32
 
 @(private)
@@ -14,11 +23,11 @@ _scalar_bit :: #force_inline proc "contextless" (s: ^[32]byte, i: int) -> u8 {
 	if i < 0 {
 		return 0
 	}
-	return (s[i>>3] >> uint(i&7)) & 1
+	return (s[i >> 3] >> uint(i & 7)) & 1
 }
 
 @(private)
-_scalarmult :: proc (out, scalar, point: ^[32]byte) {
+_scalarmult :: proc(out, scalar, point: ^[32]byte) {
 	// Montgomery pseduo-multiplication taken from Monocypher.
 
 	// computes the scalar product
@@ -26,7 +35,7 @@ _scalarmult :: proc (out, scalar, point: ^[32]byte) {
 	field.fe_from_bytes(&x1, point)
 
 	// computes the actual scalar product (the result is in x2 and z2)
-	x2, x3, z2, z3: field.Tight_Field_Element =  ---, ---, ---, ---
+	x2, x3, z2, z3: field.Tight_Field_Element = ---, ---, ---, ---
 	t0, t1: field.Loose_Field_Element = ---, ---
 
 	// Montgomery ladder
@@ -38,7 +47,7 @@ _scalarmult :: proc (out, scalar, point: ^[32]byte) {
 	field.fe_one(&z3)
 
 	swap: int
-	for pos := 255-1; pos >= 0; pos = pos - 1 	{
+	for pos := 255 - 1; pos >= 0; pos = pos - 1 {
 		// constant time conditional swap before ladder step
 		b := int(_scalar_bit(scalar, pos))
 		swap ~= b // xor trick avoids swapping at the end of the loop
@@ -94,7 +103,9 @@ _scalarmult :: proc (out, scalar, point: ^[32]byte) {
 	mem.zero_explicit(&t1, size_of(t1))
 }
 
-scalarmult :: proc (dst, scalar, point: []byte) {
+// scalarmult "multiplies" the provided scalar and point, and writes the
+// resulting point to dst.
+scalarmult :: proc(dst, scalar, point: []byte) {
 	if len(scalar) != SCALAR_SIZE {
 		panic("crypto/x25519: invalid scalar size")
 	}
@@ -123,7 +134,9 @@ scalarmult :: proc (dst, scalar, point: []byte) {
 	mem.zero_explicit(&d, size_of(d))
 }
 
-scalarmult_basepoint :: proc (dst, scalar: []byte) {
+// scalarmult_basepoint "multiplies" the provided scalar with the X25519
+// base point and writes the resulting point to dst.
+scalarmult_basepoint :: proc(dst, scalar: []byte) {
 	// TODO/perf: Switch to using a precomputed table.
 	scalarmult(dst, scalar, _BASE_POINT[:])
 }
author	gingerBill <gingerBill@users.noreply.github.com>	2024-03-06 14:49:15 +0000
committer	GitHub <noreply@github.com>	2024-03-06 14:49:15 +0000
commit	703eab2f15b959797dfa1d81c4e9bc37ec00ff80 (patch)
tree	2086dfbe6da4f4f502a87c63f81b91ec1ff905a7 /core/crypto
parent	04bfc926eedbdee2276748365056e8fa44be8184 (diff)
parent	c044e295ce4a36eea5a58b21a16d3c2a8a792d26 (diff)