diff options
| author | avanspector <94762082+avanspector@users.noreply.github.com> | 2025-01-06 16:42:29 +0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-01-06 16:42:29 +0400 |
| commit | be7799459be05af307a79d80bd4ac9f61eedac7c (patch) | |
| tree | 4c272a6d20028768f731367fca3325eeceb3567d /core/encoding/base32 | |
| parent | a20d85df1eaf7a24c407256786b714044a87e5ff (diff) | |
| parent | 98efb03934b464a1b23759b5695a12ff37588357 (diff) | |
Merge branch 'odin-lang:master' into master
Diffstat (limited to 'core/encoding/base32')
| -rw-r--r-- | core/encoding/base32/base32.odin | 378 |
| -rw-r--r-- | core/encoding/base32/base32_test.odin | 227 |
2 files changed, 457 insertions, 148 deletions
diff --git a/core/encoding/base32/base32.odin b/core/encoding/base32/base32.odin index f3320428d..8629491b1 100644 --- a/core/encoding/base32/base32.odin +++ b/core/encoding/base32/base32.odin @@ -1,148 +1,230 @@ -package encoding_base32
-
-// @note(zh): Encoding utility for Base32
-// A secondary param can be used to supply a custom alphabet to
-// @link(encode) and a matching decoding table to @link(decode).
-// If none is supplied it just uses the standard Base32 alphabet.
-// Incase your specific version does not use padding, you may
-// truncate it from the encoded output.
-
-ENC_TABLE := [32]byte {
- 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
- 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
- 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
- 'Y', 'Z', '2', '3', '4', '5', '6', '7',
-}
-
-PADDING :: '='
-
-DEC_TABLE := [?]u8 {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 26, 27, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0,
- 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-}
-
-encode :: proc(data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) -> string {
- out_length := (len(data) + 4) / 5 * 8
- out := make([]byte, out_length)
- _encode(out, data)
- return string(out)
-}
-
-@private
-_encode :: proc(out, data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) {
- out := out
- data := data
-
- for len(data) > 0 {
- carry: byte
- switch len(data) {
- case:
- out[7] = ENC_TABLE[data[4] & 0x1f]
- carry = data[4] >> 5
- fallthrough
- case 4:
- out[6] = ENC_TABLE[carry | (data[3] << 3) & 0x1f]
- out[5] = ENC_TABLE[(data[3] >> 2) & 0x1f]
- carry = data[3] >> 7
- fallthrough
- case 3:
- out[4] = ENC_TABLE[carry | (data[2] << 1) & 0x1f]
- carry = (data[2] >> 4) & 0x1f
- fallthrough
- case 2:
- out[3] = ENC_TABLE[carry | (data[1] << 4) & 0x1f]
- out[2] = ENC_TABLE[(data[1] >> 1) & 0x1f]
- carry = (data[1] >> 6) & 0x1f
- fallthrough
- case 1:
- out[1] = ENC_TABLE[carry | (data[0] << 2) & 0x1f]
- out[0] = ENC_TABLE[data[0] >> 3]
- }
-
- if len(data) < 5 {
- out[7] = byte(PADDING)
- if len(data) < 4 {
- out[6] = byte(PADDING)
- out[5] = byte(PADDING)
- if len(data) < 3 {
- out[4] = byte(PADDING)
- if len(data) < 2 {
- out[3] = byte(PADDING)
- out[2] = byte(PADDING)
- }
- }
- }
- break
- }
- data = data[5:]
- out = out[8:]
- }
-}
-
-decode :: proc(data: string, DEC_TBL := DEC_TABLE, allocator := context.allocator) -> []byte #no_bounds_check{
- if len(data) == 0 {
- return nil
- }
-
- outi := 0
- data := data
-
- out := make([]byte, len(data) / 8 * 5, allocator)
- end := false
- for len(data) > 0 && !end {
- dbuf : [8]byte
- dlen := 8
-
- for j := 0; j < 8; {
- if len(data) == 0 {
- dlen, end = j, true
- break
- }
- input := data[0]
- data = data[1:]
- if input == byte(PADDING) && j >= 2 && len(data) < 8 {
- assert(!(len(data) + j < 8 - 1), "Corrupted input")
- for k := 0; k < 8-1-j; k +=1 {
- assert(len(data) < k || data[k] == byte(PADDING), "Corrupted input")
- }
- dlen, end = j, true
- assert(dlen != 1 && dlen != 3 && dlen != 6, "Corrupted input")
- break
- }
- dbuf[j] = DEC_TABLE[input]
- assert(dbuf[j] != 0xff, "Corrupted input")
- j += 1
- }
-
- switch dlen {
- case 8:
- out[outi + 4] = dbuf[6] << 5 | dbuf[7]
- fallthrough
- case 7:
- out[outi + 3] = dbuf[4] << 7 | dbuf[5] << 2 | dbuf[6] >> 3
- fallthrough
- case 5:
- out[outi + 2] = dbuf[3] << 4 | dbuf[4] >> 1
- fallthrough
- case 4:
- out[outi + 1] = dbuf[1] << 6 | dbuf[2] << 1 | dbuf[3] >> 4
- fallthrough
- case 2:
- out[outi + 0] = dbuf[0] << 3 | dbuf[1] >> 2
- }
- outi += 5
- }
- return out
-}
+// Base32 encoding/decoding implementation as specified in RFC 4648.
+// [[ More; https://www.rfc-editor.org/rfc/rfc4648.html ]]
+package encoding_base32
+
+// @note(zh): Encoding utility for Base32
+// A secondary param can be used to supply a custom alphabet to
+// @link(encode) and a matching decoding table to @link(decode).
+// If none is supplied it just uses the standard Base32 alphabet.
+// In case your specific version does not use padding, you may
+// truncate it from the encoded output.
+
+// Error represents errors that can occur during base32 decoding operations.
+// As per RFC 4648:
+// - Section 3.3: Invalid character handling
+// - Section 3.2: Padding requirements
+// - Section 6: Base32 encoding specifics (including block size requirements)
+Error :: enum {
+	None,
+	Invalid_Character, // Input contains characters outside the specified alphabet
+	Invalid_Length, // Input is shorter than the 2 characters needed to encode a single byte
+	Malformed_Input, // Input has improper structure (wrong padding position or count, or incomplete groups)
+}
+
+// Validate_Proc reports whether `c` belongs to the alphabet that `decode` should accept.
+Validate_Proc :: #type proc(c: byte) -> bool
+
+@private
+_validate_default :: proc(c: byte) -> bool {
+	return (c >= 'A' && c <= 'Z') || (c >= '2' && c <= '7')
+}
+
+@(rodata)
+ENC_TABLE := [32]byte {
+	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
+	'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
+	'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
+	'Y', 'Z', '2', '3', '4', '5', '6', '7',
+}
+
+PADDING :: '='
+
+@(rodata)
+DEC_TABLE := [256]u8 {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 26, 27, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+	15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0,
+	0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+	15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+}
+
+// encode returns the padded base32 form of `data`: 8 output characters for every started
+// group of 5 input bytes, drawn from `ENC_TBL`. The result is allocated with `allocator`.
+encode :: proc(data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) -> string {
+	out_length := (len(data) + 4) / 5 * 8
+	out := make([]byte, out_length, allocator)
+	_encode(out, data, ENC_TBL)
+	return string(out[:])
+}
+
+// _encode writes the base32 form of `data` into `out`, which the caller sizes (see encode).
+// The `allocator` parameter is not used by the body; it is kept so the signature is unchanged.
+@private
+_encode :: proc(out, data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) {
+	out := out
+	data := data
+
+	for len(data) > 0 {
+		carry: byte
+		switch len(data) {
+		case:
+			out[7] = ENC_TBL[data[4] & 0x1f]
+			carry = data[4] >> 5
+			fallthrough
+		case 4:
+			out[6] = ENC_TBL[carry | (data[3] << 3) & 0x1f]
+			out[5] = ENC_TBL[(data[3] >> 2) & 0x1f]
+			carry = data[3] >> 7
+			fallthrough
+		case 3:
+			out[4] = ENC_TBL[carry | (data[2] << 1) & 0x1f]
+			carry = (data[2] >> 4) & 0x1f
+			fallthrough
+		case 2:
+			out[3] = ENC_TBL[carry | (data[1] << 4) & 0x1f]
+			out[2] = ENC_TBL[(data[1] >> 1) & 0x1f]
+			carry = (data[1] >> 6) & 0x1f
+			fallthrough
+		case 1:
+			out[1] = ENC_TBL[carry | (data[0] << 2) & 0x1f]
+			out[0] = ENC_TBL[data[0] >> 3]
+		}
+
+		if len(data) < 5 {
+			out[7] = byte(PADDING)
+			if len(data) < 4 {
+				out[6] = byte(PADDING)
+				out[5] = byte(PADDING)
+				if len(data) < 3 {
+					out[4] = byte(PADDING)
+					if len(data) < 2 {
+						out[3] = byte(PADDING)
+						out[2] = byte(PADDING)
+					}
+				}
+			}
+			break
+		}
+		data = data[5:]
+		out = out[8:]
+	}
+}
+
+// decode turns base32 text back into bytes; the result is allocated with `allocator`.
+// Every character before the first PADDING must pass `validate` and is then looked up
+// in `DEC_TBL` without further checks. Padding is accepted only at the end, in exactly
+// the amount the final group needs (6, 4, 3 or 1); input without padding must be a
+// multiple of 8 long. Empty input gives (nil, .None); failures give nil and an Error.
+@(optimization_mode="favor_size")
+decode :: proc(data: string, DEC_TBL := DEC_TABLE, validate: Validate_Proc = _validate_default,
+	allocator := context.allocator) -> (out: []byte, err: Error) {
+	if len(data) == 0 {
+		return nil, .None
+	}
+
+	// Check minimum length requirement first
+	if len(data) < 2 {
+		return nil, .Invalid_Length
+	}
+
+	// Validate characters using provided validation function
+	for i := 0; i < len(data); i += 1 {
+		c := data[i]
+		if c == byte(PADDING) {
+			break
+		}
+		if !validate(c) {
+			return nil, .Invalid_Character
+		}
+	}
+
+	// Validate padding and length
+	data_len := len(data)
+	padding_count := 0
+	for i := data_len - 1; i >= 0; i -= 1 {
+		if data[i] != byte(PADDING) {
+			break
+		}
+		padding_count += 1
+	}
+	input_chars := data_len - padding_count
+
+	// Check for proper padding and length combinations
+	if padding_count > 0 {
+		// Verify no padding in the middle
+		for i := 0; i < input_chars; i += 1 {
+			if data[i] == byte(PADDING) {
+				return nil, .Malformed_Input
+			}
+		}
+
+		required_padding: int
+		switch input_chars % 8 {
+		case 2: required_padding = 6 // 2 chars need 6 padding chars
+		case 4: required_padding = 4 // 4 chars need 4 padding chars
+		case 5: required_padding = 3 // 5 chars need 3 padding chars
+		case 7: required_padding = 1 // 7 chars need 1 padding char
+		case:   required_padding = 0 // 0, 1, 3 or 6 chars: no amount of padding is valid
+		}
+
+		// A complete group followed by '=' (e.g. "MZXW6YTB=") and an impossible
+		// remainder are both as malformed as a wrong padding count.
+		if required_padding == 0 || padding_count != required_padding {
+			return nil, .Malformed_Input
+		}
+	} else if data_len % 8 != 0 {
+		// No padding - must be multiple of 8
+		return nil, .Malformed_Input
+	}
+
+	// Calculate decoded length: 5 bytes for every 8 input chars
+	out_len := input_chars * 5 / 8
+	out = make([]byte, out_len, allocator)
+	defer if err != .None {
+		delete(out, allocator)
+	}
+
+	// Process input in 8-byte blocks
+	outi := 0
+	for i := 0; i < input_chars; i += 8 {
+		buf: [8]byte
+		block_size := min(8, input_chars - i)
+
+		// Decode block
+		for j := 0; j < block_size; j += 1 {
+			buf[j] = DEC_TBL[data[i + j]]
+		}
+
+		// Convert to output bytes based on block size
+		switch block_size {
+		case 8:
+			out[outi + 4] = (buf[6] << 5) | buf[7]
+			fallthrough
+		case 7:
+			out[outi + 3] = (buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3)
+			fallthrough
+		case 5:
+			out[outi + 2] = (buf[3] << 4) | (buf[4] >> 1)
+			fallthrough
+		case 4:
+			out[outi + 1] = (buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4)
+			fallthrough
+		case 2:
+			out[outi] = (buf[0] << 3) | (buf[1] >> 2)
+		}
+		outi += block_size * 5 / 8
+	}
+
+	return
+}
diff --git a/core/encoding/base32/base32_test.odin b/core/encoding/base32/base32_test.odin
new file mode 100644
index 000000000..ea41ae36f
--- /dev/null
+++ b/core/encoding/base32/base32_test.odin
@@ -0,0 +1,227 @@
+package encoding_base32
+
+import "core:testing"
+import "core:bytes"
+
+@(test)
+test_base32_decode_valid :: proc(t: ^testing.T) {
+	// RFC 4648 Section 10 - Test vectors
+	cases := [?]struct {
+		input, expected: string,
+	}{
+		{"", ""},
+		{"MY======", "f"},
+		{"MZXQ====", "fo"},
+		{"MZXW6===", "foo"},
+		{"MZXW6YQ=", "foob"},
+		{"MZXW6YTB", "fooba"},
+		{"MZXW6YTBOI======", "foobar"},
+	}
+
+	for c in cases {
+		output, err := decode(c.input)
+		defer if output != nil {
+			delete(output)
+		}
+		testing.expect_value(t, err, Error.None)
+		expected := transmute([]u8)c.expected
+		if output != nil {
+			testing.expect(t, bytes.equal(output, expected))
+		} else {
+			testing.expect(t, len(c.expected) == 0)
+		}
+	}
+}
+
+@(test)
+test_base32_encode :: proc(t: ^testing.T) {
+	// RFC 4648 Section 10 - Test vectors
+	cases := [?]struct {
+		input, expected: string,
+	}{
+		{"", ""},
+		{"f", "MY======"},
+		{"fo", "MZXQ===="},
+		{"foo", "MZXW6==="},
+		{"foob", "MZXW6YQ="},
+		{"fooba", "MZXW6YTB"},
+		{"foobar", "MZXW6YTBOI======"},
+	}
+
+	for c in cases {
+		output := encode(transmute([]byte)c.input)
+		defer delete(output)
+		testing.expect(t, output == c.expected)
+	}
+}
+
+@(test)
+test_base32_decode_invalid :: proc(t: ^testing.T) {
+	// Section 3.3 - Non-alphabet characters
+	{
+		// Characters outside alphabet
+		input := "MZ1W6YTB" // '1' not in alphabet (A-Z, 2-7)
+		output, err := decode(input)
+		defer if output != nil {
+			delete(output)
+		}
+		testing.expect_value(t, err, Error.Invalid_Character)
+	}
+	{
+		// Lowercase not allowed
+		input := "mzxq===="
+		output, err := decode(input)
+		defer if output != nil {
+			delete(output)
+		}
+		testing.expect_value(t, err, Error.Invalid_Character)
+	}
+
+	// Section 3.2 - Padding requirements
+	{
+		// Padding must only be at end
+		input := "MZ=Q===="
+		output, err := decode(input)
+		defer if output != nil {
+			delete(output)
+		}
+		testing.expect_value(t, err, Error.Malformed_Input)
+	}
+	{
+		// Missing padding
+		input := "MZXQ" // Should be MZXQ====
+		output, err := decode(input)
+		defer if output != nil {
+			delete(output)
+		}
+		testing.expect_value(t, err, Error.Malformed_Input)
+	}
+	{
+		// Incorrect padding length
+		input := "MZXQ=" // Needs 4 padding chars
+		output, err := decode(input)
+		defer if output != nil {
+			delete(output)
+		}
+		testing.expect_value(t, err, Error.Malformed_Input)
+	}
+	{
+		// Too much padding: more than a short group needs, or any after a complete group
+		inputs := [?]string{"MY=========", "MZXW6YTB="}
+		for input in inputs {
+			output, err := decode(input)
+			defer if output != nil { delete(output) }
+			testing.expect_value(t, err, Error.Malformed_Input)
+		}
+	}
+
+	// Section 6 - Base32 block size requirements
+	{
+		// Single character (invalid block)
+		input := "M"
+		output, err := decode(input)
+		defer if output != nil {
+			delete(output)
+		}
+		testing.expect_value(t, err, Error.Invalid_Length)
+	}
+}
+
+@(test)
+test_base32_roundtrip :: proc(t: ^testing.T) {
+	cases := [?]string{
+		"",
+		"f",
+		"fo",
+		"foo",
+		"foob",
+		"fooba",
+		"foobar",
+	}
+
+	for input in cases {
+		encoded := encode(transmute([]byte)input)
+		defer delete(encoded)
+		decoded, err := decode(encoded)
+		defer if decoded != nil {
+			delete(decoded)
+		}
+		testing.expect_value(t, err, Error.None)
+		testing.expect(t, bytes.equal(decoded, transmute([]byte)input))
+	}
+}
+
+@(test)
+test_base32_custom_alphabet :: proc(t: ^testing.T) {
+	custom_enc_table := [32]byte{
+		'0', '1', '2', '3', '4', '5', '6', '7',
+		'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
+		'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
+		'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
+	}
+
+	custom_dec_table: [256]u8
+	for i := 0; i < len(custom_enc_table); i += 1 {
+		custom_dec_table[custom_enc_table[i]] = u8(i)
+	}
+
+	/*
+	custom_dec_table := [256]u8{
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00-0x0f
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x1f
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20-0x2f
+		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, // 0x30-0x3f ('0'-'9')
+		0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 0x40-0x4f ('A'-'O')
+		25, 26, 27, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50-0x5f ('P'-'V')
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60-0x6f
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70-0x7f
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80-0x8f
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90-0x9f
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xa0-0xaf
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xb0-0xbf
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xc0-0xcf
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xd0-0xdf
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xe0-0xef
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0-0xff
+	}
+	*/
+
+	custom_validate :: proc(c: byte) -> bool {
+		return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'V') || c == byte(PADDING)
+	}
+
+	cases := [?]struct {
+		input: string,
+		enc_expected: string,
+	}{
+		{"f", "CO======"},
+		{"fo", "CPNG===="},
+		{"foo", "CPNMU==="},
+	}
+
+	for c in cases {
+		// Test encoding
+		encoded := encode(transmute([]byte)c.input, custom_enc_table)
+		defer delete(encoded)
+		testing.expect(t, encoded == c.enc_expected)
+
+		// Test decoding
+		decoded, err := decode(encoded, custom_dec_table, custom_validate)
+		defer if decoded != nil {
+			delete(decoded)
+		}
+
+		testing.expect_value(t, err, Error.None)
+		testing.expect(t, bytes.equal(decoded, transmute([]byte)c.input))
+	}
+
+	// Test invalid character detection
+	{
+		input := "WXY=====" // Contains chars not in our alphabet
+		output, err := decode(input, custom_dec_table, custom_validate)
+		if output != nil {
+			delete(output)
+		}
+		testing.expect_value(t, err, Error.Invalid_Character)
+	}
+}