From 5533a327eb0f526cbebbe71124620fcbb0bc0649 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Wed, 22 Nov 2023 16:12:37 +0100 Subject: encoding/cbor: initial package implementation --- core/encoding/base64/base64.odin | 124 +++--- core/encoding/cbor/cbor.odin | 680 +++++++++++++++++++++++++++++++ core/encoding/cbor/coding.odin | 825 +++++++++++++++++++++++++++++++++++++ core/encoding/cbor/marshal.odin | 541 +++++++++++++++++++++++++ core/encoding/cbor/tags.odin | 361 +++++++++++++++++ core/encoding/cbor/unmarshal.odin | 832 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 3316 insertions(+), 47 deletions(-) create mode 100644 core/encoding/cbor/cbor.odin create mode 100644 core/encoding/cbor/coding.odin create mode 100644 core/encoding/cbor/marshal.odin create mode 100644 core/encoding/cbor/tags.odin create mode 100644 core/encoding/cbor/unmarshal.odin (limited to 'core/encoding') diff --git a/core/encoding/base64/base64.odin b/core/encoding/base64/base64.odin index cf2ea1c12..793f22c57 100644 --- a/core/encoding/base64/base64.odin +++ b/core/encoding/base64/base64.odin @@ -1,5 +1,9 @@ package base64 +import "core:io" +import "core:mem" +import "core:strings" + // @note(zh): Encoding utility for Base64 // A secondary param can be used to supply a custom alphabet to // @link(encode) and a matching decoding table to @link(decode). @@ -39,59 +43,85 @@ DEC_TABLE := [128]int { 49, 50, 51, -1, -1, -1, -1, -1, } -encode :: proc(data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) -> string #no_bounds_check { - length := len(data) - if length == 0 { - return "" - } +encode :: proc(data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) -> (encoded: string, err: mem.Allocator_Error) #optional_allocator_error { + out_length := encoded_length(data) + if out_length == 0 { + return + } + + out: strings.Builder + strings.builder_init(&out, 0, out_length, allocator) or_return + + ioerr := encode_into(strings.to_stream(&out), data, ENC_TBL) + assert(ioerr == nil) + + return strings.to_string(out), nil +} + +encoded_length :: #force_inline proc(data: []byte) -> int { + length := len(data) + if length == 0 { + return 0 + } + + return ((4 * length / 3) + 3) &~ 3 +} - out_length := ((4 * length / 3) + 3) &~ 3 - out := make([]byte, out_length, allocator) +encode_into :: proc(w: io.Writer, data: []byte, ENC_TBL := ENC_TABLE) -> (err: io.Error) #no_bounds_check { + length := len(data) + if length == 0 { + return + } - c0, c1, c2, block: int + c0, c1, c2, block: int - for i, d := 0, 0; i < length; i, d = i + 3, d + 4 { - c0, c1, c2 = int(data[i]), -1, -1 + for i, d := 0, 0; i < length; i, d = i + 3, d + 4 { + c0, c1, c2 = int(data[i]), -1, -1 - if i + 1 < length { c1 = int(data[i + 1]) } - if i + 2 < length { c2 = int(data[i + 2]) } + if i + 1 < length { c1 = int(data[i + 1]) } + if i + 2 < length { c2 = int(data[i + 2]) } - block = (c0 << 16) | (max(c1, 0) << 8) | max(c2, 0) + block = (c0 << 16) | (max(c1, 0) << 8) | max(c2, 0) + + out: [4]byte + out[0] = ENC_TBL[block >> 18 & 63] + out[1] = ENC_TBL[block >> 12 & 63] + out[2] = c1 == -1 ? PADDING : ENC_TBL[block >> 6 & 63] + out[3] = c2 == -1 ? PADDING : ENC_TBL[block & 63] - out[d] = ENC_TBL[block >> 18 & 63] - out[d + 1] = ENC_TBL[block >> 12 & 63] - out[d + 2] = c1 == -1 ? PADDING : ENC_TBL[block >> 6 & 63] - out[d + 3] = c2 == -1 ? 
PADDING : ENC_TBL[block & 63] - } - return string(out) + #bounds_check { io.write_full(w, out[:]) or_return } + } + return } -decode :: proc(data: string, DEC_TBL := DEC_TABLE, allocator := context.allocator) -> []byte #no_bounds_check { - length := len(data) - if length == 0 { - return nil - } - - pad_count := data[length - 1] == PADDING ? (data[length - 2] == PADDING ? 2 : 1) : 0 - out_length := ((length * 6) >> 3) - pad_count - out := make([]byte, out_length, allocator) - - c0, c1, c2, c3: int - b0, b1, b2: int - - for i, j := 0, 0; i < length; i, j = i + 4, j + 3 { - c0 = DEC_TBL[data[i]] - c1 = DEC_TBL[data[i + 1]] - c2 = DEC_TBL[data[i + 2]] - c3 = DEC_TBL[data[i + 3]] - - b0 = (c0 << 2) | (c1 >> 4) - b1 = (c1 << 4) | (c2 >> 2) - b2 = (c2 << 6) | c3 - - out[j] = byte(b0) - out[j + 1] = byte(b1) - out[j + 2] = byte(b2) - } - return out +decode :: proc(data: string, DEC_TBL := DEC_TABLE, allocator := context.allocator) -> (out: []byte, err: mem.Allocator_Error) #optional_allocator_error { + #no_bounds_check { + length := len(data) + if length == 0 { + return + } + + pad_count := data[length - 1] == PADDING ? (data[length - 2] == PADDING ? 2 : 1) : 0 + out_length := ((length * 6) >> 3) - pad_count + out = make([]byte, out_length, allocator) or_return + + c0, c1, c2, c3: int + b0, b1, b2: int + + for i, j := 0, 0; i < length; i, j = i + 4, j + 3 { + c0 = DEC_TBL[data[i]] + c1 = DEC_TBL[data[i + 1]] + c2 = DEC_TBL[data[i + 2]] + c3 = DEC_TBL[data[i + 3]] + + b0 = (c0 << 2) | (c1 >> 4) + b1 = (c1 << 4) | (c2 >> 2) + b2 = (c2 << 6) | c3 + + out[j] = byte(b0) + out[j + 1] = byte(b1) + out[j + 2] = byte(b2) + } + return + } } diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin new file mode 100644 index 000000000..e91c53f3c --- /dev/null +++ b/core/encoding/cbor/cbor.odin @@ -0,0 +1,680 @@ +package cbor + +import "core:encoding/json" +import "core:intrinsics" +import "core:io" +import "core:mem" +import "core:runtime" +import "core:strconv" +import "core:strings" + +// If we are decoding a stream of either a map or list, the initial capacity will be this value. +INITIAL_STREAMED_CONTAINER_CAPACITY :: 8 +// If we are decoding a stream of either text or bytes, the initial capacity will be this value. +INITIAL_STREAMED_BYTES_CAPACITY :: 16 + +// Known/common headers are defined, undefined headers can still be valid. +// Higher 3 bits is for the major type and lower 5 bits for the additional information. +Header :: enum u8 { + U8 = (u8(Major.Unsigned) << 5) | u8(Add.One_Byte), + U16 = (u8(Major.Unsigned) << 5) | u8(Add.Two_Bytes), + U32 = (u8(Major.Unsigned) << 5) | u8(Add.Four_Bytes), + U64 = (u8(Major.Unsigned) << 5) | u8(Add.Eight_Bytes), + + Neg_U8 = (u8(Major.Negative) << 5) | u8(Add.One_Byte), + Neg_U16 = (u8(Major.Negative) << 5) | u8(Add.Two_Bytes), + Neg_U32 = (u8(Major.Negative) << 5) | u8(Add.Four_Bytes), + Neg_U64 = (u8(Major.Negative) << 5) | u8(Add.Eight_Bytes), + + False = (u8(Major.Other) << 5) | u8(Add.False), + True = (u8(Major.Other) << 5) | u8(Add.True), + + Nil = (u8(Major.Other) << 5) | u8(Add.Nil), + Undefined = (u8(Major.Other) << 5) | u8(Add.Undefined), + + Simple = (u8(Major.Other) << 5) | u8(Add.One_Byte), + + F16 = (u8(Major.Other) << 5) | u8(Add.Two_Bytes), + F32 = (u8(Major.Other) << 5) | u8(Add.Four_Bytes), + F64 = (u8(Major.Other) << 5) | u8(Add.Eight_Bytes), + + Break = (u8(Major.Other) << 5) | u8(Add.Break), +} + +// The higher 3 bits of the header which denotes what type of value it is. 
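+//
+// For example, header byte 0x19 (0b000_11001) splits into major 0 (`.Unsigned`)
+// and additional information 25 (`.Two_Bytes`), i.e. an unsigned integer stored
+// in the two bytes that follow; `_header_split` in coding.odin performs this split.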
+Major :: enum u8 { + Unsigned, + Negative, + Bytes, + Text, + Array, + Map, + Tag, + Other, +} + +// The lower 3 bits of the header which denotes additional information for the type of value. +Add :: enum u8 { + False = 20, + True = 21, + Nil = 22, + Undefined = 23, + + One_Byte = 24, + Two_Bytes = 25, + Four_Bytes = 26, + Eight_Bytes = 27, + + Length_Unknown = 31, + Break = Length_Unknown, +} + +Value :: union { + u8, + u16, + u32, + u64, + + Negative_U8, + Negative_U16, + Negative_U32, + Negative_U64, + + // Pointers so the size of the Value union stays small. + ^Bytes, + ^Text, + ^Array, + ^Map, + ^Tag, + + Simple, + f16, + f32, + f64, + bool, + Undefined, + Nil, +} + +Bytes :: []byte +Text :: string + +Array :: []Value + +Map :: []Map_Entry +Map_Entry :: struct { + key: Value, // Can be any unsigned, negative, float, Simple, bool, Text. + value: Value, +} + +Tag :: struct { + number: Tag_Number, + value: Value, // Value based on the number. +} + +Tag_Number :: u64 + +Nil :: distinct rawptr +Undefined :: distinct rawptr + +// A distinct atom-like number, range from `0..=19` and `32..=max(u8)`. +Simple :: distinct u8 +Atom :: Simple + +Unmarshal_Error :: union #shared_nil { + io.Error, + mem.Allocator_Error, + Decode_Data_Error, + Unmarshal_Data_Error, + Maybe(Unsupported_Type_Error), +} + +Marshal_Error :: union #shared_nil { + io.Error, + mem.Allocator_Error, + Encode_Data_Error, + Marshal_Data_Error, + Maybe(Unsupported_Type_Error), +} + +Decode_Error :: union #shared_nil { + io.Error, + mem.Allocator_Error, + Decode_Data_Error, +} + +Encode_Error :: union #shared_nil { + io.Error, + mem.Allocator_Error, + Encode_Data_Error, +} + +Decode_Data_Error :: enum { + None, + Bad_Major, // An invalid major type was encountered. + Bad_Argument, // A general unexpected value (most likely invalid additional info in header). + Bad_Tag_Value, // When the type of value for the given tag is not valid. + Nested_Indefinite_Length, // When an streamed/indefinite length container nests another, this is not allowed. + Nested_Tag, // When a tag's value is another tag, this is not allowed. + Length_Too_Big, // When the length of a container (map, array, bytes, string) is more than `max(int)`. + Break, +} + +Encode_Data_Error :: enum { + None, + Invalid_Simple, // When a simple is being encoded that is out of the range `0..=19` and `32..=max(u8)`. + Int_Too_Big, // When an int is being encoded that is larger than `max(u64)` or smaller than `min(u64)`. + Bad_Tag_Value, // When the type of value is not supported by the tag implementation. +} + +Unmarshal_Data_Error :: enum { + None, + Invalid_Parameter, // When the given `any` can not be unmarshalled into. + Non_Pointer_Parameter, // When the given `any` is not a pointer. +} + +Marshal_Data_Error :: enum { + None, + Invalid_CBOR_Tag, // When the struct tag `cbor_tag:""` is not a registered name or number. +} + +// Error that is returned when a type couldn't be marshalled into or out of, as much information +// as possible/available is added. +Unsupported_Type_Error :: struct { + id: typeid, + hdr: Header, + add: Add, +} + +_unsupported :: proc(v: any, hdr: Header, add: Add = nil) -> Maybe(Unsupported_Type_Error) { + return Unsupported_Type_Error{ + id = v.id, + hdr = hdr, + add = add, + } +} + +// Actual value is `-1 - x` (be careful of overflows). + +Negative_U8 :: distinct u8 +Negative_U16 :: distinct u16 +Negative_U32 :: distinct u32 +Negative_U64 :: distinct u64 + +// Turns the CBOR negative unsigned int type into a signed integer type. 
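+//
+// For example, `Negative_U8(9)` represents the CBOR value -10, so
+// `negative_to_int(Negative_U8(9))` yields `i16(-10)`; each variant widens by one
+// integer size so that even `max(u64)` (which represents -2^64) still fits.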
+negative_to_int :: proc { + negative_u8_to_int, + negative_u16_to_int, + negative_u32_to_int, + negative_u64_to_int, +} + +negative_u8_to_int :: #force_inline proc(u: Negative_U8) -> i16 { + return -1 - i16(u) +} + +negative_u16_to_int :: #force_inline proc(u: Negative_U16) -> i32 { + return -1 - i32(u) +} + +negative_u32_to_int :: #force_inline proc(u: Negative_U32) -> i64 { + return -1 - i64(u) +} + +negative_u64_to_int :: #force_inline proc(u: Negative_U64) -> i128 { + return -1 - i128(u) +} + +// Utility for converting between the different errors when they are subsets of the other. +err_conv :: proc { + encode_to_marshal_err, + decode_to_unmarshal_err, + decode_to_unmarshal_err_p, + decode_to_unmarshal_err_p2, +} + +encode_to_marshal_err :: #force_inline proc(err: Encode_Error) -> Marshal_Error { + switch e in err { + case nil: return nil + case io.Error: return e + case mem.Allocator_Error: return e + case Encode_Data_Error: return e + case: return nil + } +} + +decode_to_unmarshal_err :: #force_inline proc(err: Decode_Error) -> Unmarshal_Error { + switch e in err { + case nil: return nil + case io.Error: return e + case mem.Allocator_Error: return e + case Decode_Data_Error: return e + case: return nil + } +} + +decode_to_unmarshal_err_p :: #force_inline proc(v: $T, err: Decode_Error) -> (T, Unmarshal_Error) { + return v, err_conv(err) +} + +decode_to_unmarshal_err_p2 :: #force_inline proc(v: $T, v2: $T2, err: Decode_Error) -> (T, T2, Unmarshal_Error) { + return v, v2, err_conv(err) +} + +// Recursively frees all memory allocated when decoding the passed value. +destroy :: proc(val: Value, allocator := context.allocator) { + context.allocator = allocator + #partial switch v in val { + case ^Map: + if v == nil { return } + for entry in v { + destroy(entry.key) + destroy(entry.value) + } + delete(v^) + free(v) + case ^Array: + if v == nil { return } + for entry in v { + destroy(entry) + } + delete(v^) + free(v) + case ^Text: + if v == nil { return } + delete(v^) + free(v) + case ^Bytes: + if v == nil { return } + delete(v^) + free(v) + case ^Tag: + if v == nil { return } + destroy(v.value) + free(v) + } +} + +/* +diagnose either writes or returns a human-readable representation of the value, +optionally formatted, defined as the diagnostic format in section 8 of RFC 8949. + +Incidentally, if the CBOR does not contain any of the additional types defined on top of JSON +this will also be valid JSON. +*/ +diagnose :: proc { + diagnostic_string, + diagnose_to_writer, +} + +// Turns the given CBOR value into a human-readable string. +// See docs on the proc group `diagnose` for more info. +diagnostic_string :: proc(val: Value, padding := 0, allocator := context.allocator) -> (string, mem.Allocator_Error) #optional_allocator_error { + b := strings.builder_make(allocator) + w := strings.to_stream(&b) + err := diagnose_to_writer(w, val, padding) + if err == .EOF { + // The string builder stream only returns .EOF, and only if it can't write (out of memory). + return "", .Out_Of_Memory + } + assert(err == nil) + + return strings.to_string(b), nil +} + +// Writes the given CBOR value into the writer as human-readable text. +// See docs on the proc group `diagnose` for more info. 
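+//
+// Rough usage sketch (string variant of the `diagnose` group, error handling elided):
+//
+//	str, _ := cbor.diagnose(val)
+//	defer delete(str)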
+diagnose_to_writer :: proc(w: io.Writer, val: Value, padding := 0) -> io.Error { + @(require_results) + indent :: proc(padding: int) -> int { + padding := padding + if padding != -1 { + padding += 1 + } + return padding + } + + @(require_results) + dedent :: proc(padding: int) -> int { + padding := padding + if padding != -1 { + padding -= 1 + } + return padding + } + + comma :: proc(w: io.Writer, padding: int) -> io.Error { + _ = io.write_string(w, ", " if padding == -1 else ",") or_return + return nil + } + + newline :: proc(w: io.Writer, padding: int) -> io.Error { + if padding != -1 { + io.write_string(w, "\n") or_return + for _ in 0.. (Value, mem.Allocator_Error) #optional_allocator_error { + internal :: proc(val: json.Value) -> (ret: Value, err: mem.Allocator_Error) { + switch v in val { + case json.Null: return Nil{}, nil + case json.Integer: + i, major := _int_to_uint(v) + #partial switch major { + case .Unsigned: return i, nil + case .Negative: return Negative_U64(i), nil + case: unreachable() + } + case json.Float: return v, nil + case json.Boolean: return v, nil + case json.String: + container := new(Text) or_return + + // We need the string to have a nil byte at the end so we clone to cstring. + container^ = string(strings.clone_to_cstring(v) or_return) + return container, nil + case json.Array: + arr := new(Array) or_return + arr^ = make([]Value, len(v)) or_return + for _, i in arr { + arr[i] = internal(v[i]) or_return + } + return arr, nil + case json.Object: + m := new(Map) or_return + dm := make([dynamic]Map_Entry, 0, len(v)) or_return + for mkey, mval in v { + append(&dm, Map_Entry{from_json(mkey) or_return, from_json(mval) or_return}) + } + m^ = dm[:] + return m, nil + } + return nil, nil + } + + context.allocator = allocator + return internal(val) +} + +/* +Converts from CBOR to JSON. + +NOTE: overflow on integers or floats is not handled. + +Everything is copied to the given allocator, the passed in CBOR value can be `destroy`'ed after. + +If a CBOR map with non-string keys is encountered it is turned into an array of tuples. 
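+
+For example, a CBOR map with integer keys such as {1: "one", 2: "two"} comes out
+as the JSON array [[1, "one"], [2, "two"]] instead of a JSON object.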
+*/ +to_json :: proc(val: Value, allocator := context.allocator) -> (json.Value, mem.Allocator_Error) #optional_allocator_error { + internal :: proc(val: Value) -> (ret: json.Value, err: mem.Allocator_Error) { + switch v in val { + case Simple: return json.Integer(v), nil + + case u8: return json.Integer(v), nil + case u16: return json.Integer(v), nil + case u32: return json.Integer(v), nil + case u64: return json.Integer(v), nil + + case Negative_U8: return json.Integer(negative_to_int(v)), nil + case Negative_U16: return json.Integer(negative_to_int(v)), nil + case Negative_U32: return json.Integer(negative_to_int(v)), nil + case Negative_U64: return json.Integer(negative_to_int(v)), nil + + case f16: return json.Float(v), nil + case f32: return json.Float(v), nil + case f64: return json.Float(v), nil + + case bool: return json.Boolean(v), nil + + case Undefined: return json.Null{}, nil + case Nil: return json.Null{}, nil + + case ^Bytes: return json.String(strings.clone(string(v^)) or_return), nil + case ^Text: return json.String(strings.clone(v^) or_return), nil + + case ^Map: + keys_all_strings :: proc(m: ^Map) -> bool { + for entry in m { + #partial switch kv in entry.key { + case ^Bytes: + case ^Text: + case: return false + } + } + return false + } + + if keys_all_strings(v) { + obj := make(json.Object, len(v)) or_return + for entry in v { + k: string + #partial switch kv in entry.key { + case ^Bytes: k = string(kv^) + case ^Text: k = kv^ + case: unreachable() + } + + v := internal(entry.value) or_return + obj[k] = v + } + return obj, nil + } else { + // Resort to an array of tuples if keys aren't all strings. + arr := make(json.Array, 0, len(v)) or_return + for entry in v { + entry_arr := make(json.Array, 0, 2) or_return + append(&entry_arr, internal(entry.key) or_return) or_return + append(&entry_arr, internal(entry.value) or_return) or_return + append(&arr, entry_arr) or_return + } + return arr, nil + } + + case ^Array: + arr := make(json.Array, 0, len(v)) or_return + for entry in v { + append(&arr, internal(entry) or_return) or_return + } + return arr, nil + + case ^Tag: + obj := make(json.Object, 2) or_return + obj[strings.clone("number") or_return] = internal(v.number) or_return + obj[strings.clone("value") or_return] = internal(v.value) or_return + return obj, nil + + case: return json.Null{}, nil + } + } + + context.allocator = allocator + return internal(val) +} + +_int_to_uint :: proc { + _i8_to_uint, + _i16_to_uint, + _i32_to_uint, + _i64_to_uint, + _i128_to_uint, +} + +_u128_to_u64 :: #force_inline proc(v: u128) -> (u64, Encode_Data_Error) { + if v > u128(max(u64)) { + return 0, .Int_Too_Big + } + + return u64(v), nil +} + +_i8_to_uint :: #force_inline proc(v: i8) -> (u: u8, m: Major) { + if v < 0 { + return u8(abs(v)-1), .Negative + } + + return u8(v), .Unsigned +} + +_i16_to_uint :: #force_inline proc(v: i16) -> (u: u16, m: Major) { + if v < 0 { + return u16(abs(v)-1), .Negative + } + + return u16(v), .Unsigned +} + +_i32_to_uint :: #force_inline proc(v: i32) -> (u: u32, m: Major) { + if v < 0 { + return u32(abs(v)-1), .Negative + } + + return u32(v), .Unsigned +} + +_i64_to_uint :: #force_inline proc(v: i64) -> (u: u64, m: Major) { + if v < 0 { + return u64(abs(v)-1), .Negative + } + + return u64(v), .Unsigned +} + +_i128_to_uint :: proc(v: i128) -> (u: u64, m: Major, err: Encode_Data_Error) { + if v < 0 { + m = .Negative + u, err = _u128_to_u64(u128(abs(v) - 1)) + return + } + + m = .Unsigned + u, err = _u128_to_u64(u128(v)) + return +} + +@(private) 
+is_bit_set_different_endian_to_platform :: proc(ti: ^runtime.Type_Info) -> bool { + if ti == nil { + return false + } + t := runtime.type_info_base(ti) + #partial switch info in t.variant { + case runtime.Type_Info_Integer: + switch info.endianness { + case .Platform: return false + case .Little: return ODIN_ENDIAN != .Little + case .Big: return ODIN_ENDIAN != .Big + } + } + return false +} + diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin new file mode 100644 index 000000000..5c14d8f87 --- /dev/null +++ b/core/encoding/cbor/coding.odin @@ -0,0 +1,825 @@ +package cbor + +import "core:bytes" +import "core:encoding/endian" +import "core:intrinsics" +import "core:io" +import "core:slice" +import "core:strings" + +Encoder_Flag :: enum { + // CBOR defines a tag header that also acts as a file/binary header, + // this way decoders can check the first header of the binary and see if it is CBOR. + Self_Described_CBOR, + + // Integers are stored in the smallest integer type it fits. + // This involves checking each int against the max of all its smaller types. + Deterministic_Int_Size, + + // Floats are stored in the smallest size float type without losing precision. + // This involves casting each float down to its smaller types and checking if it changed. + Deterministic_Float_Size, + + // Sort maps by their keys in bytewise lexicographic order of their deterministic encoding. + // NOTE: In order to do this, all keys of a map have to be pre-computed, sorted, and + // then written, this involves temporary allocations for the keys and a copy of the map itself. + Deterministic_Map_Sorting, + + // Internal flag to do initialization. + _In_Progress, +} + +Encoder_Flags :: bit_set[Encoder_Flag] + +// Flags for fully deterministic output (if you are not using streaming/indeterminate length). +ENCODE_FULLY_DETERMINISTIC :: Encoder_Flags{.Deterministic_Int_Size, .Deterministic_Float_Size, .Deterministic_Map_Sorting} +// Flags for the smallest encoding output. +ENCODE_SMALL :: Encoder_Flags{.Deterministic_Int_Size, .Deterministic_Float_Size} +// Flags for the fastest encoding output. +ENCODE_FAST :: Encoder_Flags{} + +Encoder :: struct { + flags: Encoder_Flags, + writer: io.Writer, +} + +/* +Decodes both deterministic and non-deterministic CBOR into a `Value` variant. + +`Text` and `Bytes` can safely be cast to cstrings because of an added 0 byte. + +Allocations are done using the given allocator, +*no* allocations are done on the `context.temp_allocator`. + +A value can be (fully and recursively) deallocated using the `destroy` proc in this package. +*/ +decode :: proc { + decode_string, + decode_reader, +} + +// Decodes the given string as CBOR. +// See docs on the proc group `decode` for more information. +decode_string :: proc(s: string, allocator := context.allocator) -> (v: Value, err: Decode_Error) { + context.allocator = allocator + + r: strings.Reader + strings.reader_init(&r, s) + return decode(strings.reader_to_stream(&r), allocator=allocator) +} + +// Reads a CBOR value from the given reader. +// See docs on the proc group `decode` for more information. 
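+//
+// Rough usage sketch of the `decode` group (error handling elided), where `data`
+// is a string holding the raw CBOR bytes:
+//
+//	val, _ := cbor.decode(data)
+//	defer cbor.destroy(val)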
+decode_reader :: proc(r: io.Reader, hdr: Header = Header(0), allocator := context.allocator) -> (v: Value, err: Decode_Error) { + context.allocator = allocator + + hdr := hdr + if hdr == Header(0) { hdr = _decode_header(r) or_return } + switch hdr { + case .U8: return _decode_u8 (r) + case .U16: return _decode_u16(r) + case .U32: return _decode_u32(r) + case .U64: return _decode_u64(r) + + case .Neg_U8: return Negative_U8 (_decode_u8 (r) or_return), nil + case .Neg_U16: return Negative_U16(_decode_u16(r) or_return), nil + case .Neg_U32: return Negative_U32(_decode_u32(r) or_return), nil + case .Neg_U64: return Negative_U64(_decode_u64(r) or_return), nil + + case .Simple: return _decode_simple(r) + + case .F16: return _decode_f16(r) + case .F32: return _decode_f32(r) + case .F64: return _decode_f64(r) + + case .True: return true, nil + case .False: return false, nil + + case .Nil: return Nil{}, nil + case .Undefined: return Undefined{}, nil + + case .Break: return nil, .Break + } + + maj, add := _header_split(hdr) + switch maj { + case .Unsigned: return _decode_tiny_u8(add) + case .Negative: return Negative_U8(_decode_tiny_u8(add) or_return), nil + case .Bytes: return _decode_bytes_ptr(r, add) + case .Text: return _decode_text_ptr(r, add) + case .Array: return _decode_array_ptr(r, add) + case .Map: return _decode_map_ptr(r, add) + case .Tag: return _decode_tag_ptr(r, add) + case .Other: return _decode_tiny_simple(add) + case: return nil, .Bad_Major + } +} + +/* +Encodes the CBOR value into a binary CBOR. + +Flags can be used to control the output (mainly determinism, which coincidently affects size). + +The default flags `ENCODE_SMALL` (`.Deterministic_Int_Size`, `.Deterministic_Float_Size`) will try +to put ints and floats into their smallest possible byte size without losing equality. + +Adding the `.Self_Described_CBOR` flag will wrap the value in a tag that lets generic decoders know +the contents are CBOR from just reading the first byte. + +Adding the `.Deterministic_Map_Sorting` flag will sort the encoded maps by the byte content of the +encoded key. This flag has a cost on performance and memory efficiency because all keys in a map +have to be precomputed, sorted and only then written to the output. + +Empty flags will do nothing extra to the value. + +The allocations for the `.Deterministic_Map_Sorting` flag are done using the `context.temp_allocator` +but are followed by the necessary `delete` and `free` calls if the allocator supports them. +This is helpful when the CBOR size is so big that you don't want to collect all the temporary +allocations until the end. +*/ +encode_into :: proc { + encode_into_bytes, + encode_into_builder, + encode_into_writer, + encode_into_encoder, +} +encode :: encode_into + +// Encodes the CBOR value into binary CBOR allocated on the given allocator. +// See the docs on the proc group `encode_into` for more info. +encode_into_bytes :: proc(v: Value, flags := ENCODE_SMALL, allocator := context.allocator) -> (data: []byte, err: Encode_Error) { + b := strings.builder_make(allocator) or_return + encode_into_builder(&b, v, flags) or_return + return b.buf[:], nil +} + +// Encodes the CBOR value into binary CBOR written to the given builder. +// See the docs on the proc group `encode_into` for more info. +encode_into_builder :: proc(b: ^strings.Builder, v: Value, flags := ENCODE_SMALL) -> Encode_Error { + return encode_into_writer(strings.to_stream(b), v, flags) +} + +// Encodes the CBOR value into binary CBOR written to the given writer. 
+// See the docs on the proc group `encode_into` for more info. +encode_into_writer :: proc(w: io.Writer, v: Value, flags := ENCODE_SMALL) -> Encode_Error { + return encode_into_encoder(Encoder{flags, w}, v) +} + +// Encodes the CBOR value into binary CBOR written to the given encoder. +// See the docs on the proc group `encode_into` for more info. +encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { + e := e + + outer: bool + defer if outer { + e.flags &~= {._In_Progress} + } + + if ._In_Progress not_in e.flags { + outer = true + e.flags |= {._In_Progress} + + if .Self_Described_CBOR in e.flags { + _encode_u64(e, TAG_SELF_DESCRIBED_CBOR, .Tag) or_return + } + } + + switch v_spec in v { + case u8: return _encode_u8(e.writer, v_spec, .Unsigned) + case u16: return _encode_u16(e, v_spec, .Unsigned) + case u32: return _encode_u32(e, v_spec, .Unsigned) + case u64: return _encode_u64(e, v_spec, .Unsigned) + case Negative_U8: return _encode_u8(e.writer, u8(v_spec), .Negative) + case Negative_U16: return _encode_u16(e, u16(v_spec), .Negative) + case Negative_U32: return _encode_u32(e, u32(v_spec), .Negative) + case Negative_U64: return _encode_u64(e, u64(v_spec), .Negative) + case ^Bytes: return _encode_bytes(e, v_spec^) + case ^Text: return _encode_text(e, v_spec^) + case ^Array: return _encode_array(e, v_spec^) + case ^Map: return _encode_map(e, v_spec^) + case ^Tag: return _encode_tag(e, v_spec^) + case Simple: return _encode_simple(e.writer, v_spec) + case f16: return _encode_f16(e.writer, v_spec) + case f32: return _encode_f32(e, v_spec) + case f64: return _encode_f64(e, v_spec) + case bool: return _encode_bool(e.writer, v_spec) + case Nil: return _encode_nil(e.writer) + case Undefined: return _encode_undefined(e.writer) + case: return nil + } +} + +_decode_header :: proc(r: io.Reader) -> (hdr: Header, err: io.Error) { + buf: [1]byte + io.read_full(r, buf[:]) or_return + return Header(buf[0]), nil +} + +_header_split :: proc(hdr: Header) -> (Major, Add) { + return Major(u8(hdr) >> 5), Add(u8(hdr) & 0x1f) +} + +_decode_u8 :: proc(r: io.Reader) -> (v: u8, err: io.Error) { + byte: [1]byte + io.read_full(r, byte[:]) or_return + return byte[0], nil +} + +_encode_uint :: proc { + _encode_u8, + _encode_u16, + _encode_u32, + _encode_u64, +} + +_encode_u8 :: proc(w: io.Writer, v: u8, major: Major = .Unsigned) -> (err: io.Error) { + header := u8(major) << 5 + if v < u8(Add.One_Byte) { + header |= v + _, err = io.write_full(w, {header}) + return + } + + header |= u8(Add.One_Byte) + _, err = io.write_full(w, {header, v}) + return +} + +_decode_tiny_u8 :: proc(additional: Add) -> (u8, Decode_Data_Error) { + if intrinsics.expect(additional < .One_Byte, true) { + return u8(additional), nil + } + + return 0, .Bad_Argument +} + +_decode_u16 :: proc(r: io.Reader) -> (v: u16, err: io.Error) { + bytes: [2]byte + io.read_full(r, bytes[:]) or_return + return endian.unchecked_get_u16be(bytes[:]), nil +} + +_encode_u16 :: proc(e: Encoder, v: u16, major: Major = .Unsigned) -> Encode_Error { + if .Deterministic_Int_Size in e.flags { + return _encode_deterministic_uint(e.writer, v, major) + } + return _encode_u16_exact(e.writer, v, major) +} + +_encode_u16_exact :: proc(w: io.Writer, v: u16, major: Major = .Unsigned) -> (err: io.Error) { + bytes: [3]byte + bytes[0] = (u8(major) << 5) | u8(Add.Two_Bytes) + endian.unchecked_put_u16be(bytes[1:], v) + _, err = io.write_full(w, bytes[:]) + return +} + +_decode_u32 :: proc(r: io.Reader) -> (v: u32, err: io.Error) { + bytes: [4]byte + io.read_full(r, bytes[:]) 
or_return + return endian.unchecked_get_u32be(bytes[:]), nil +} + +_encode_u32 :: proc(e: Encoder, v: u32, major: Major = .Unsigned) -> Encode_Error { + if .Deterministic_Int_Size in e.flags { + return _encode_deterministic_uint(e.writer, v, major) + } + return _encode_u32_exact(e.writer, v, major) +} + +_encode_u32_exact :: proc(w: io.Writer, v: u32, major: Major = .Unsigned) -> (err: io.Error) { + bytes: [5]byte + bytes[0] = (u8(major) << 5) | u8(Add.Four_Bytes) + endian.unchecked_put_u32be(bytes[1:], v) + _, err = io.write_full(w, bytes[:]) + return +} + +_decode_u64 :: proc(r: io.Reader) -> (v: u64, err: io.Error) { + bytes: [8]byte + io.read_full(r, bytes[:]) or_return + return endian.unchecked_get_u64be(bytes[:]), nil +} + +_encode_u64 :: proc(e: Encoder, v: u64, major: Major = .Unsigned) -> Encode_Error { + if .Deterministic_Int_Size in e.flags { + return _encode_deterministic_uint(e.writer, v, major) + } + return _encode_u64_exact(e.writer, v, major) +} + +_encode_u64_exact :: proc(w: io.Writer, v: u64, major: Major = .Unsigned) -> (err: io.Error) { + bytes: [9]byte + bytes[0] = (u8(major) << 5) | u8(Add.Eight_Bytes) + endian.unchecked_put_u64be(bytes[1:], v) + _, err = io.write_full(w, bytes[:]) + return +} + +_decode_bytes_ptr :: proc(r: io.Reader, add: Add, type: Major = .Bytes) -> (v: ^Bytes, err: Decode_Error) { + v = new(Bytes) or_return + defer if err != nil { free(v) } + + v^ = _decode_bytes(r, add, type) or_return + return +} + +_decode_bytes :: proc(r: io.Reader, add: Add, type: Major = .Bytes) -> (v: Bytes, err: Decode_Error) { + _n_items, length_is_unknown := _decode_container_length(r, add) or_return + + n_items := _n_items.? or_else INITIAL_STREAMED_BYTES_CAPACITY + + if length_is_unknown { + buf: strings.Builder + buf.buf = make([dynamic]byte, 0, n_items) or_return + defer if err != nil { strings.builder_destroy(&buf) } + + buf_stream := strings.to_stream(&buf) + + for { + header := _decode_header(r) or_return + maj, add := _header_split(header) + + #partial switch maj { + case type: + _n_items, length_is_unknown := _decode_container_length(r, add) or_return + if length_is_unknown { + return nil, .Nested_Indefinite_Length + } + n_items := i64(_n_items.?) + + copied := io.copy_n(buf_stream, r, n_items) or_return + assert(copied == n_items) + + case .Other: + if add != .Break { return nil, .Bad_Argument } + + v = buf.buf[:] + + // Write zero byte so this can be converted to cstring. + io.write_full(buf_stream, {0}) or_return + shrink(&buf.buf) // Ignoring error, this is not critical to succeed. + return + + case: + return nil, .Bad_Major + } + } + } else { + v = make([]byte, n_items + 1) or_return // Space for the bytes and a zero byte. + defer if err != nil { delete(v) } + + io.read_full(r, v[:n_items]) or_return + + v = v[:n_items] // Take off zero byte. 
+ return + } +} + +_encode_bytes :: proc(e: Encoder, val: Bytes, major: Major = .Bytes) -> (err: Encode_Error) { + assert(len(val) >= 0) + _encode_u64(e, u64(len(val)), major) or_return + _, err = io.write_full(e.writer, val[:]) + return +} + +_decode_text_ptr :: proc(r: io.Reader, add: Add) -> (v: ^Text, err: Decode_Error) { + v = new(Text) or_return + defer if err != nil { free(v) } + + v^ = _decode_text(r, add) or_return + return +} + +_decode_text :: proc(r: io.Reader, add: Add) -> (v: Text, err: Decode_Error) { + return (Text)(_decode_bytes(r, add, .Text) or_return), nil +} + +_encode_text :: proc(e: Encoder, val: Text) -> Encode_Error { + return _encode_bytes(e, transmute([]byte)val, .Text) +} + +_decode_array_ptr :: proc(r: io.Reader, add: Add) -> (v: ^Array, err: Decode_Error) { + v = new(Array) or_return + defer if err != nil { free(v) } + + v^ = _decode_array(r, add) or_return + return +} + +_decode_array :: proc(r: io.Reader, add: Add) -> (v: Array, err: Decode_Error) { + _n_items, length_is_unknown := _decode_container_length(r, add) or_return + n_items := _n_items.? or_else INITIAL_STREAMED_CONTAINER_CAPACITY + + array := make([dynamic]Value, 0, n_items) or_return + defer if err != nil { + for entry in array { destroy(entry) } + delete(array) + } + + for i := 0; length_is_unknown || i < n_items; i += 1 { + val, verr := decode(r) + if length_is_unknown && verr == .Break { + break + } else if verr != nil { + err = verr + return + } + + append(&array, val) or_return + } + + shrink(&array) + v = array[:] + return +} + +_encode_array :: proc(e: Encoder, arr: Array) -> Encode_Error { + assert(len(arr) >= 0) + _encode_u64(e, u64(len(arr)), .Array) + for val in arr { + encode(e, val) or_return + } + return nil +} + +_decode_map_ptr :: proc(r: io.Reader, add: Add) -> (v: ^Map, err: Decode_Error) { + v = new(Map) or_return + defer if err != nil { free(v) } + + v^ = _decode_map(r, add) or_return + return +} + +_decode_map :: proc(r: io.Reader, add: Add) -> (v: Map, err: Decode_Error) { + _n_items, length_is_unknown := _decode_container_length(r, add) or_return + n_items := _n_items.? or_else INITIAL_STREAMED_CONTAINER_CAPACITY + + items := make([dynamic]Map_Entry, 0, n_items) or_return + defer if err != nil { + for entry in items { + destroy(entry.key) + destroy(entry.value) + } + delete(items) + } + + for i := 0; length_is_unknown || i < n_items; i += 1 { + key, kerr := decode(r) + if length_is_unknown && kerr == .Break { + break + } else if kerr != nil { + return nil, kerr + } + + value := decode(r) or_return + + append(&items, Map_Entry{ + key = key, + value = value, + }) or_return + } + + shrink(&items) + v = items[:] + return +} + +_encode_map :: proc(e: Encoder, m: Map) -> (err: Encode_Error) { + assert(len(m) >= 0) + _encode_u64(e, u64(len(m)), .Map) or_return + + if .Deterministic_Map_Sorting not_in e.flags { + for entry in m { + encode(e, entry.key) or_return + encode(e, entry.value) or_return + } + return + } + + // Deterministic_Map_Sorting needs us to sort the entries by the byte contents of the + // encoded key. + // + // This means we have to store and sort them before writing incurring extra (temporary) allocations. 
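+	//
+	// For example, with deterministic int sizes the keys 10, 100, -1, "z" and "aa"
+	// end up in exactly that order, since their encodings 0x0a, 0x1864, 0x20,
+	// 0x617a and 0x626161 compare bytewise (RFC 8949, section 4.2.1).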
+ + Map_Entry_With_Key :: struct { + encoded_key: []byte, + entry: Map_Entry, + } + + entries := make([]Map_Entry_With_Key, len(m), context.temp_allocator) or_return + defer delete(entries, context.temp_allocator) + + for &entry, i in entries { + entry.entry = m[i] + + buf := strings.builder_make(0, 8, context.temp_allocator) or_return + + ke := e + ke.writer = strings.to_stream(&buf) + + encode(ke, entry.entry.key) or_return + entry.encoded_key = buf.buf[:] + } + + // Sort lexicographic on the bytes of the key. + slice.sort_by_cmp(entries, proc(a, b: Map_Entry_With_Key) -> slice.Ordering { + return slice.Ordering(bytes.compare(a.encoded_key, b.encoded_key)) + }) + + for entry in entries { + io.write_full(e.writer, entry.encoded_key) or_return + delete(entry.encoded_key, context.temp_allocator) + + encode(e, entry.entry.value) or_return + } + + return nil +} + +_decode_tag_ptr :: proc(r: io.Reader, add: Add) -> (v: Value, err: Decode_Error) { + tag := _decode_tag(r, add) or_return + if t, ok := tag.?; ok { + defer if err != nil { destroy(t.value) } + tp := new(Tag) or_return + tp^ = t + return tp, nil + } + + // no error, no tag, this was the self described CBOR tag, skip it. + return decode(r) +} + +_decode_tag :: proc(r: io.Reader, add: Add) -> (v: Maybe(Tag), err: Decode_Error) { + num := _decode_tag_nr(r, add) or_return + + // CBOR can be wrapped in a tag that decoders can use to see/check if the binary data is CBOR. + // We can ignore it here. + if num == TAG_SELF_DESCRIBED_CBOR { + return + } + + t := Tag{ + number = num, + value = decode(r) or_return, + } + + if nested, ok := t.value.(^Tag); ok { + destroy(nested) + return nil, .Nested_Tag + } + + return t, nil +} + +_decode_tag_nr :: proc(r: io.Reader, add: Add) -> (nr: Tag_Number, err: Decode_Error) { + #partial switch add { + case .One_Byte: return u64(_decode_u8(r) or_return), nil + case .Two_Bytes: return u64(_decode_u16(r) or_return), nil + case .Four_Bytes: return u64(_decode_u32(r) or_return), nil + case .Eight_Bytes: return u64(_decode_u64(r) or_return), nil + case: return u64(_decode_tiny_u8(add) or_return), nil + } +} + +_encode_tag :: proc(e: Encoder, val: Tag) -> Encode_Error { + _encode_u64(e, val.number, .Tag) or_return + return encode(e, val.value) +} + +_decode_simple :: proc(r: io.Reader) -> (v: Simple, err: io.Error) { + buf: [1]byte + io.read_full(r, buf[:]) or_return + return Simple(buf[0]), nil +} + +_encode_simple :: proc(w: io.Writer, v: Simple) -> (err: Encode_Error) { + header := u8(Major.Other) << 5 + + if v < Simple(Add.False) { + header |= u8(v) + _, err = io.write_full(w, {header}) + return + } else if v <= Simple(Add.Break) { + return .Invalid_Simple + } + + header |= u8(Add.One_Byte) + _, err = io.write_full(w, {header, u8(v)}) + return +} + +_decode_tiny_simple :: proc(add: Add) -> (Simple, Decode_Data_Error) { + if add < Add.False { + return Simple(add), nil + } + + return 0, .Bad_Argument +} + +_decode_f16 :: proc(r: io.Reader) -> (v: f16, err: io.Error) { + bytes: [2]byte + io.read_full(r, bytes[:]) or_return + n := endian.unchecked_get_u16be(bytes[:]) + return transmute(f16)n, nil +} + +_encode_f16 :: proc(w: io.Writer, v: f16) -> (err: io.Error) { + bytes: [3]byte + bytes[0] = u8(Header.F16) + endian.unchecked_put_u16be(bytes[1:], transmute(u16)v) + _, err = io.write_full(w, bytes[:]) + return +} + +_decode_f32 :: proc(r: io.Reader) -> (v: f32, err: io.Error) { + bytes: [4]byte + io.read_full(r, bytes[:]) or_return + n := endian.unchecked_get_u32be(bytes[:]) + return transmute(f32)n, nil +} + 
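+// For example, f32(1.5) is written as 0xfa 0x3fc00000: the `.F32` header byte
+// followed by the IEEE-754 bits in big-endian byte order. With
+// `.Deterministic_Float_Size` set it is instead stored as an f16 (0xf9 0x3e00),
+// because 1.5 survives that down-cast unchanged.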
+_encode_f32 :: proc(e: Encoder, v: f32) -> io.Error { + if .Deterministic_Float_Size in e.flags { + return _encode_deterministic_float(e.writer, v) + } + return _encode_f32_exact(e.writer, v) +} + +_encode_f32_exact :: proc(w: io.Writer, v: f32) -> (err: io.Error) { + bytes: [5]byte + bytes[0] = u8(Header.F32) + endian.unchecked_put_u32be(bytes[1:], transmute(u32)v) + _, err = io.write_full(w, bytes[:]) + return +} + +_decode_f64 :: proc(r: io.Reader) -> (v: f64, err: io.Error) { + bytes: [8]byte + io.read_full(r, bytes[:]) or_return + n := endian.unchecked_get_u64be(bytes[:]) + return transmute(f64)n, nil +} + +_encode_f64 :: proc(e: Encoder, v: f64) -> io.Error { + if .Deterministic_Float_Size in e.flags { + return _encode_deterministic_float(e.writer, v) + } + return _encode_f64_exact(e.writer, v) +} + +_encode_f64_exact :: proc(w: io.Writer, v: f64) -> (err: io.Error) { + bytes: [9]byte + bytes[0] = u8(Header.F64) + endian.unchecked_put_u64be(bytes[1:], transmute(u64)v) + _, err = io.write_full(w, bytes[:]) + return +} + +_encode_bool :: proc(w: io.Writer, v: bool) -> (err: io.Error) { + switch v { + case true: _, err = io.write_full(w, {u8(Header.True )}); return + case false: _, err = io.write_full(w, {u8(Header.False)}); return + case: unreachable() + } +} + +_encode_undefined :: proc(w: io.Writer) -> io.Error { + _, err := io.write_full(w, {u8(Header.Undefined)}) + return err +} + +_encode_nil :: proc(w: io.Writer) -> io.Error { + _, err := io.write_full(w, {u8(Header.Nil)}) + return err +} + +// Streaming + +encode_stream_begin :: proc(w: io.Writer, major: Major) -> (err: io.Error) { + assert(major >= Major(.Bytes) && major <= Major(.Map), "illegal stream type") + + header := (u8(major) << 5) | u8(Add.Length_Unknown) + _, err = io.write_full(w, {header}) + return +} + +encode_stream_end :: proc(w: io.Writer) -> io.Error { + header := (u8(Major.Other) << 5) | u8(Add.Break) + _, err := io.write_full(w, {header}) + return err +} + +encode_stream_bytes :: _encode_bytes +encode_stream_text :: _encode_text +encode_stream_array_item :: encode + +encode_stream_map_entry :: proc(e: Encoder, key: Value, val: Value) -> Encode_Error { + encode(e, key) or_return + return encode(e, val) +} + +// + +_decode_container_length :: proc(r: io.Reader, add: Add) -> (length: Maybe(int), is_unknown: bool, err: Decode_Error) { + if add == Add.Length_Unknown { return nil, true, nil } + #partial switch add { + case .One_Byte: length = int(_decode_u8(r) or_return) + case .Two_Bytes: length = int(_decode_u16(r) or_return) + case .Four_Bytes: + big_length := _decode_u32(r) or_return + if u64(big_length) > u64(max(int)) { + err = .Length_Too_Big + return + } + length = int(big_length) + case .Eight_Bytes: + big_length := _decode_u64(r) or_return + if big_length > u64(max(int)) { + err = .Length_Too_Big + return + } + length = int(big_length) + case: + length = int(_decode_tiny_u8(add) or_return) + } + return +} + +// Deterministic encoding is (among other things) encoding all values into their smallest +// possible representation. +// See section 4 of RFC 8949. 
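+//
+// For example, u64(10) is stored directly in the header byte as 0x0a, u64(500)
+// as 0x19 0x01f4, and only values above max(u32) take the full eight-byte form.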
+ +_encode_deterministic_uint :: proc { + _encode_u8, + _encode_deterministic_u16, + _encode_deterministic_u32, + _encode_deterministic_u64, + _encode_deterministic_u128, +} + +_encode_deterministic_u16 :: proc(w: io.Writer, v: u16, major: Major = .Unsigned) -> Encode_Error { + switch { + case v <= u16(max(u8)): return _encode_u8(w, u8(v), major) + case: return _encode_u16_exact(w, v, major) + } +} + +_encode_deterministic_u32 :: proc(w: io.Writer, v: u32, major: Major = .Unsigned) -> Encode_Error { + switch { + case v <= u32(max(u8)): return _encode_u8(w, u8(v), major) + case v <= u32(max(u16)): return _encode_u16_exact(w, u16(v), major) + case: return _encode_u32_exact(w, u32(v), major) + } +} + +_encode_deterministic_u64 :: proc(w: io.Writer, v: u64, major: Major = .Unsigned) -> Encode_Error { + switch { + case v <= u64(max(u8)): return _encode_u8(w, u8(v), major) + case v <= u64(max(u16)): return _encode_u16_exact(w, u16(v), major) + case v <= u64(max(u32)): return _encode_u32_exact(w, u32(v), major) + case: return _encode_u64_exact(w, u64(v), major) + } +} + +_encode_deterministic_u128 :: proc(w: io.Writer, v: u128, major: Major = .Unsigned) -> Encode_Error { + switch { + case v <= u128(max(u8)): return _encode_u8(w, u8(v), major) + case v <= u128(max(u16)): return _encode_u16_exact(w, u16(v), major) + case v <= u128(max(u32)): return _encode_u32_exact(w, u32(v), major) + case v <= u128(max(u64)): return _encode_u64_exact(w, u64(v), major) + case: return .Int_Too_Big + } +} + +_encode_deterministic_negative :: #force_inline proc(w: io.Writer, v: $T) -> Encode_Error + where T == Negative_U8 || T == Negative_U16 || T == Negative_U32 || T == Negative_U64 { + return _encode_deterministic_uint(w, v, .Negative) +} + +// A Deterministic float is a float in the smallest type that stays the same after down casting. +_encode_deterministic_float :: proc { + _encode_f16, + _encode_deterministic_f32, + _encode_deterministic_f64, +} + +_encode_deterministic_f32 :: proc(w: io.Writer, v: f32) -> io.Error { + if (f32(f16(v)) == v) { + return _encode_f16(w, f16(v)) + } + + return _encode_f32_exact(w, v) +} + +_encode_deterministic_f64 :: proc(w: io.Writer, v: f64) -> io.Error { + if (f64(f16(v)) == v) { + return _encode_f16(w, f16(v)) + } + + if (f64(f32(v)) == v) { + return _encode_f32_exact(w, f32(v)) + } + + return _encode_f64_exact(w, v) +} diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin new file mode 100644 index 000000000..aab2defb2 --- /dev/null +++ b/core/encoding/cbor/marshal.odin @@ -0,0 +1,541 @@ +package cbor + +import "core:bytes" +import "core:intrinsics" +import "core:io" +import "core:mem" +import "core:reflect" +import "core:runtime" +import "core:slice" +import "core:strconv" +import "core:strings" +import "core:unicode/utf8" + +/* +Marshal a value into binary CBOR. + +Flags can be used to control the output (mainly determinism, which coincidently affects size). + +The default flags `ENCODE_SMALL` (`.Deterministic_Int_Size`, `.Deterministic_Float_Size`) will try +to put ints and floats into their smallest possible byte size without losing equality. + +Adding the `.Self_Described_CBOR` flag will wrap the value in a tag that lets generic decoders know +the contents are CBOR from just reading the first byte. + +Adding the `.Deterministic_Map_Sorting` flag will sort the encoded maps by the byte content of the +encoded key. 
This flag has a cost on performance and memory efficiency because all keys in a map +have to be precomputed, sorted and only then written to the output. + +Empty flags will do nothing extra to the value. + +The allocations for the `.Deterministic_Map_Sorting` flag are done using the `context.temp_allocator` +but are followed by the necessary `delete` and `free` calls if the allocator supports them. +This is helpful when the CBOR size is so big that you don't want to collect all the temporary +allocations until the end. +*/ +marshal_into :: proc { + marshal_into_bytes, + marshal_into_builder, + marshal_into_writer, + marshal_into_encoder, +} + +marshal :: marshal_into + +// Marshals the given value into a CBOR byte stream (allocated using the given allocator). +// See docs on the `marshal_into` proc group for more info. +marshal_into_bytes :: proc(v: any, flags := ENCODE_SMALL, allocator := context.allocator) -> (bytes: []byte, err: Marshal_Error) { + b, alloc_err := strings.builder_make(allocator) + // The builder as a stream also returns .EOF if it ran out of memory so this is consistent. + if alloc_err != nil { + return nil, .EOF + } + + defer if err != nil { strings.builder_destroy(&b) } + + if err = marshal_into_builder(&b, v, flags); err != nil { + return + } + + return b.buf[:], nil +} + +// Marshals the given value into a CBOR byte stream written to the given builder. +// See docs on the `marshal_into` proc group for more info. +marshal_into_builder :: proc(b: ^strings.Builder, v: any, flags := ENCODE_SMALL) -> Marshal_Error { + return marshal_into_writer(strings.to_writer(b), v, flags) +} + +// Marshals the given value into a CBOR byte stream written to the given writer. +// See docs on the `marshal_into` proc group for more info. +marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL) -> Marshal_Error { + encoder := Encoder{flags, w} + return marshal_into_encoder(encoder, v) +} + +// Marshals the given value into a CBOR byte stream written to the given encoder. +// See docs on the `marshal_into` proc group for more info. +marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { + e := e + + init: bool + defer if init { + e.flags &~= {._In_Progress} + } + + // If not in progress we do initialization and set in progress. + if ._In_Progress not_in e.flags { + init = true + e.flags |= {._In_Progress} + + if .Self_Described_CBOR in e.flags { + err_conv(_encode_u64(e, TAG_SELF_DESCRIBED_CBOR, .Tag)) or_return + } + } + + if v == nil { + return _encode_nil(e.writer) + } + + // Check if type has a tag implementation to use. 
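+	// For example, `big.Int` from `core:math/big` is registered this way in
+	// tags.odin (via `tag_register_type`), so big integers go through their tag
+	// implementation rather than the reflection-based paths below.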
+ if impl, ok := _tag_implementations_type[v.id]; ok { + return impl->marshal(e, v) + } + + ti := runtime.type_info_base(type_info_of(v.id)) + a := any{v.data, ti.id} + + #partial switch info in ti.variant { + case runtime.Type_Info_Named: + unreachable() + + case runtime.Type_Info_Pointer: + switch vv in v { + case Undefined: return _encode_undefined(e.writer) + case Nil: return _encode_nil(e.writer) + } + + case runtime.Type_Info_Integer: + switch vv in v { + case Simple: return err_conv(_encode_simple(e.writer, vv)) + case Negative_U8: return _encode_u8(e.writer, u8(vv), .Negative) + case Negative_U16: return err_conv(_encode_u16(e, u16(vv), .Negative)) + case Negative_U32: return err_conv(_encode_u32(e, u32(vv), .Negative)) + case Negative_U64: return err_conv(_encode_u64(e, u64(vv), .Negative)) + } + + switch i in a { + case i8: return _encode_uint(e.writer, _int_to_uint(i)) + case i16: return err_conv(_encode_uint(e, _int_to_uint(i))) + case i32: return err_conv(_encode_uint(e, _int_to_uint(i))) + case i64: return err_conv(_encode_uint(e, _int_to_uint(i))) + case i128: return err_conv(_encode_uint(e, _int_to_uint(i128(i)) or_return)) + case int: return err_conv(_encode_uint(e, _int_to_uint(i64(i)))) + + case u8: return _encode_uint(e.writer, i) + case u16: return err_conv(_encode_uint(e, i)) + case u32: return err_conv(_encode_uint(e, i)) + case u64: return err_conv(_encode_uint(e, i)) + case u128: return err_conv(_encode_uint(e, _u128_to_u64(u128(i)) or_return)) + case uint: return err_conv(_encode_uint(e, u64(i))) + case uintptr: return err_conv(_encode_uint(e, u64(i))) + + case i16le: return err_conv(_encode_uint(e, _int_to_uint(i16(i)))) + case i32le: return err_conv(_encode_uint(e, _int_to_uint(i32(i)))) + case i64le: return err_conv(_encode_uint(e, _int_to_uint(i64(i)))) + case i128le: return err_conv(_encode_uint(e, _int_to_uint(i128(i)) or_return)) + + case u16le: return err_conv(_encode_uint(e, u16(i))) + case u32le: return err_conv(_encode_uint(e, u32(i))) + case u64le: return err_conv(_encode_uint(e, u64(i))) + case u128le: return err_conv(_encode_uint(e, _u128_to_u64(u128(i)) or_return)) + + case i16be: return err_conv(_encode_uint(e, _int_to_uint(i16(i)))) + case i32be: return err_conv(_encode_uint(e, _int_to_uint(i32(i)))) + case i64be: return err_conv(_encode_uint(e, _int_to_uint(i64(i)))) + case i128be: return err_conv(_encode_uint(e, _int_to_uint(i128(i)) or_return)) + + case u16be: return err_conv(_encode_uint(e, u16(i))) + case u32be: return err_conv(_encode_uint(e, u32(i))) + case u64be: return err_conv(_encode_uint(e, u64(i))) + case u128be: return err_conv(_encode_uint(e, _u128_to_u64(u128(i)) or_return)) + } + + case runtime.Type_Info_Rune: + buf, w := utf8.encode_rune(a.(rune)) + return err_conv(_encode_text(e, string(buf[:w]))) + + case runtime.Type_Info_Float: + switch f in a { + case f16: return _encode_f16(e.writer, f) + case f32: return _encode_f32(e, f) + case f64: return _encode_f64(e, f) + + case f16le: return _encode_f16(e.writer, f16(f)) + case f32le: return _encode_f32(e, f32(f)) + case f64le: return _encode_f64(e, f64(f)) + + case f16be: return _encode_f16(e.writer, f16(f)) + case f32be: return _encode_f32(e, f32(f)) + case f64be: return _encode_f64(e, f64(f)) + } + + case runtime.Type_Info_Complex: + switch z in a { + case complex32: + arr: [2]Value = {real(z), imag(z)} + return err_conv(_encode_array(e, arr[:])) + case complex64: + arr: [2]Value = {real(z), imag(z)} + return err_conv(_encode_array(e, arr[:])) + case complex128: + arr: [2]Value = 
{real(z), imag(z)} + return err_conv(_encode_array(e, arr[:])) + } + + case runtime.Type_Info_Quaternion: + switch q in a { + case quaternion64: + arr: [4]Value = {imag(q), jmag(q), kmag(q), real(q)} + return err_conv(_encode_array(e, arr[:])) + case quaternion128: + arr: [4]Value = {imag(q), jmag(q), kmag(q), real(q)} + return err_conv(_encode_array(e, arr[:])) + case quaternion256: + arr: [4]Value = {imag(q), jmag(q), kmag(q), real(q)} + return err_conv(_encode_array(e, arr[:])) + } + + case runtime.Type_Info_String: + switch s in a { + case string: return err_conv(_encode_text(e, s)) + case cstring: return err_conv(_encode_text(e, string(s))) + } + + case runtime.Type_Info_Boolean: + val: bool + switch b in a { + case bool: return _encode_bool(e.writer, b) + case b8: return _encode_bool(e.writer, bool(b)) + case b16: return _encode_bool(e.writer, bool(b)) + case b32: return _encode_bool(e.writer, bool(b)) + case b64: return _encode_bool(e.writer, bool(b)) + } + + case runtime.Type_Info_Array: + if info.elem.id == byte { + raw := ([^]byte)(v.data) + return err_conv(_encode_bytes(e, raw[:info.count])) + } + + err_conv(_encode_u64(e, u64(info.count), .Array)) or_return + for i in 0.. (res: [10]byte) { + e := e + builder := strings.builder_from_slice(res[:]) + e.writer = strings.to_stream(&builder) + + assert(_encode_u64(e, u64(len(str)), .Text) == nil) + res[9] = u8(len(builder.buf)) + assert(res[9] < 10) + return + } + + Encoded_Entry_Fast :: struct($T: typeid) { + pre_key: [10]byte, + key: T, + val_idx: uintptr, + } + + Encoded_Entry :: struct { + key: ^[dynamic]byte, + val_idx: uintptr, + } + + switch info.key.id { + case string: + entries := make([dynamic]Encoded_Entry_Fast(^[]byte), 0, map_cap, context.temp_allocator) or_return + defer delete(entries) + + for bucket_index in 0.. slice.Ordering { + a, b := a, b + pre_cmp := slice.Ordering(bytes.compare(a.pre_key[:a.pre_key[9]], b.pre_key[:b.pre_key[9]])) + if pre_cmp != .Equal { + return pre_cmp + } + + return slice.Ordering(bytes.compare(a.key^, b.key^)) + }) + + for &entry in entries { + io.write_full(e.writer, entry.pre_key[:entry.pre_key[9]]) or_return + io.write_full(e.writer, entry.key^) or_return + + value := rawptr(runtime.map_cell_index_dynamic(vs, info.map_info.vs, entry.val_idx)) + marshal_into(e, any{ value, info.value.id }) or_return + } + return + + case cstring: + entries := make([dynamic]Encoded_Entry_Fast(^cstring), 0, map_cap, context.temp_allocator) or_return + defer delete(entries) + + for bucket_index in 0.. slice.Ordering { + a, b := a, b + pre_cmp := slice.Ordering(bytes.compare(a.pre_key[:a.pre_key[9]], b.pre_key[:b.pre_key[9]])) + if pre_cmp != .Equal { + return pre_cmp + } + + ab := transmute([]byte)string(a.key^) + bb := transmute([]byte)string(b.key^) + return slice.Ordering(bytes.compare(ab, bb)) + }) + + for &entry in entries { + io.write_full(e.writer, entry.pre_key[:entry.pre_key[9]]) or_return + io.write_full(e.writer, transmute([]byte)string(entry.key^)) or_return + + value := rawptr(runtime.map_cell_index_dynamic(vs, info.map_info.vs, entry.val_idx)) + marshal_into(e, any{ value, info.value.id }) or_return + } + return + + case: + entries := make([dynamic]Encoded_Entry, 0, map_cap, context.temp_allocator) or_return + defer delete(entries) + + for bucket_index in 0.. 
slice.Ordering { + return slice.Ordering(bytes.compare(a.key[:], b.key[:])) + }) + + for entry in entries { + io.write_full(e.writer, entry.key[:]) or_return + delete(entry.key^) + + value := rawptr(runtime.map_cell_index_dynamic(vs, info.map_info.vs, entry.val_idx)) + marshal_into(e, any{ value, info.value.id }) or_return + } + return + } + } + + case runtime.Type_Info_Struct: + switch vv in v { + case Tag: return err_conv(_encode_tag(e, vv)) + } + + err_conv(_encode_u16(e, u16(len(info.names)), .Map)) or_return + + marshal_entry :: #force_inline proc(e: Encoder, info: runtime.Type_Info_Struct, v: any, name: string, i: int) -> Marshal_Error { + err_conv(_encode_text(e, name)) or_return + + id := info.types[i].id + data := rawptr(uintptr(v.data) + info.offsets[i]) + field_any := any{data, id} + + if tag := string(reflect.struct_tag_get(reflect.Struct_Tag(info.tags[i]), "cbor_tag")); tag != "" { + if impl, ok := _tag_implementations_id[tag]; ok { + return impl->marshal(e, field_any) + } + + nr, ok := strconv.parse_u64_of_base(tag, 10) + if !ok { return .Invalid_CBOR_Tag } + + if impl, nok := _tag_implementations_nr[nr]; nok { + return impl->marshal(e, field_any) + } + + err_conv(_encode_u64(e, nr, .Tag)) or_return + } + + return marshal_into(e, field_any) + } + + field_name :: #force_inline proc(info: runtime.Type_Info_Struct, i: int) -> string { + if cbor_name := string(reflect.struct_tag_get(reflect.Struct_Tag(info.tags[i]), "cbor")); cbor_name != "" { + return cbor_name + } else { + return info.names[i] + } + } + + if .Deterministic_Map_Sorting in e.flags { + Name :: struct { + name: string, + field: int, + } + entries := make([dynamic]Name, 0, len(info.names), context.temp_allocator) or_return + defer delete(entries) + + for name, i in info.names { + append(&entries, Name{field_name(info, i), i}) or_return + } + + // Sort lexicographic on the bytes of the key. 
+ slice.sort_by_cmp(entries[:], proc(a, b: Name) -> slice.Ordering { + return slice.Ordering(bytes.compare(transmute([]byte)a.name, transmute([]byte)b.name)) + }) + + for entry in entries { + marshal_entry(e, info, v, entry.name, entry.field) or_return + } + } else { + for name, i in info.names { + marshal_entry(e, info, v, field_name(info, i), i) or_return + } + } + return + + case runtime.Type_Info_Union: + switch vv in v { + case Value: return err_conv(encode(e, vv)) + } + + tag := reflect.get_union_variant_raw_tag(v) + if v.data == nil || tag <= 0 { + return _encode_nil(e.writer) + } + id := info.variants[tag-1].id + return marshal_into(e, any{v.data, id}) + + case runtime.Type_Info_Enum: + return marshal_into(e, any{v.data, info.base.id}) + + case runtime.Type_Info_Bit_Set: + do_byte_swap := is_bit_set_different_endian_to_platform(info.underlying) + switch ti.size * 8 { + case 0: + return _encode_u8(e.writer, 0) + case 8: + x := (^u8)(v.data)^ + return _encode_u8(e.writer, x) + case 16: + x := (^u16)(v.data)^ + if do_byte_swap { x = intrinsics.byte_swap(x) } + return err_conv(_encode_u16(e, x)) + case 32: + x := (^u32)(v.data)^ + if do_byte_swap { x = intrinsics.byte_swap(x) } + return err_conv(_encode_u32(e, x)) + case 64: + x := (^u64)(v.data)^ + if do_byte_swap { x = intrinsics.byte_swap(x) } + return err_conv(_encode_u64(e, x)) + case: + panic("unknown bit_size size") + } + } + + return _unsupported(v.id, nil) +} diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin new file mode 100644 index 000000000..54bc7dd15 --- /dev/null +++ b/core/encoding/cbor/tags.odin @@ -0,0 +1,361 @@ +package cbor + +import "core:encoding/base64" +import "core:io" +import "core:math" +import "core:math/big" +import "core:mem" +import "core:reflect" +import "core:runtime" +import "core:strings" +import "core:time" + +// Tags defined in RFC 7049 that we provide implementations for. + +// UTC time in seconds, unmarshalled into a `core:time` `time.Time` or integer. +TAG_EPOCH_TIME_NR :: 1 +TAG_EPOCH_TIME_ID :: "epoch" + +// Using `core:math/big`, big integers are properly encoded and decoded during marshal and unmarshal. +TAG_UNSIGNED_BIG_NR :: 2 +// Using `core:math/big`, big integers are properly encoded and decoded during marshal and unmarshal. +TAG_NEGATIVE_BIG_NR :: 3 + +// TAG_DECIMAL_FRACTION :: 4 // NOTE: We could probably implement this with `math/fixed`. + +// Sometimes it is beneficial to carry an embedded CBOR data item that is not meant to be decoded +// immediately at the time the enclosing data item is being decoded. Tag number 24 (CBOR data item) +// can be used to tag the embedded byte string as a single data item encoded in CBOR format. +TAG_CBOR_NR :: 24 +TAG_CBOR_ID :: "cbor" + +// The contents of this tag are base64 encoded during marshal and decoded during unmarshal. +TAG_BASE64_NR :: 34 +TAG_BASE64_ID :: "base64" + +// A tag that is used to detect the contents of a binary buffer (like a file) are CBOR. +// This tag would wrap everything else, decoders can then check for this header and see if the +// given content is definitely CBOR. +TAG_SELF_DESCRIBED_CBOR :: 55799 + +// A tag implementation that handles marshals and unmarshals for the tag it is registered on. +Tag_Implementation :: struct { + data: rawptr, + unmarshal: Tag_Unmarshal_Proc, + marshal: Tag_Marshal_Proc, +} + +// Procedure responsible for umarshalling the tag out of the reader into the given `any`. 
+Tag_Unmarshal_Proc :: #type proc(self: ^Tag_Implementation, r: io.Reader, tag_nr: Tag_Number, v: any) -> Unmarshal_Error + +// Procedure responsible for marshalling the tag in the given `any` into the given encoder. +Tag_Marshal_Proc :: #type proc(self: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error + +// When encountering a tag in the CBOR being unmarshalled, the implementation is used to unmarshal it. +// When encountering a struct tag like `cbor_tag:"Tag_Number"`, the implementation is used to marshal it. +_tag_implementations_nr: map[Tag_Number]Tag_Implementation + +// Same as the number implementations but friendlier to use as a struct tag. +// Instead of `cbor_tag:"34"` you can use `cbor_tag:"base64"`. +_tag_implementations_id: map[string]Tag_Implementation + +// Tag implementations that are always used by a type, if that type is encountered in marshal it +// will rely on the implementation to marshal it. +// +// This is good for types that don't make sense or can't marshal in its default form. +_tag_implementations_type: map[typeid]Tag_Implementation + +// Register a custom tag implementation to be used when marshalling that type and unmarshalling that tag number. +tag_register_type :: proc(impl: Tag_Implementation, nr: Tag_Number, type: typeid) { + _tag_implementations_nr[nr] = impl + _tag_implementations_type[type] = impl +} + +// Register a custom tag implementation to be used when marshalling that tag number or marshalling +// a field with the struct tag `cbor_tag:"nr"`. +tag_register_number :: proc(impl: Tag_Implementation, nr: Tag_Number, id: string) { + _tag_implementations_nr[nr] = impl + _tag_implementations_id[id] = impl +} + +// Controls initialization of default tag implementations. +// JS and WASI default to a panic allocator so we don't want to do it on those. +INITIALIZE_DEFAULT_TAGS :: #config(CBOR_INITIALIZE_DEFAULT_TAGS, ODIN_OS != .JS && ODIN_OS != .WASI) + +@(private, init, disabled=!INITIALIZE_DEFAULT_TAGS) +tags_initialize_defaults :: proc() { + tags_register_defaults() +} + +// Registers tags that have implementations provided by this package. +// This is done by default and can be controlled with the `CBOR_INITIALIZE_DEFAULT_TAGS` define. +tags_register_defaults :: proc() { + // NOTE: Not registering this the other way around, user can opt-in using the `cbor_tag:"1"` struct + // tag instead, it would lose precision and marshalling the `time.Time` struct normally is valid. + tag_register_number({nil, tag_time_unmarshal, tag_time_marshal}, TAG_EPOCH_TIME_NR, TAG_EPOCH_TIME_ID) + + // Use the struct tag `cbor_tag:"34"` to have your field encoded in a base64. + tag_register_number({nil, tag_base64_unmarshal, tag_base64_marshal}, TAG_BASE64_NR, TAG_BASE64_ID) + + // Use the struct tag `cbor_tag:"24"` to keep a non-decoded field of raw CBOR. + tag_register_number({nil, tag_cbor_unmarshal, tag_cbor_marshal}, TAG_CBOR_NR, TAG_CBOR_ID) + + // These following tags are registered at the type level and don't require an opt-in struct tag. + // Encoding these types on its own make no sense or no data is lost to encode it. + + tag_register_type({nil, tag_big_unmarshal, tag_big_marshal}, TAG_UNSIGNED_BIG_NR, big.Int) + tag_register_type({nil, tag_big_unmarshal, tag_big_marshal}, TAG_NEGATIVE_BIG_NR, big.Int) +} + +// Tag number 1 contains a numerical value counting the number of seconds from 1970-01-01T00:00Z +// in UTC time to the represented point in civil time. +// +// See RFC 8949 section 3.4.2. 
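+//
+// Usage sketch (the struct and field below are only illustrative): the tag is opt-in
+// through the `cbor_tag` struct tag, either by number ("1") or by its "epoch" id:
+//
+//	Post :: struct {
+//		created_at: time.Time `cbor_tag:"epoch"`,
+//	}
+//
+// The field then marshals as tag 1 wrapping the Unix timestamp in seconds, for example
+// 1(1700000000) in diagnostic notation, and unmarshals back into a `time.Time`.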
+@(private) +tag_time_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> (err: Unmarshal_Error) { + hdr := _decode_header(r) or_return + #partial switch hdr { + case .U8, .U16, .U32, .U64, .Neg_U8, .Neg_U16, .Neg_U32, .Neg_U64: + switch &dst in v { + case time.Time: + i: i64 + _unmarshal_any_ptr(r, &i, hdr) or_return + dst = time.unix(i64(i), 0) + return + case: + return _unmarshal_value(r, v, hdr) + } + + case .F16, .F32, .F64: + switch &dst in v { + case time.Time: + f: f64 + _unmarshal_any_ptr(r, &f, hdr) or_return + whole, fract := math.modf(f) + dst = time.unix(i64(whole), i64(fract * 1e9)) + return + case: + return _unmarshal_value(r, v, hdr) + } + + case: + maj, add := _header_split(hdr) + if maj == .Other { + i := _decode_tiny_u8(add) or_return + + switch &dst in v { + case time.Time: + dst = time.unix(i64(i), 0) + case: + if _assign_int(v, i) { return } + } + } + + // Only numbers and floats are allowed in this tag. + return .Bad_Tag_Value + } + + return _unsupported(v, hdr) +} + +@(private) +tag_time_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error { + switch vv in v { + case time.Time: + // NOTE: we lose precision here, which is one of the reasons for this tag being opt-in. + i := time.time_to_unix(vv) + + _encode_u8(e.writer, TAG_EPOCH_TIME_NR, .Tag) or_return + return err_conv(_encode_uint(e, _int_to_uint(i))) + case: + unreachable() + } +} + +@(private) +tag_big_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, tnr: Tag_Number, v: any) -> (err: Unmarshal_Error) { + hdr := _decode_header(r) or_return + maj, add := _header_split(hdr) + if maj != .Bytes { + // Only bytes are supported in this tag. + return .Bad_Tag_Value + } + + switch &dst in v { + case big.Int: + bytes := err_conv(_decode_bytes(r, add)) or_return + defer delete(bytes) + + if err := big.int_from_bytes_big(&dst, bytes); err != nil { + return .Bad_Tag_Value + } + + if tnr == TAG_NEGATIVE_BIG_NR { + dst.sign = .Negative + } + + return + } + + return _unsupported(v, hdr) +} + +@(private) +tag_big_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error { + switch &vv in v { + case big.Int: + if !big.int_is_initialized(&vv) { + _encode_u8(e.writer, TAG_UNSIGNED_BIG_NR, .Tag) or_return + return _encode_u8(e.writer, 0, .Bytes) + } + + // NOTE: using the panic_allocator because all procedures should only allocate if the Int + // is uninitialized (which we checked). 
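+	// For reference, a sketch of the resulting wire format (example value, not taken
+	// from this code): the magnitude is written big-endian as a byte string, so a
+	// `big.Int` holding 2^64 encodes as tag 2 over nine bytes,
+	// 2(h'010000000000000000') in diagnostic notation.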
+ + is_neg, err := big.is_negative(&vv, mem.panic_allocator()) + assert(err == nil, "only errors if not initialized, which has been checked") + + tnr: u8 = TAG_NEGATIVE_BIG_NR if is_neg else TAG_UNSIGNED_BIG_NR + _encode_u8(e.writer, tnr, .Tag) or_return + + size_in_bytes, berr := big.int_to_bytes_size(&vv, false, mem.panic_allocator()) + assert(berr == nil, "only errors if not initialized, which has been checked") + assert(size_in_bytes >= 0) + + err_conv(_encode_u64(e, u64(size_in_bytes), .Bytes)) or_return + + for offset := (size_in_bytes*8)-8; offset >= 0; offset -= 8 { + bits, derr := big.int_bitfield_extract(&vv, offset, 8, mem.panic_allocator()) + assert(derr == nil, "only errors if not initialized or invalid argument (offset and count), which won't happen") + + io.write_full(e.writer, {u8(bits & 255)}) or_return + } + return nil + + case: unreachable() + } +} + +@(private) +tag_cbor_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> Unmarshal_Error { + hdr := _decode_header(r) or_return + major, add := _header_split(hdr) + #partial switch major { + case .Bytes: + ti := reflect.type_info_base(type_info_of(v.id)) + return _unmarshal_bytes(r, v, ti, hdr, add) + + case: return .Bad_Tag_Value + } +} + +@(private) +tag_cbor_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error { + _encode_u8(e.writer, TAG_CBOR_NR, .Tag) or_return + ti := runtime.type_info_base(type_info_of(v.id)) + #partial switch t in ti.variant { + case runtime.Type_Info_String: + return marshal_into(e, v) + case runtime.Type_Info_Array: + elem_base := reflect.type_info_base(t.elem) + if elem_base.id != byte { return .Bad_Tag_Value } + return marshal_into(e, v) + case runtime.Type_Info_Slice: + elem_base := reflect.type_info_base(t.elem) + if elem_base.id != byte { return .Bad_Tag_Value } + return marshal_into(e, v) + case runtime.Type_Info_Dynamic_Array: + elem_base := reflect.type_info_base(t.elem) + if elem_base.id != byte { return .Bad_Tag_Value } + return marshal_into(e, v) + case: + return .Bad_Tag_Value + } +} + +// NOTE: this could probably be more efficient by decoding bytes from CBOR and then from base64 at the same time. 
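+//
+// Usage sketch (the struct and field below are only illustrative): opt in with the
+// `cbor_tag` struct tag, by number ("34") or by the "base64" id:
+//
+//	Attachment :: struct {
+//		payload: []byte `cbor_tag:"base64"`,
+//	}
+//
+// On marshal the bytes are written as tag 34 over a base64-encoded text string; on
+// unmarshal the text is decoded back into the field.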
+@(private) +tag_base64_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> (err: Unmarshal_Error) { + hdr := _decode_header(r) or_return + major, add := _header_split(hdr) + #partial switch major { + case .Text: + ti := reflect.type_info_base(type_info_of(v.id)) + _unmarshal_bytes(r, v, ti, hdr, add) or_return + #partial switch t in ti.variant { + case runtime.Type_Info_String: + switch t.is_cstring { + case true: + str := string((^cstring)(v.data)^) + decoded := base64.decode(str) or_return + (^cstring)(v.data)^ = strings.clone_to_cstring(string(decoded)) or_return + delete(decoded) + delete(str) + case false: + str := (^string)(v.data)^ + decoded := base64.decode(str) or_return + (^string)(v.data)^ = string(decoded) + delete(str) + } + return + + case runtime.Type_Info_Array: + raw := ([^]byte)(v.data) + decoded := base64.decode(string(raw[:t.count])) or_return + copy(raw[:t.count], decoded) + delete(decoded) + return + + case runtime.Type_Info_Slice: + raw := (^[]byte)(v.data) + decoded := base64.decode(string(raw^)) or_return + delete(raw^) + raw^ = decoded + return + + case runtime.Type_Info_Dynamic_Array: + raw := (^mem.Raw_Dynamic_Array)(v.data) + str := string(((^[dynamic]byte)(v.data)^)[:]) + + decoded := base64.decode(str) or_return + delete(str) + + raw.data = raw_data(decoded) + raw.len = len(decoded) + raw.cap = len(decoded) + return + + case: unreachable() + } + + case: return .Bad_Tag_Value + } +} + +@(private) +tag_base64_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error { + _encode_u8(e.writer, TAG_BASE64_NR, .Tag) or_return + + ti := runtime.type_info_base(type_info_of(v.id)) + a := any{v.data, ti.id} + + bytes: []byte + switch val in a { + case string: bytes = transmute([]byte)val + case cstring: bytes = transmute([]byte)string(val) + case []byte: bytes = val + case [dynamic]byte: bytes = val[:] + case: + #partial switch t in ti.variant { + case runtime.Type_Info_Array: + if t.elem.id != byte { return .Bad_Tag_Value } + bytes = ([^]byte)(v.data)[:t.count] + case: + return .Bad_Tag_Value + } + } + + out_len := base64.encoded_length(bytes) + err_conv(_encode_u64(e, u64(out_len), .Text)) or_return + return base64.encode_into(e.writer, bytes) +} diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin new file mode 100644 index 000000000..0da8e3f2a --- /dev/null +++ b/core/encoding/cbor/unmarshal.odin @@ -0,0 +1,832 @@ +package cbor + +import "core:intrinsics" +import "core:io" +import "core:mem" +import "core:reflect" +import "core:runtime" +import "core:strings" +import "core:unicode/utf8" + +// `strings` is only used in poly procs, but -vet thinks it is fully unused. +_ :: strings + +/* +Unmarshals the given CBOR into the given pointer using reflection. +Types that require allocation are allocated using the given allocator. + +Some temporary allocations are done on the `context.temp_allocator`, but, if you want to, +this can be set to a "normal" allocator, because the necessary `delete` and `free` calls are still made. +This is helpful when the CBOR size is so big that you don't want to collect all the temporary allocations until the end. +*/ +unmarshal :: proc { + unmarshal_from_reader, + unmarshal_from_string, +} + +// Unmarshals from a reader, see docs on the proc group `Unmarshal` for more info. 
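+//
+// A minimal usage sketch (type and variable names are illustrative only):
+//
+//	Point :: struct { x, y: int }
+//
+//	p: Point
+//	if err := cbor.unmarshal(r, &p); err != nil {
+//		// handle the Unmarshal_Error
+//	}
+//
+// where `r` is an `io.Reader`; the string variant below works the same on an in-memory string.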
+unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, allocator := context.allocator) -> Unmarshal_Error { + return _unmarshal_any_ptr(r, ptr, allocator=allocator) +} + +// Unmarshals from a string, see docs on the proc group `Unmarshal` for more info. +unmarshal_from_string :: proc(s: string, ptr: ^$T, allocator := context.allocator) -> Unmarshal_Error { + sr: strings.Reader + r := strings.to_reader(&sr, s) + return _unmarshal_any_ptr(r, ptr, allocator=allocator) +} + +_unmarshal_any_ptr :: proc(r: io.Reader, v: any, hdr: Maybe(Header) = nil, allocator := context.allocator) -> Unmarshal_Error { + context.allocator = allocator + v := v + + if v == nil || v.id == nil { + return .Invalid_Parameter + } + + v = reflect.any_base(v) + ti := type_info_of(v.id) + if !reflect.is_pointer(ti) || ti.id == rawptr { + return .Non_Pointer_Parameter + } + + data := any{(^rawptr)(v.data)^, ti.variant.(reflect.Type_Info_Pointer).elem.id} + return _unmarshal_value(r, data, hdr.? or_else (_decode_header(r) or_return)) +} + +_unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_Error) { + v := v + ti := reflect.type_info_base(type_info_of(v.id)) + + // If it's a union with only one variant, then treat it as that variant + if u, ok := ti.variant.(reflect.Type_Info_Union); ok && len(u.variants) == 1 { + #partial switch hdr { + case .Nil, .Undefined, nil: // no-op. + case: + variant := u.variants[0] + v.id = variant.id + ti = reflect.type_info_base(variant) + if !reflect.is_pointer_internally(variant) { + tag := any{rawptr(uintptr(v.data) + u.tag_offset), u.tag_type.id} + assert(_assign_int(tag, 1)) + } + } + } + + // Allow generic unmarshal by doing it into a `Value`. + switch &dst in v { + case Value: + dst = err_conv(decode(r, hdr)) or_return + return + } + + switch hdr { + case .U8: + decoded := _decode_u8(r) or_return + if !_assign_int(v, decoded) { return _unsupported(v, hdr) } + return + + case .U16: + decoded := _decode_u16(r) or_return + if !_assign_int(v, decoded) { return _unsupported(v, hdr) } + return + + case .U32: + decoded := _decode_u32(r) or_return + if !_assign_int(v, decoded) { return _unsupported(v, hdr) } + return + + case .U64: + decoded := _decode_u64(r) or_return + if !_assign_int(v, decoded) { return _unsupported(v, hdr) } + return + + case .Neg_U8: + decoded := Negative_U8(_decode_u8(r) or_return) + + switch &dst in v { + case Negative_U8: + dst = decoded + return + case Negative_U16: + dst = Negative_U16(decoded) + return + case Negative_U32: + dst = Negative_U32(decoded) + return + case Negative_U64: + dst = Negative_U64(decoded) + return + } + + if reflect.is_unsigned(ti) { return _unsupported(v, hdr) } + + if !_assign_int(v, negative_to_int(decoded)) { return _unsupported(v, hdr) } + return + + case .Neg_U16: + decoded := Negative_U16(_decode_u16(r) or_return) + + switch &dst in v { + case Negative_U16: + dst = decoded + return + case Negative_U32: + dst = Negative_U32(decoded) + return + case Negative_U64: + dst = Negative_U64(decoded) + return + } + + if reflect.is_unsigned(ti) { return _unsupported(v, hdr) } + + if !_assign_int(v, negative_to_int(decoded)) { return _unsupported(v, hdr) } + return + + case .Neg_U32: + decoded := Negative_U32(_decode_u32(r) or_return) + + switch &dst in v { + case Negative_U32: + dst = decoded + return + case Negative_U64: + dst = Negative_U64(decoded) + return + } + + if reflect.is_unsigned(ti) { return _unsupported(v, hdr) } + + if !_assign_int(v, negative_to_int(decoded)) { return _unsupported(v, hdr) } + return + + 
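+	// Note on the negative major type (per RFC 8949): an encoded argument n represents
+	// the integer -1 - n, so for example the bytes 0x38 0x63 (a Neg_U8 holding 99)
+	// decode to -100; the `negative_to_int` calls in these cases account for that
+	// conversion before assignment.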
case .Neg_U64: + decoded := Negative_U64(_decode_u64(r) or_return) + + switch &dst in v { + case Negative_U64: + dst = decoded + return + } + + if reflect.is_unsigned(ti) { return _unsupported(v, hdr) } + + if !_assign_int(v, negative_to_int(decoded)) { return _unsupported(v, hdr) } + return + + case .Simple: + decoded := _decode_simple(r) or_return + + // NOTE: Because this is a special type and not to be treated as a general integer, + // We only put the value of it in fields that are explicitly of type `Simple`. + switch &dst in v { + case Simple: + dst = decoded + return + case: + return _unsupported(v, hdr) + } + + case .F16: + decoded := _decode_f16(r) or_return + if !_assign_float(v, decoded) { return _unsupported(v, hdr) } + return + + case .F32: + decoded := _decode_f32(r) or_return + if !_assign_float(v, decoded) { return _unsupported(v, hdr) } + return + + case .F64: + decoded := _decode_f64(r) or_return + if !_assign_float(v, decoded) { return _unsupported(v, hdr) } + return + + case .True: + if !_assign_bool(v, true) { return _unsupported(v, hdr) } + return + + case .False: + if !_assign_bool(v, false) { return _unsupported(v, hdr) } + return + + case .Nil, .Undefined: + mem.zero(v.data, ti.size) + return + + case .Break: + return .Break + } + + maj, add := _header_split(hdr) + switch maj { + case .Unsigned: + decoded := _decode_tiny_u8(add) or_return + if !_assign_int(v, decoded) { return _unsupported(v, hdr, add) } + return + + case .Negative: + decoded := Negative_U8(_decode_tiny_u8(add) or_return) + + switch &dst in v { + case Negative_U8: + dst = decoded + return + } + + if reflect.is_unsigned(ti) { return _unsupported(v, hdr, add) } + + if !_assign_int(v, negative_to_int(decoded)) { return _unsupported(v, hdr, add) } + return + + case .Other: + decoded := _decode_tiny_simple(add) or_return + + // NOTE: Because this is a special type and not to be treated as a general integer, + // We only put the value of it in fields that are explicitly of type `Simple`. + switch &dst in v { + case Simple: + dst = decoded + return + case: + return _unsupported(v, hdr, add) + } + + case .Tag: + switch &dst in v { + case ^Tag: + tval := err_conv(_decode_tag_ptr(r, add)) or_return + if t, is_tag := tval.(^Tag); is_tag { + dst = t + return + } + + destroy(tval) + return .Bad_Tag_Value + case Tag: + t := err_conv(_decode_tag(r, add)) or_return + if t, is_tag := t.?; is_tag { + dst = t + return + } + + return .Bad_Tag_Value + } + + nr := err_conv(_decode_tag_nr(r, add)) or_return + + // Custom tag implementations. + if impl, ok := _tag_implementations_nr[nr]; ok { + return impl->unmarshal(r, nr, v) + } else { + // Discard the tag info and unmarshal as its value. + return _unmarshal_value(r, v, _decode_header(r) or_return) + } + + return _unsupported(v, hdr, add) + + case .Bytes: return _unmarshal_bytes(r, v, ti, hdr, add) + case .Text: return _unmarshal_string(r, v, ti, hdr, add) + case .Array: return _unmarshal_array(r, v, ti, hdr, add) + case .Map: return _unmarshal_map(r, v, ti, hdr, add) + + case: return .Bad_Major + } +} + +_unmarshal_bytes :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { + #partial switch t in ti.variant { + case reflect.Type_Info_String: + bytes := err_conv(_decode_bytes(r, add)) or_return + + if t.is_cstring { + raw := (^cstring)(v.data) + assert_safe_for_cstring(string(bytes)) + raw^ = cstring(raw_data(bytes)) + } else { + // String has same memory layout as a slice, so we can directly use it as a slice. 
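+			// (Sketch of the assumption behind the transmute below: `mem.Raw_String` is
+			// {data, len}, the same layout as the `[]byte` returned by `_decode_bytes`, so
+			// this is a straight reinterpretation of the slice header, not a copy.)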
+ raw := (^mem.Raw_String)(v.data) + raw^ = transmute(mem.Raw_String)bytes + } + + return + + case reflect.Type_Info_Slice: + elem_base := reflect.type_info_base(t.elem) + + if elem_base.id != byte { return _unsupported(v, hdr) } + + bytes := err_conv(_decode_bytes(r, add)) or_return + raw := (^mem.Raw_Slice)(v.data) + raw^ = transmute(mem.Raw_Slice)bytes + return + + case reflect.Type_Info_Dynamic_Array: + elem_base := reflect.type_info_base(t.elem) + + if elem_base.id != byte { return _unsupported(v, hdr) } + + bytes := err_conv(_decode_bytes(r, add)) or_return + raw := (^mem.Raw_Dynamic_Array)(v.data) + raw.data = raw_data(bytes) + raw.len = len(bytes) + raw.cap = len(bytes) + raw.allocator = context.allocator + return + + case reflect.Type_Info_Array: + elem_base := reflect.type_info_base(t.elem) + + if elem_base.id != byte { return _unsupported(v, hdr) } + + bytes: []byte; { + context.allocator = context.temp_allocator + bytes = err_conv(_decode_bytes(r, add)) or_return + } + defer delete(bytes, context.temp_allocator) + + if len(bytes) > t.count { return _unsupported(v, hdr) } + + // Copy into array type, delete original. + slice := ([^]byte)(v.data)[:len(bytes)] + n := copy(slice, bytes) + assert(n == len(bytes)) + return + } + + return _unsupported(v, hdr) +} + +_unmarshal_string :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { + #partial switch t in ti.variant { + case reflect.Type_Info_String: + text := err_conv(_decode_text(r, add)) or_return + + if t.is_cstring { + raw := (^cstring)(v.data) + + assert_safe_for_cstring(text) + raw^ = cstring(raw_data(text)) + } else { + raw := (^string)(v.data) + raw^ = text + } + return + + // Enum by its variant name. + case reflect.Type_Info_Enum: + context.allocator = context.temp_allocator + text := err_conv(_decode_text(r, add)) or_return + defer delete(text, context.temp_allocator) + + for name, i in t.names { + if name == text { + if !_assign_int(any{v.data, ti.id}, t.values[i]) { return _unsupported(v, hdr) } + return + } + } + + case reflect.Type_Info_Rune: + context.allocator = context.temp_allocator + text := err_conv(_decode_text(r, add)) or_return + defer delete(text, context.temp_allocator) + + r := (^rune)(v.data) + dr, n := utf8.decode_rune(text) + if dr == utf8.RUNE_ERROR || n < len(text) { + return _unsupported(v, hdr) + } + + r^ = dr + return + } + + return _unsupported(v, hdr) +} + +_unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { + + assign_array :: proc( + r: io.Reader, + da: ^mem.Raw_Dynamic_Array, + elemt: ^reflect.Type_Info, + _length: Maybe(int), + growable := true, + ) -> (out_of_space: bool, err: Unmarshal_Error) { + length, has_length := _length.? + for idx: uintptr = 0; !has_length || idx < uintptr(length); idx += 1 { + elem_ptr := rawptr(uintptr(da.data) + idx*uintptr(elemt.size)) + elem := any{elem_ptr, elemt.id} + + hdr := _decode_header(r) or_return + + // Double size if out of capacity. + if da.cap <= da.len { + // Not growable, error out. + if !growable { return true, .Out_Of_Memory } + + cap := 2 * da.cap + ok := runtime.__dynamic_array_reserve(da, elemt.size, elemt.align, cap) + + // NOTE: Might be lying here, but it is at least an allocator error. 
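+				// (`runtime.__dynamic_array_reserve` only reports success as a bool, so the
+				// concrete allocator error is not recoverable here; .Out_Of_Memory is the
+				// closest approximation, which is what the note above means by "lying".)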
+ if !ok { return false, .Out_Of_Memory } + } + + err = _unmarshal_value(r, elem, hdr) + if !has_length && err == .Break { break } + if err != nil { return } + + da.len += 1 + } + + return false, nil + } + + // Allow generically storing the values array. + switch &dst in v { + case ^Array: + dst = err_conv(_decode_array_ptr(r, add)) or_return + return + case Array: + dst = err_conv(_decode_array(r, add)) or_return + return + } + + #partial switch t in ti.variant { + case reflect.Type_Info_Slice: + _length, unknown := err_conv(_decode_container_length(r, add)) or_return + length := _length.? or_else INITIAL_STREAMED_CONTAINER_CAPACITY + + data := mem.alloc_bytes_non_zeroed(t.elem.size * length, t.elem.align) or_return + defer if err != nil { mem.free_bytes(data) } + + da := mem.Raw_Dynamic_Array{raw_data(data), 0, length, context.allocator } + + assign_array(r, &da, t.elem, _length) or_return + + if da.len < da.cap { + // Ignoring an error here, but this is not critical to succeed. + _ = runtime.__dynamic_array_shrink(&da, t.elem.size, t.elem.align, da.len) + } + + raw := (^mem.Raw_Slice)(v.data) + raw.data = da.data + raw.len = da.len + return + + case reflect.Type_Info_Dynamic_Array: + _length, unknown := err_conv(_decode_container_length(r, add)) or_return + length := _length.? or_else INITIAL_STREAMED_CONTAINER_CAPACITY + + data := mem.alloc_bytes_non_zeroed(t.elem.size * length, t.elem.align) or_return + defer if err != nil { mem.free_bytes(data) } + + raw := (^mem.Raw_Dynamic_Array)(v.data) + raw.data = raw_data(data) + raw.len = 0 + raw.cap = length + raw.allocator = context.allocator + + _ = assign_array(r, raw, t.elem, _length) or_return + return + + case reflect.Type_Info_Array: + _length, unknown := err_conv(_decode_container_length(r, add)) or_return + length := _length.? or_else t.count + + if !unknown && length > t.count { + return _unsupported(v, hdr) + } + + da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, context.allocator } + + out_of_space := assign_array(r, &da, t.elem, _length, growable=false) or_return + if out_of_space { return _unsupported(v, hdr) } + return + + case reflect.Type_Info_Enumerated_Array: + _length, unknown := err_conv(_decode_container_length(r, add)) or_return + length := _length.? or_else t.count + + if !unknown && length > t.count { + return _unsupported(v, hdr) + } + + da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, context.allocator } + + out_of_space := assign_array(r, &da, t.elem, _length, growable=false) or_return + if out_of_space { return _unsupported(v, hdr) } + return + + case reflect.Type_Info_Complex: + _length, unknown := err_conv(_decode_container_length(r, add)) or_return + length := _length.? or_else 2 + + if !unknown && length > 2 { + return _unsupported(v, hdr) + } + + da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, 2, context.allocator } + + info: ^runtime.Type_Info + switch ti.id { + case complex32: info = type_info_of(f16) + case complex64: info = type_info_of(f32) + case complex128: info = type_info_of(f64) + case: unreachable() + } + + out_of_space := assign_array(r, &da, info, 2, growable=false) or_return + if out_of_space { return _unsupported(v, hdr) } + return + + case reflect.Type_Info_Quaternion: + _length, unknown := err_conv(_decode_container_length(r, add)) or_return + length := _length.? 
or_else 4 + + if !unknown && length > 4 { + return _unsupported(v, hdr) + } + + da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, 4, context.allocator } + + info: ^runtime.Type_Info + switch ti.id { + case quaternion64: info = type_info_of(f16) + case quaternion128: info = type_info_of(f32) + case quaternion256: info = type_info_of(f64) + case: unreachable() + } + + out_of_space := assign_array(r, &da, info, 4, growable=false) or_return + if out_of_space { return _unsupported(v, hdr) } + return + + case: return _unsupported(v, hdr) + } +} + +_unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { + + decode_key :: proc(r: io.Reader, v: any) -> (k: string, err: Unmarshal_Error) { + entry_hdr := _decode_header(r) or_return + entry_maj, entry_add := _header_split(entry_hdr) + #partial switch entry_maj { + case .Text: + k = err_conv(_decode_text(r, entry_add)) or_return + return + case .Bytes: + bytes := err_conv(_decode_bytes(r, entry_add)) or_return + k = string(bytes) + return + case: + err = _unsupported(v, entry_hdr) + return + } + } + + // Allow generically storing the map array. + switch &dst in v { + case ^Map: + dst = err_conv(_decode_map_ptr(r, add)) or_return + return + case Map: + dst = err_conv(_decode_map(r, add)) or_return + return + } + + #partial switch t in ti.variant { + case reflect.Type_Info_Struct: + if t.is_raw_union { + return _unsupported(v, hdr) + } + + length, unknown := err_conv(_decode_container_length(r, add)) or_return + fields := reflect.struct_fields_zipped(ti.id) + + for idx := 0; unknown || idx < length.?; idx += 1 { + // Decode key, keys can only be strings. + key: string; { + context.allocator = context.temp_allocator + if keyv, kerr := decode_key(r, v); unknown && kerr == .Break { + break + } else if kerr != nil { + err = kerr + return + } else { + key = keyv + } + } + defer delete(key, context.temp_allocator) + + // Find matching field. + use_field_idx := -1 + { + for field, field_idx in fields { + tag_value := string(reflect.struct_tag_get(field.tag, "cbor")) + if key == tag_value { + use_field_idx = field_idx + break + } + + if key == field.name { + // No break because we want to still check remaining struct tags. + use_field_idx = field_idx + } + } + + // Skips unused map entries. + if use_field_idx < 0 { + continue + } + } + + field := fields[use_field_idx] + name := field.name + ptr := rawptr(uintptr(v.data) + field.offset) + fany := any{ptr, field.type.id} + _unmarshal_value(r, fany, _decode_header(r) or_return) or_return + } + return + + case reflect.Type_Info_Map: + if !reflect.is_string(t.key) { + return _unsupported(v, hdr) + } + + raw_map := (^mem.Raw_Map)(v.data) + if raw_map.allocator.procedure == nil { + raw_map.allocator = context.allocator + } + + defer if err != nil { + _ = runtime.map_free_dynamic(raw_map^, t.map_info) + } + + length, unknown := err_conv(_decode_container_length(r, add)) or_return + if !unknown { + // Reserve space before setting so we can return allocation errors and be efficient on big maps. + new_len := uintptr(runtime.map_len(raw_map^)+length.?) + runtime.map_reserve_dynamic(raw_map, t.map_info, new_len) or_return + } + + // Temporary memory to unmarshal keys into before inserting them into the map. 
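+		// (More precisely: this is a single value-sized scratch buffer, reused for every
+		// entry. Each map value is unmarshalled into it and then copied into the map by
+		// `__dynamic_map_set_without_hash` below, so only one temporary allocation is needed.)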
+ elem_backing := mem.alloc_bytes_non_zeroed(t.value.size, t.value.align, context.temp_allocator) or_return + defer delete(elem_backing, context.temp_allocator) + + map_backing_value := any{raw_data(elem_backing), t.value.id} + + for idx := 0; unknown || idx < length.?; idx += 1 { + // Decode key, keys can only be strings. + key: string + if keyv, kerr := decode_key(r, v); unknown && kerr == .Break { + break + } else if kerr != nil { + err = kerr + return + } else { + key = keyv + } + + if unknown { + // Reserve space for new element so we can return allocator errors. + new_len := uintptr(runtime.map_len(raw_map^)+1) + runtime.map_reserve_dynamic(raw_map, t.map_info, new_len) or_return + } + + mem.zero_slice(elem_backing) + _unmarshal_value(r, map_backing_value, _decode_header(r) or_return) or_return + + key_ptr := rawptr(&key) + key_cstr: cstring + if reflect.is_cstring(t.key) { + assert_safe_for_cstring(key) + key_cstr = cstring(raw_data(key)) + key_ptr = &key_cstr + } + + set_ptr := runtime.__dynamic_map_set_without_hash(raw_map, t.map_info, key_ptr, map_backing_value.data) + // We already reserved space for it, so this shouldn't fail. + assert(set_ptr != nil) + } + return + + case: + return _unsupported(v, hdr) + } +} + +_assign_int :: proc(val: any, i: $T) -> bool { + v := reflect.any_core(val) + + // NOTE: should under/over flow be checked here? `encoding/json` doesn't, but maybe that is a + // less strict encoding?. + + switch &dst in v { + case i8: dst = i8 (i) + case i16: dst = i16 (i) + case i16le: dst = i16le (i) + case i16be: dst = i16be (i) + case i32: dst = i32 (i) + case i32le: dst = i32le (i) + case i32be: dst = i32be (i) + case i64: dst = i64 (i) + case i64le: dst = i64le (i) + case i64be: dst = i64be (i) + case i128: dst = i128 (i) + case i128le: dst = i128le (i) + case i128be: dst = i128be (i) + case u8: dst = u8 (i) + case u16: dst = u16 (i) + case u16le: dst = u16le (i) + case u16be: dst = u16be (i) + case u32: dst = u32 (i) + case u32le: dst = u32le (i) + case u32be: dst = u32be (i) + case u64: dst = u64 (i) + case u64le: dst = u64le (i) + case u64be: dst = u64be (i) + case u128: dst = u128 (i) + case u128le: dst = u128le (i) + case u128be: dst = u128be (i) + case int: dst = int (i) + case uint: dst = uint (i) + case uintptr: dst = uintptr(i) + case: + ti := type_info_of(v.id) + do_byte_swap := is_bit_set_different_endian_to_platform(ti) + #partial switch info in ti.variant { + case runtime.Type_Info_Bit_Set: + switch ti.size * 8 { + case 0: + case 8: + x := (^u8)(v.data) + x^ = u8(i) + case 16: + x := (^u16)(v.data) + x^ = do_byte_swap ? intrinsics.byte_swap(u16(i)) : u16(i) + case 32: + x := (^u32)(v.data) + x^ = do_byte_swap ? intrinsics.byte_swap(u32(i)) : u32(i) + case 64: + x := (^u64)(v.data) + x^ = do_byte_swap ? intrinsics.byte_swap(u64(i)) : u64(i) + case: + panic("unknown bit_size size") + } + case: + return false + } + } + return true +} + +_assign_float :: proc(val: any, f: $T) -> bool { + v := reflect.any_core(val) + + // NOTE: should under/over flow be checked here? `encoding/json` doesn't, but maybe that is a + // less strict encoding?. 
+ + switch &dst in v { + case f16: dst = f16 (f) + case f16le: dst = f16le(f) + case f16be: dst = f16be(f) + case f32: dst = f32 (f) + case f32le: dst = f32le(f) + case f32be: dst = f32be(f) + case f64: dst = f64 (f) + case f64le: dst = f64le(f) + case f64be: dst = f64be(f) + + case complex32: dst = complex(f16(f), 0) + case complex64: dst = complex(f32(f), 0) + case complex128: dst = complex(f64(f), 0) + + case quaternion64: dst = quaternion(f16(f), 0, 0, 0) + case quaternion128: dst = quaternion(f32(f), 0, 0, 0) + case quaternion256: dst = quaternion(f64(f), 0, 0, 0) + + case: return false + } + return true +} + +_assign_bool :: proc(val: any, b: bool) -> bool { + v := reflect.any_core(val) + switch &dst in v { + case bool: dst = bool(b) + case b8: dst = b8 (b) + case b16: dst = b16 (b) + case b32: dst = b32 (b) + case b64: dst = b64 (b) + case: return false + } + return true +} + +// Sanity check that the decoder added a nil byte to the end. +@(private, disabled=ODIN_DISABLE_ASSERT) +assert_safe_for_cstring :: proc(s: string, loc := #caller_location) { + assert(([^]byte)(raw_data(s))[len(s)] == 0, loc = loc) +} -- cgit v1.2.3 From b6c47e796390924faabd236204bc620ea35c1d13 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 16 Dec 2023 21:40:41 +0100 Subject: encoding/base64: add decode_into, add tests --- core/encoding/base64/base64.odin | 133 ++++++++++++++++++++++----------- tests/core/Makefile | 3 + tests/core/build.bat | 2 + tests/core/encoding/base64/base64.odin | 60 +++++++++++++++ 4 files changed, 155 insertions(+), 43 deletions(-) create mode 100644 tests/core/encoding/base64/base64.odin (limited to 'core/encoding') diff --git a/core/encoding/base64/base64.odin b/core/encoding/base64/base64.odin index 793f22c57..535d457d5 100644 --- a/core/encoding/base64/base64.odin +++ b/core/encoding/base64/base64.odin @@ -44,71 +44,80 @@ DEC_TABLE := [128]int { } encode :: proc(data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) -> (encoded: string, err: mem.Allocator_Error) #optional_allocator_error { - out_length := encoded_length(data) + out_length := encoded_len(data) if out_length == 0 { return } - out: strings.Builder - strings.builder_init(&out, 0, out_length, allocator) or_return - + out := strings.builder_make(0, out_length, allocator) or_return ioerr := encode_into(strings.to_stream(&out), data, ENC_TBL) - assert(ioerr == nil) + + assert(ioerr == nil, "string builder should not IO error") + assert(strings.builder_cap(out) == out_length, "buffer resized, `encoded_len` was wrong") return strings.to_string(out), nil } -encoded_length :: #force_inline proc(data: []byte) -> int { +encode_into :: proc(w: io.Writer, data: []byte, ENC_TBL := ENC_TABLE) -> io.Error { length := len(data) if length == 0 { - return 0 + return nil } - return ((4 * length / 3) + 3) &~ 3 + c0, c1, c2, block: int + out: [4]byte + for i := 0; i < length; i += 3 { + #no_bounds_check { + c0, c1, c2 = int(data[i]), -1, -1 + + if i + 1 < length { c1 = int(data[i + 1]) } + if i + 2 < length { c2 = int(data[i + 2]) } + + block = (c0 << 16) | (max(c1, 0) << 8) | max(c2, 0) + + out[0] = ENC_TBL[block >> 18 & 63] + out[1] = ENC_TBL[block >> 12 & 63] + out[2] = c1 == -1 ? PADDING : ENC_TBL[block >> 6 & 63] + out[3] = c2 == -1 ? 
PADDING : ENC_TBL[block & 63] + } + io.write_full(w, out[:]) or_return + } + return nil } -encode_into :: proc(w: io.Writer, data: []byte, ENC_TBL := ENC_TABLE) -> (err: io.Error) #no_bounds_check { +encoded_len :: proc(data: []byte) -> int { length := len(data) if length == 0 { - return + return 0 } - c0, c1, c2, block: int + return ((4 * length / 3) + 3) &~ 3 +} - for i, d := 0, 0; i < length; i, d = i + 3, d + 4 { - c0, c1, c2 = int(data[i]), -1, -1 +decode :: proc(data: string, DEC_TBL := DEC_TABLE, allocator := context.allocator) -> (decoded: []byte, err: mem.Allocator_Error) #optional_allocator_error { + out_length := decoded_len(data) - if i + 1 < length { c1 = int(data[i + 1]) } - if i + 2 < length { c2 = int(data[i + 2]) } + out := strings.builder_make(0, out_length, allocator) or_return + ioerr := decode_into(strings.to_stream(&out), data, DEC_TBL) - block = (c0 << 16) | (max(c1, 0) << 8) | max(c2, 0) - - out: [4]byte - out[0] = ENC_TBL[block >> 18 & 63] - out[1] = ENC_TBL[block >> 12 & 63] - out[2] = c1 == -1 ? PADDING : ENC_TBL[block >> 6 & 63] - out[3] = c2 == -1 ? PADDING : ENC_TBL[block & 63] + assert(ioerr == nil, "string builder should not IO error") + assert(strings.builder_cap(out) == out_length, "buffer resized, `decoded_len` was wrong") - #bounds_check { io.write_full(w, out[:]) or_return } - } - return + return out.buf[:], nil } -decode :: proc(data: string, DEC_TBL := DEC_TABLE, allocator := context.allocator) -> (out: []byte, err: mem.Allocator_Error) #optional_allocator_error { - #no_bounds_check { - length := len(data) - if length == 0 { - return - } - - pad_count := data[length - 1] == PADDING ? (data[length - 2] == PADDING ? 2 : 1) : 0 - out_length := ((length * 6) >> 3) - pad_count - out = make([]byte, out_length, allocator) or_return - - c0, c1, c2, c3: int - b0, b1, b2: int +decode_into :: proc(w: io.Writer, data: string, DEC_TBL := DEC_TABLE) -> io.Error { + length := decoded_len(data) + if length == 0 { + return nil + } - for i, j := 0, 0; i < length; i, j = i + 4, j + 3 { + c0, c1, c2, c3: int + b0, b1, b2: int + buf: [3]byte + i, j: int + for ; j + 3 <= length; i, j = i + 4, j + 3 { + #no_bounds_check { c0 = DEC_TBL[data[i]] c1 = DEC_TBL[data[i + 1]] c2 = DEC_TBL[data[i + 2]] @@ -118,10 +127,48 @@ decode :: proc(data: string, DEC_TBL := DEC_TABLE, allocator := context.allocato b1 = (c1 << 4) | (c2 >> 2) b2 = (c2 << 6) | c3 - out[j] = byte(b0) - out[j + 1] = byte(b1) - out[j + 2] = byte(b2) + buf[0] = byte(b0) + buf[1] = byte(b1) + buf[2] = byte(b2) } - return + + io.write_full(w, buf[:]) or_return } + + rest := length - j + if rest > 0 { + #no_bounds_check { + c0 = DEC_TBL[data[i]] + c1 = DEC_TBL[data[i + 1]] + c2 = DEC_TBL[data[i + 2]] + + b0 = (c0 << 2) | (c1 >> 4) + b1 = (c1 << 4) | (c2 >> 2) + } + + switch rest { + case 1: io.write_byte(w, byte(b0)) or_return + case 2: io.write_full(w, {byte(b0), byte(b1)}) or_return + } + } + + return nil +} + +decoded_len :: proc(data: string) -> int { + length := len(data) + if length == 0 { + return 0 + } + + padding: int + if data[length - 1] == PADDING { + if length > 1 && data[length - 2] == PADDING { + padding = 2 + } else { + padding = 1 + } + } + + return ((length * 6) >> 3) - padding } diff --git a/tests/core/Makefile b/tests/core/Makefile index 1fca7bf97..3fa38cd34 100644 --- a/tests/core/Makefile +++ b/tests/core/Makefile @@ -51,11 +51,14 @@ noise_test: $(ODIN) run math/noise $(COMMON) -out:test_noise encoding_test: +<<<<<<< HEAD $(ODIN) run encoding/hxa $(COMMON) $(COLLECTION) -out:test_hxa $(ODIN) run 
encoding/json $(COMMON) -out:test_json $(ODIN) run encoding/varint $(COMMON) -out:test_varint $(ODIN) run encoding/xml $(COMMON) -out:test_xml $(ODIN) run encoding/cbor $(COMMON) -out:test_cbor + $(ODIN) run encoding/hex $(COMMON) -out:test_hex + $(ODIN) run encoding/base64 $(COMMON) -out:test_base64 math_test: $(ODIN) run math $(COMMON) $(COLLECTION) -out:test_core_math diff --git a/tests/core/build.bat b/tests/core/build.bat index 5bf8e1ead..b9fc4e828 100644 --- a/tests/core/build.bat +++ b/tests/core/build.bat @@ -41,6 +41,8 @@ rem %PATH_TO_ODIN% run encoding/hxa %COMMON% %COLLECTION% -out:test_hxa.exe | %PATH_TO_ODIN% run encoding/varint %COMMON% -out:test_varint.exe || exit /b %PATH_TO_ODIN% run encoding/xml %COMMON% -out:test_xml.exe || exit /b %PATH_TO_ODIN% test encoding/cbor %COMMON% -out:test_cbor.exe || exit /b +%PATH_TO_ODIN% run encoding/hex %COMMON% -out:test_hex.exe || exit /b +%PATH_TO_ODIN% run encoding/base64 %COMMON% -out:test_base64.exe || exit /b echo --- echo Running core:math/noise tests diff --git a/tests/core/encoding/base64/base64.odin b/tests/core/encoding/base64/base64.odin new file mode 100644 index 000000000..41dbba683 --- /dev/null +++ b/tests/core/encoding/base64/base64.odin @@ -0,0 +1,60 @@ +package test_encoding_base64 + +import "core:encoding/base64" +import "core:fmt" +import "core:intrinsics" +import "core:os" +import "core:reflect" +import "core:testing" + +TEST_count := 0 +TEST_fail := 0 + +when ODIN_TEST { + expect_value :: testing.expect_value + +} else { + expect_value :: proc(t: ^testing.T, value, expected: $T, loc := #caller_location) -> bool where intrinsics.type_is_comparable(T) { + TEST_count += 1 + ok := value == expected || reflect.is_nil(value) && reflect.is_nil(expected) + if !ok { + TEST_fail += 1 + fmt.printf("[%v] expected %v, got %v\n", loc, expected, value) + } + return ok + } +} + +main :: proc() { + t := testing.T{} + + test_encoding(&t) + test_decoding(&t) + + fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) + if TEST_fail > 0 { + os.exit(1) + } +} + +@(test) +test_encoding :: proc(t: ^testing.T) { + expect_value(t, base64.encode(transmute([]byte)string("")), "") + expect_value(t, base64.encode(transmute([]byte)string("f")), "Zg==") + expect_value(t, base64.encode(transmute([]byte)string("fo")), "Zm8=") + expect_value(t, base64.encode(transmute([]byte)string("foo")), "Zm9v") + expect_value(t, base64.encode(transmute([]byte)string("foob")), "Zm9vYg==") + expect_value(t, base64.encode(transmute([]byte)string("fooba")), "Zm9vYmE=") + expect_value(t, base64.encode(transmute([]byte)string("foobar")), "Zm9vYmFy") +} + +@(test) +test_decoding :: proc(t: ^testing.T) { + expect_value(t, string(base64.decode("")), "") + expect_value(t, string(base64.decode("Zg==")), "f") + expect_value(t, string(base64.decode("Zm8=")), "fo") + expect_value(t, string(base64.decode("Zm9v")), "foo") + expect_value(t, string(base64.decode("Zm9vYg==")), "foob") + expect_value(t, string(base64.decode("Zm9vYmE=")), "fooba") + expect_value(t, string(base64.decode("Zm9vYmFy")), "foobar") +} -- cgit v1.2.3 From 363769d4d3de601a64e7e4bd1e6b0e744c75671c Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 16 Dec 2023 21:42:06 +0100 Subject: encoding/cbor: cleanup base64 tag --- core/encoding/cbor/tags.odin | 112 ++++++++++++++++++++++++------------------- 1 file changed, 62 insertions(+), 50 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index 54bc7dd15..ef3ef45f2 100644 --- 
a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -213,20 +213,20 @@ tag_big_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_E // is uninitialized (which we checked). is_neg, err := big.is_negative(&vv, mem.panic_allocator()) - assert(err == nil, "only errors if not initialized, which has been checked") + assert(err == nil, "should only error if not initialized, which has been checked") tnr: u8 = TAG_NEGATIVE_BIG_NR if is_neg else TAG_UNSIGNED_BIG_NR _encode_u8(e.writer, tnr, .Tag) or_return size_in_bytes, berr := big.int_to_bytes_size(&vv, false, mem.panic_allocator()) - assert(berr == nil, "only errors if not initialized, which has been checked") + assert(berr == nil, "should only error if not initialized, which has been checked") assert(size_in_bytes >= 0) err_conv(_encode_u64(e, u64(size_in_bytes), .Bytes)) or_return for offset := (size_in_bytes*8)-8; offset >= 0; offset -= 8 { bits, derr := big.int_bitfield_extract(&vv, offset, 8, mem.panic_allocator()) - assert(derr == nil, "only errors if not initialized or invalid argument (offset and count), which won't happen") + assert(derr == nil, "should only error if not initialized or invalid argument (offset and count), which won't happen") io.write_full(e.writer, {u8(bits & 255)}) or_return } @@ -273,63 +273,75 @@ tag_cbor_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_ } } -// NOTE: this could probably be more efficient by decoding bytes from CBOR and then from base64 at the same time. @(private) tag_base64_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> (err: Unmarshal_Error) { - hdr := _decode_header(r) or_return + hdr := _decode_header(r) or_return major, add := _header_split(hdr) - #partial switch major { - case .Text: - ti := reflect.type_info_base(type_info_of(v.id)) - _unmarshal_bytes(r, v, ti, hdr, add) or_return - #partial switch t in ti.variant { - case runtime.Type_Info_String: - switch t.is_cstring { - case true: - str := string((^cstring)(v.data)^) - decoded := base64.decode(str) or_return - (^cstring)(v.data)^ = strings.clone_to_cstring(string(decoded)) or_return - delete(decoded) - delete(str) - case false: - str := (^string)(v.data)^ - decoded := base64.decode(str) or_return - (^string)(v.data)^ = string(decoded) - delete(str) - } - return + ti := reflect.type_info_base(type_info_of(v.id)) - case runtime.Type_Info_Array: - raw := ([^]byte)(v.data) - decoded := base64.decode(string(raw[:t.count])) or_return - copy(raw[:t.count], decoded) - delete(decoded) - return + if major != .Text && major != .Bytes { + return .Bad_Tag_Value + } - case runtime.Type_Info_Slice: - raw := (^[]byte)(v.data) - decoded := base64.decode(string(raw^)) or_return - delete(raw^) - raw^ = decoded - return + bytes: string; { + context.allocator = context.temp_allocator + bytes = string(err_conv(_decode_bytes(r, add)) or_return) + } + defer delete(bytes, context.temp_allocator) - case runtime.Type_Info_Dynamic_Array: - raw := (^mem.Raw_Dynamic_Array)(v.data) - str := string(((^[dynamic]byte)(v.data)^)[:]) + #partial switch t in ti.variant { + case reflect.Type_Info_String: + + if t.is_cstring { + length := base64.decoded_len(bytes) + builder := strings.builder_make(0, length+1) + base64.decode_into(strings.to_stream(&builder), bytes) or_return + + raw := (^cstring)(v.data) + raw^ = cstring(raw_data(builder.buf)) + } else { + raw := (^string)(v.data) + raw^ = string(base64.decode(bytes) or_return) + } - decoded := base64.decode(str) or_return - 
delete(str) + return - raw.data = raw_data(decoded) - raw.len = len(decoded) - raw.cap = len(decoded) - return + case reflect.Type_Info_Slice: + elem_base := reflect.type_info_base(t.elem) - case: unreachable() - } + if elem_base.id != byte { return _unsupported(v, hdr) } - case: return .Bad_Tag_Value + raw := (^[]byte)(v.data) + raw^ = base64.decode(bytes) or_return + return + + case reflect.Type_Info_Dynamic_Array: + elem_base := reflect.type_info_base(t.elem) + + if elem_base.id != byte { return _unsupported(v, hdr) } + + decoded := base64.decode(bytes) or_return + + raw := (^mem.Raw_Dynamic_Array)(v.data) + raw.data = raw_data(decoded) + raw.len = len(decoded) + raw.cap = len(decoded) + raw.allocator = context.allocator + return + + case reflect.Type_Info_Array: + elem_base := reflect.type_info_base(t.elem) + + if elem_base.id != byte { return _unsupported(v, hdr) } + + if base64.decoded_len(bytes) > t.count { return _unsupported(v, hdr) } + + slice := ([^]byte)(v.data)[:len(bytes)] + copy(slice, base64.decode(bytes) or_return) + return } + + return _unsupported(v, hdr) } @(private) @@ -355,7 +367,7 @@ tag_base64_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marsha } } - out_len := base64.encoded_length(bytes) + out_len := base64.encoded_len(bytes) err_conv(_encode_u64(e, u64(out_len), .Text)) or_return return base64.encode_into(e.writer, bytes) } -- cgit v1.2.3 From d77ae9ababb539e7b48258c94c3b55fc46e62919 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 16 Dec 2023 21:42:33 +0100 Subject: encoding/cbor: fully support marshal/unmarshal of unions --- core/encoding/cbor/marshal.odin | 26 ++- core/encoding/cbor/tags.odin | 9 + core/encoding/cbor/unmarshal.odin | 76 +++++++- tests/core/encoding/cbor/test_core_cbor.odin | 260 ++++++++++++++++++++++----- 4 files changed, 325 insertions(+), 46 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index aab2defb2..a5d5efb3e 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -506,8 +506,32 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { if v.data == nil || tag <= 0 { return _encode_nil(e.writer) } + id := info.variants[tag-1].id - return marshal_into(e, any{v.data, id}) + if len(info.variants) == 1 { + id := info.variants[tag-1].id + return marshal_into(e, any{v.data, id}) + } + + // Encode a non-nil multi-variant union as the `TAG_OBJECT_TYPE`. + // Which is a tag of an array, where the first element is the textual id/type of the object + // that follows it. + + err_conv(_encode_u16(e, TAG_OBJECT_TYPE, .Tag)) or_return + _encode_u8(e.writer, 2, .Array) or_return + + vti := reflect.union_variant_type_info(v) + #partial switch vt in vti.variant { + case reflect.Type_Info_Named: + err_conv(_encode_text(e, vt.name)) or_return + case: + builder := strings.builder_make(context.temp_allocator) or_return + defer strings.builder_destroy(&builder) + reflect.write_type(&builder, vti) + err_conv(_encode_text(e, strings.to_string(builder))) or_return + } + + return marshal_into(e, any{v.data, vti.id}) case runtime.Type_Info_Enum: return marshal_into(e, any{v.data, info.base.id}) diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index ef3ef45f2..509896d22 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -38,6 +38,15 @@ TAG_BASE64_ID :: "base64" // given content is definitely CBOR. 
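+// When written at the very start of a stream, this tag's head encodes to the three
+// bytes 0xD9 0xD9 0xF7 (see RFC 8949, section 3.4.6), a sequence that can serve as a
+// cheap magic-number check before attempting a full decode.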
TAG_SELF_DESCRIBED_CBOR :: 55799 +// A tag that is used to assign a textual type to the object following it. +// The tag's value must be an array of 2 items, where the first is text (describing the following type) +// and the second is any valid CBOR value. +// +// See the registration: https://datatracker.ietf.org/doc/draft-rundgren-cotx/05/ +// +// We use this in Odin to marshal and unmarshal unions. +TAG_OBJECT_TYPE :: 1010 + // A tag implementation that handles marshals and unmarshals for the tag it is registered on. Tag_Implementation :: struct { data: rawptr, diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 0da8e3f2a..c3ab6f908 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -8,9 +8,6 @@ import "core:runtime" import "core:strings" import "core:unicode/utf8" -// `strings` is only used in poly procs, but -vet thinks it is fully unused. -_ :: strings - /* Unmarshals the given CBOR into the given pointer using reflection. Types that require allocation are allocated using the given allocator. @@ -79,7 +76,7 @@ _unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_E dst = err_conv(decode(r, hdr)) or_return return } - + switch hdr { case .U8: decoded := _decode_u8(r) or_return @@ -275,10 +272,12 @@ _unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_E } nr := err_conv(_decode_tag_nr(r, add)) or_return - + // Custom tag implementations. if impl, ok := _tag_implementations_nr[nr]; ok { return impl->unmarshal(r, nr, v) + } else if nr == TAG_OBJECT_TYPE { + return _unmarshal_union(r, v, ti, hdr) } else { // Discard the tag info and unmarshal as its value. return _unmarshal_value(r, v, _decode_header(r) or_return) @@ -717,6 +716,73 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header } } +// Unmarshal into a union, based on the `TAG_OBJECT_TYPE` tag of the spec, it denotes a tag which +// contains an array of exactly two elements, the first is a textual representation of the following +// CBOR value's type. 
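+//
+// A sketch of the shape in diagnostic notation (names are illustrative): a value held
+// in a union variant named `My_Struct` marshals as
+//
+//	1010(["My_Struct", {"my_enum": 1}])
+//
+// and this procedure reverses that: it reads the two-element array, matches the text
+// against the union's variant names (or their written type for unnamed variants), and
+// unmarshals the second element into the selected variant.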
+_unmarshal_union :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header) -> (err: Unmarshal_Error) { + #partial switch t in ti.variant { + case reflect.Type_Info_Union: + idhdr: Header + target_name: string + { + vhdr := _decode_header(r) or_return + vmaj, vadd := _header_split(vhdr) + if vmaj != .Array { + return .Bad_Tag_Value + } + + n_items, unknown := err_conv(_decode_container_length(r, vadd)) or_return + if unknown || n_items != 2 { + return .Bad_Tag_Value + } + + idhdr = _decode_header(r) or_return + idmaj, idadd := _header_split(idhdr) + if idmaj != .Text { + return .Bad_Tag_Value + } + + context.allocator = context.temp_allocator + target_name = err_conv(_decode_text(r, idadd)) or_return + } + defer delete(target_name, context.temp_allocator) + + for variant, i in t.variants { + tag := i64(i) + if !t.no_nil { + tag += 1 + } + + #partial switch vti in variant.variant { + case reflect.Type_Info_Named: + if vti.name == target_name { + reflect.set_union_variant_raw_tag(v, tag) + return _unmarshal_value(r, any{v.data, variant.id}, _decode_header(r) or_return) + } + + case: + builder := strings.builder_make(context.temp_allocator) + defer strings.builder_destroy(&builder) + + reflect.write_type(&builder, variant) + variant_name := strings.to_string(builder) + + if variant_name == target_name { + reflect.set_union_variant_raw_tag(v, tag) + return _unmarshal_value(r, any{v.data, variant.id}, _decode_header(r) or_return) + } + } + } + + // No variant matched. + return _unsupported(v, idhdr) + + case: + // Not a union. + return _unsupported(v, hdr) + } +} + _assign_int :: proc(val: any, i: $T) -> bool { v := reflect.any_core(val) diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index 22359d830..06b96c915 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -6,10 +6,96 @@ import "core:fmt" import "core:intrinsics" import "core:math/big" import "core:mem" +import "core:os" import "core:reflect" import "core:testing" import "core:time" +TEST_count := 0 +TEST_fail := 0 + +when ODIN_TEST { + expect :: testing.expect + expect_value :: testing.expect_value + errorf :: testing.errorf + log :: testing.log + +} else { + expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) { + TEST_count += 1 + if !condition { + TEST_fail += 1 + fmt.printf("[%v] %v\n", loc, message) + return + } + } + + expect_value :: proc(t: ^testing.T, value, expected: $T, loc := #caller_location) -> bool where intrinsics.type_is_comparable(T) { + TEST_count += 1 + ok := value == expected || reflect.is_nil(value) && reflect.is_nil(expected) + if !ok { + TEST_fail += 1 + fmt.printf("[%v] expected %v, got %v\n", loc, expected, value) + } + return ok + } + + errorf :: proc(t: ^testing.T, fmts: string, args: ..any, loc := #caller_location) { + TEST_fail += 1 + fmt.printf("[%v] ERROR: ", loc) + fmt.printf(fmts, ..args) + fmt.println() + } + + log :: proc(t: ^testing.T, v: any, loc := #caller_location) { + fmt.printf("[%v] ", loc) + fmt.printf("log: %v\n", v) + } +} + +main :: proc() { + t := testing.T{} + + test_marshalling(&t) + + test_marshalling_maybe(&t) + test_marshalling_nil_maybe(&t) + + test_cbor_marshalling_union(&t) + + test_decode_unsigned(&t) + test_encode_unsigned(&t) + + test_decode_negative(&t) + test_encode_negative(&t) + + test_decode_simples(&t) + test_encode_simples(&t) + + test_decode_floats(&t) + test_encode_floats(&t) + + test_decode_bytes(&t) + 
test_encode_bytes(&t) + + test_decode_strings(&t) + test_encode_strings(&t) + + test_decode_lists(&t) + test_encode_lists(&t) + + test_decode_maps(&t) + test_encode_maps(&t) + + test_decode_tags(&t) + test_encode_tags(&t) + + fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) + if TEST_fail > 0 { + os.exit(1) + } +} + Foo :: struct { str: string, cstr: cstring, @@ -58,7 +144,7 @@ test_marshalling :: proc(t: ^testing.T) { context.temp_allocator = context.allocator defer mem.tracking_allocator_destroy(&tracker) - ev :: testing.expect_value + ev :: expect_value { nice := "16 is a nice number" @@ -228,7 +314,7 @@ test_marshalling :: proc(t: ^testing.T) { } } - case: testing.error(t, v) + case: errorf(t, "wrong type %v", v) } ev(t, backf.neg, f.neg) @@ -258,22 +344,116 @@ test_marshalling :: proc(t: ^testing.T) { s_equals, s_err := big.equals(&backf.smallest, &f.smallest) ev(t, s_err, nil) if !s_equals { - testing.errorf(t, "smallest: %v does not equal %v", big.itoa(&backf.smallest), big.itoa(&f.smallest)) + errorf(t, "smallest: %v does not equal %v", big.itoa(&backf.smallest), big.itoa(&f.smallest)) } b_equals, b_err := big.equals(&backf.biggest, &f.biggest) ev(t, b_err, nil) if !b_equals { - testing.errorf(t, "biggest: %v does not equal %v", big.itoa(&backf.biggest), big.itoa(&f.biggest)) + errorf(t, "biggest: %v does not equal %v", big.itoa(&backf.biggest), big.itoa(&f.biggest)) } } for _, leak in tracker.allocation_map { - testing.errorf(t, "%v leaked %m\n", leak.location, leak.size) + errorf(t, "%v leaked %m\n", leak.location, leak.size) } for bad_free in tracker.bad_free_array { - testing.errorf(t, "%v allocation %p was freed badly\n", bad_free.location, bad_free.memory) + errorf(t, "%v allocation %p was freed badly\n", bad_free.location, bad_free.memory) + } +} + +@(test) +test_marshalling_maybe :: proc(t: ^testing.T) { + maybe_test: Maybe(int) = 1 + data, err := cbor.marshal(maybe_test) + expect_value(t, err, nil) + + val, derr := cbor.decode(string(data)) + expect_value(t, derr, nil) + + expect_value(t, cbor.diagnose(val), "1") + + maybe_dest: Maybe(int) + uerr := cbor.unmarshal(string(data), &maybe_dest) + expect_value(t, uerr, nil) + expect_value(t, maybe_dest, 1) +} + +@(test) +test_marshalling_nil_maybe :: proc(t: ^testing.T) { + maybe_test: Maybe(int) + data, err := cbor.marshal(maybe_test) + expect_value(t, err, nil) + + val, derr := cbor.decode(string(data)) + expect_value(t, derr, nil) + + expect_value(t, cbor.diagnose(val), "nil") + + maybe_dest: Maybe(int) + uerr := cbor.unmarshal(string(data), &maybe_dest) + expect_value(t, uerr, nil) + expect_value(t, maybe_dest, nil) +} + +@(test) +test_cbor_marshalling_union :: proc(t: ^testing.T) { + My_Distinct :: distinct string + + My_Enum :: enum { + One, + Two, + } + + My_Struct :: struct { + my_enum: My_Enum, + } + + My_Union :: union { + string, + My_Distinct, + My_Struct, + int, + } + + { + test: My_Union = My_Distinct("Hello, World!") + data, err := cbor.marshal(test) + expect_value(t, err, nil) + + val, derr := cbor.decode(string(data)) + expect_value(t, derr, nil) + + expect_value(t, cbor.diagnose(val, -1), `1010(["My_Distinct", "Hello, World!"])`) + + dest: My_Union + uerr := cbor.unmarshal(string(data), &dest) + expect_value(t, uerr, nil) + expect_value(t, dest, My_Distinct("Hello, World!")) + } + + My_Union_No_Nil :: union #no_nil { + string, + My_Distinct, + My_Struct, + int, + } + + { + test: My_Union_No_Nil = My_Struct{.Two} + data, err := cbor.marshal(test) + expect_value(t, err, nil) + + val, 
derr := cbor.decode(string(data)) + expect_value(t, derr, nil) + + expect_value(t, cbor.diagnose(val, -1), `1010(["My_Struct", {"my_enum": 1}])`) + + dest: My_Union_No_Nil + uerr := cbor.unmarshal(string(data), &dest) + expect_value(t, uerr, nil) + expect_value(t, dest, My_Struct{.Two}) } } @@ -500,34 +680,34 @@ test_encode_lists :: proc(t: ^testing.T) { err: cbor.Encode_Error err = cbor.encode_stream_begin(stream, .Array) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) { err = cbor.encode_stream_array_item(encoder, u8(1)) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) err = cbor.encode_stream_array_item(encoder, &cbor.Array{u8(2), u8(3)}) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) err = cbor.encode_stream_begin(stream, .Array) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) { err = cbor.encode_stream_array_item(encoder, u8(4)) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) err = cbor.encode_stream_array_item(encoder, u8(5)) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) } err = cbor.encode_stream_end(stream) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) } err = cbor.encode_stream_end(stream) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) - testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)string("\x9f\x01\x82\x02\x03\x9f\x04\x05\xff\xff"))) + expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)string("\x9f\x01\x82\x02\x03\x9f\x04\x05\xff\xff"))) } { @@ -535,26 +715,26 @@ test_encode_lists :: proc(t: ^testing.T) { err: cbor.Encode_Error err = cbor._encode_u8(stream, 2, .Array) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) a := "a" err = cbor.encode(encoder, &a) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) { err = cbor.encode_stream_begin(stream, .Map) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) b := "b" c := "c" err = cbor.encode_stream_map_entry(encoder, &b, &c) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) err = cbor.encode_stream_end(stream) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) } - testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)string("\x82\x61\x61\xbf\x61\x62\x61\x63\xff"))) + expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)string("\x82\x61\x61\xbf\x61\x62\x61\x63\xff"))) } } @@ -619,13 +799,13 @@ expect_decoding :: proc(t: ^testing.T, encoded: string, decoded: string, type: t res, err := cbor.decode(stream) defer cbor.destroy(res) - testing.expect_value(t, reflect.union_variant_typeid(res), type, loc) - testing.expect_value(t, err, nil, loc) + expect_value(t, reflect.union_variant_typeid(res), type, loc) + expect_value(t, err, nil, loc) str := cbor.diagnose(res, padding=-1) defer delete(str) - testing.expect_value(t, str, decoded, loc) + expect_value(t, str, decoded, loc) } expect_tag :: proc(t: ^testing.T, encoded: string, nr: cbor.Tag_Number, value_decoded: string, loc := #caller_location) { @@ -635,17 +815,17 @@ expect_tag :: proc(t: ^testing.T, encoded: string, nr: cbor.Tag_Number, value_de res, err := cbor.decode(stream) defer cbor.destroy(res) - testing.expect_value(t, err, nil, loc) + expect_value(t, err, nil, loc) if tag, is_tag := res.(^cbor.Tag); is_tag { - testing.expect_value(t, tag.number, nr, loc) + expect_value(t, tag.number, nr, loc) str := cbor.diagnose(tag, padding=-1) defer 
delete(str) - testing.expect_value(t, str, value_decoded, loc) + expect_value(t, str, value_decoded, loc) } else { - testing.errorf(t, "Value %#v is not a tag", res, loc) + errorf(t, "Value %#v is not a tag", res, loc) } } @@ -656,16 +836,16 @@ expect_float :: proc(t: ^testing.T, encoded: string, expected: $T, loc := #calle res, err := cbor.decode(stream) defer cbor.destroy(res) - testing.expect_value(t, reflect.union_variant_typeid(res), typeid_of(T), loc) - testing.expect_value(t, err, nil, loc) + expect_value(t, reflect.union_variant_typeid(res), typeid_of(T), loc) + expect_value(t, err, nil, loc) #partial switch r in res { case f16: - when T == f16 { testing.expect_value(t, res, expected, loc) } else { unreachable() } + when T == f16 { expect_value(t, res, expected, loc) } else { unreachable() } case f32: - when T == f32 { testing.expect_value(t, res, expected, loc) } else { unreachable() } + when T == f32 { expect_value(t, res, expected, loc) } else { unreachable() } case f64: - when T == f64 { testing.expect_value(t, res, expected, loc) } else { unreachable() } + when T == f64 { expect_value(t, res, expected, loc) } else { unreachable() } case: unreachable() } @@ -675,8 +855,8 @@ expect_encoding :: proc(t: ^testing.T, val: cbor.Value, encoded: string, loc := bytes.buffer_reset(&buf) err := cbor.encode(encoder, val) - testing.expect_value(t, err, nil, loc) - testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)encoded), loc) + expect_value(t, err, nil, loc) + expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)encoded), loc) } expect_streamed_encoding :: proc(t: ^testing.T, encoded: string, values: ..cbor.Value, loc := #caller_location) { @@ -705,15 +885,15 @@ expect_streamed_encoding :: proc(t: ^testing.T, encoded: string, values: ..cbor. if err2 != nil { break } } case: - testing.errorf(t, "%v does not support streamed encoding", reflect.union_variant_typeid(value)) + errorf(t, "%v does not support streamed encoding", reflect.union_variant_typeid(value)) } - testing.expect_value(t, err, nil, loc) - testing.expect_value(t, err2, nil, loc) + expect_value(t, err, nil, loc) + expect_value(t, err2, nil, loc) } err := cbor.encode_stream_end(stream) - testing.expect_value(t, err, nil, loc) + expect_value(t, err, nil, loc) - testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)encoded), loc) + expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)encoded), loc) } -- cgit v1.2.3 From 21e6e28a3a5609bc4db19dd2b1bc00ff7b1ac5e5 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 16 Dec 2023 23:02:30 +0100 Subject: encoding/cbor: add decoder flags and protect from malicious untrusted input --- core/encoding/cbor/cbor.odin | 8 +- core/encoding/cbor/coding.odin | 275 ++++++++++++++++----------- core/encoding/cbor/tags.odin | 32 ++-- core/encoding/cbor/unmarshal.odin | 246 ++++++++++++++---------- tests/core/encoding/cbor/test_core_cbor.odin | 17 +- 5 files changed, 349 insertions(+), 229 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index e91c53f3c..9c4bb0e4e 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -10,8 +10,13 @@ import "core:strings" // If we are decoding a stream of either a map or list, the initial capacity will be this value. 
INITIAL_STREAMED_CONTAINER_CAPACITY :: 8 + // If we are decoding a stream of either text or bytes, the initial capacity will be this value. -INITIAL_STREAMED_BYTES_CAPACITY :: 16 +INITIAL_STREAMED_BYTES_CAPACITY :: 16 + +// The default maximum amount of bytes to allocate on a buffer/container at once to prevent +// malicious input from causing massive allocations. +DEFAULT_MAX_PRE_ALLOC :: mem.Kilobyte // Known/common headers are defined, undefined headers can still be valid. // Higher 3 bits is for the major type and lower 5 bits for the additional information. @@ -157,6 +162,7 @@ Decode_Data_Error :: enum { Nested_Indefinite_Length, // When an streamed/indefinite length container nests another, this is not allowed. Nested_Tag, // When a tag's value is another tag, this is not allowed. Length_Too_Big, // When the length of a container (map, array, bytes, string) is more than `max(int)`. + Disallowed_Streaming, // When the `.Disallow_Streaming` flag is set and a streaming header is encountered. Break, } diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index 5c14d8f87..e39519e01 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -33,16 +33,40 @@ Encoder_Flags :: bit_set[Encoder_Flag] // Flags for fully deterministic output (if you are not using streaming/indeterminate length). ENCODE_FULLY_DETERMINISTIC :: Encoder_Flags{.Deterministic_Int_Size, .Deterministic_Float_Size, .Deterministic_Map_Sorting} + // Flags for the smallest encoding output. -ENCODE_SMALL :: Encoder_Flags{.Deterministic_Int_Size, .Deterministic_Float_Size} -// Flags for the fastest encoding output. -ENCODE_FAST :: Encoder_Flags{} +ENCODE_SMALL :: Encoder_Flags{.Deterministic_Int_Size, .Deterministic_Float_Size} Encoder :: struct { flags: Encoder_Flags, writer: io.Writer, } +Decoder_Flag :: enum { + // Rejects (with an error `.Disallowed_Streaming`) when a streaming CBOR header is encountered. + Disallow_Streaming, + + // Pre-allocates buffers and containers with the size that was set in the CBOR header. + // This should only be enabled when you control both ends of the encoding, if you don't, + // attackers can craft input that causes massive (`max(u64)`) byte allocations for a few bytes of + // CBOR. + Trusted_Input, + + // Makes the decoder shrink of excess capacity from allocated buffers/containers before returning. + Shrink_Excess, +} + +Decoder_Flags :: bit_set[Decoder_Flag] + +Decoder :: struct { + // The max amount of bytes allowed to pre-allocate when `.Trusted_Input` is not set on the + // flags. + max_pre_alloc: int, + + flags: Decoder_Flags, + reader: io.Reader, +} + /* Decodes both deterministic and non-deterministic CBOR into a `Value` variant. @@ -52,28 +76,60 @@ Allocations are done using the given allocator, *no* allocations are done on the `context.temp_allocator`. A value can be (fully and recursively) deallocated using the `destroy` proc in this package. + +Disable streaming/indeterminate lengths with the `.Disallow_Streaming` flag. + +Shrink excess bytes in buffers and containers with the `.Shrink_Excess` flag. + +Mark the input as trusted input with the `.Trusted_Input` flag, this turns off the safety feature +of not pre-allocating more than `max_pre_alloc` bytes before reading into the bytes. You should only +do this when you own both sides of the encoding and are sure there can't be malicious bytes used as +an input. 
*/ -decode :: proc { - decode_string, - decode_reader, +decode_from :: proc { + decode_from_string, + decode_from_reader, + decode_from_decoder, } +decode :: decode_from // Decodes the given string as CBOR. // See docs on the proc group `decode` for more information. -decode_string :: proc(s: string, allocator := context.allocator) -> (v: Value, err: Decode_Error) { +decode_from_string :: proc(s: string, flags: Decoder_Flags = {}, allocator := context.allocator) -> (v: Value, err: Decode_Error) { context.allocator = allocator - r: strings.Reader strings.reader_init(&r, s) - return decode(strings.reader_to_stream(&r), allocator=allocator) + return decode_from_reader(strings.reader_to_stream(&r), flags) } // Reads a CBOR value from the given reader. // See docs on the proc group `decode` for more information. -decode_reader :: proc(r: io.Reader, hdr: Header = Header(0), allocator := context.allocator) -> (v: Value, err: Decode_Error) { +decode_from_reader :: proc(r: io.Reader, flags: Decoder_Flags = {}, allocator := context.allocator) -> (v: Value, err: Decode_Error) { + return decode_from_decoder( + Decoder{ DEFAULT_MAX_PRE_ALLOC, flags, r }, + allocator=allocator, + ) +} + +// Reads a CBOR value from the given decoder. +// See docs on the proc group `decode` for more information. +decode_from_decoder :: proc(d: Decoder, allocator := context.allocator) -> (v: Value, err: Decode_Error) { context.allocator = allocator + d := d + if d.max_pre_alloc <= 0 { + d.max_pre_alloc = DEFAULT_MAX_PRE_ALLOC + } + + v, err = _decode_from_decoder(d) + // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. + if err == .EOF { err = .Unexpected_EOF } + return +} + +_decode_from_decoder :: proc(d: Decoder, hdr: Header = Header(0)) -> (v: Value, err: Decode_Error) { hdr := hdr + r := d.reader if hdr == Header(0) { hdr = _decode_header(r) or_return } switch hdr { case .U8: return _decode_u8 (r) @@ -105,11 +161,11 @@ decode_reader :: proc(r: io.Reader, hdr: Header = Header(0), allocator := contex switch maj { case .Unsigned: return _decode_tiny_u8(add) case .Negative: return Negative_U8(_decode_tiny_u8(add) or_return), nil - case .Bytes: return _decode_bytes_ptr(r, add) - case .Text: return _decode_text_ptr(r, add) - case .Array: return _decode_array_ptr(r, add) - case .Map: return _decode_map_ptr(r, add) - case .Tag: return _decode_tag_ptr(r, add) + case .Bytes: return _decode_bytes_ptr(d, add) + case .Text: return _decode_text_ptr(d, add) + case .Array: return _decode_array_ptr(d, add) + case .Map: return _decode_map_ptr(d, add) + case .Tag: return _decode_tag_ptr(d, add) case .Other: return _decode_tiny_simple(add) case: return nil, .Bad_Major } @@ -246,7 +302,7 @@ _encode_u8 :: proc(w: io.Writer, v: u8, major: Major = .Unsigned) -> (err: io.Er } _decode_tiny_u8 :: proc(additional: Add) -> (u8, Decode_Data_Error) { - if intrinsics.expect(additional < .One_Byte, true) { + if additional < .One_Byte { return u8(additional), nil } @@ -316,64 +372,53 @@ _encode_u64_exact :: proc(w: io.Writer, v: u64, major: Major = .Unsigned) -> (er return } -_decode_bytes_ptr :: proc(r: io.Reader, add: Add, type: Major = .Bytes) -> (v: ^Bytes, err: Decode_Error) { +_decode_bytes_ptr :: proc(d: Decoder, add: Add, type: Major = .Bytes) -> (v: ^Bytes, err: Decode_Error) { v = new(Bytes) or_return defer if err != nil { free(v) } - v^ = _decode_bytes(r, add, type) or_return + v^ = _decode_bytes(d, add, type) or_return return } -_decode_bytes :: proc(r: io.Reader, add: Add, type: Major = .Bytes) 
-> (v: Bytes, err: Decode_Error) { - _n_items, length_is_unknown := _decode_container_length(r, add) or_return - - n_items := _n_items.? or_else INITIAL_STREAMED_BYTES_CAPACITY - - if length_is_unknown { - buf: strings.Builder - buf.buf = make([dynamic]byte, 0, n_items) or_return - defer if err != nil { strings.builder_destroy(&buf) } - - buf_stream := strings.to_stream(&buf) +_decode_bytes :: proc(d: Decoder, add: Add, type: Major = .Bytes) -> (v: Bytes, err: Decode_Error) { + n, scap := _decode_len_str(d, add) or_return + + buf := strings.builder_make(0, scap) or_return + defer if err != nil { strings.builder_destroy(&buf) } + buf_stream := strings.to_stream(&buf) - for { - header := _decode_header(r) or_return + if n == -1 { + indefinite_loop: for { + header := _decode_header(d.reader) or_return maj, add := _header_split(header) - #partial switch maj { case type: - _n_items, length_is_unknown := _decode_container_length(r, add) or_return - if length_is_unknown { + iter_n, iter_cap := _decode_len_str(d, add) or_return + if iter_n == -1 { return nil, .Nested_Indefinite_Length } - n_items := i64(_n_items.?) + reserve(&buf.buf, len(buf.buf) + iter_cap) or_return + io.copy_n(buf_stream, d.reader, i64(iter_n)) or_return - copied := io.copy_n(buf_stream, r, n_items) or_return - assert(copied == n_items) - case .Other: if add != .Break { return nil, .Bad_Argument } - - v = buf.buf[:] - - // Write zero byte so this can be converted to cstring. - io.write_full(buf_stream, {0}) or_return - shrink(&buf.buf) // Ignoring error, this is not critical to succeed. - return + break indefinite_loop case: return nil, .Bad_Major } } } else { - v = make([]byte, n_items + 1) or_return // Space for the bytes and a zero byte. - defer if err != nil { delete(v) } + io.copy_n(buf_stream, d.reader, i64(n)) or_return + } - io.read_full(r, v[:n_items]) or_return + v = buf.buf[:] - v = v[:n_items] // Take off zero byte. - return - } + // Write zero byte so this can be converted to cstring. + strings.write_byte(&buf, 0) + + if .Shrink_Excess in d.flags { shrink(&buf.buf) } + return } _encode_bytes :: proc(e: Encoder, val: Bytes, major: Major = .Bytes) -> (err: Encode_Error) { @@ -383,43 +428,41 @@ _encode_bytes :: proc(e: Encoder, val: Bytes, major: Major = .Bytes) -> (err: En return } -_decode_text_ptr :: proc(r: io.Reader, add: Add) -> (v: ^Text, err: Decode_Error) { +_decode_text_ptr :: proc(d: Decoder, add: Add) -> (v: ^Text, err: Decode_Error) { v = new(Text) or_return defer if err != nil { free(v) } - v^ = _decode_text(r, add) or_return + v^ = _decode_text(d, add) or_return return } -_decode_text :: proc(r: io.Reader, add: Add) -> (v: Text, err: Decode_Error) { - return (Text)(_decode_bytes(r, add, .Text) or_return), nil +_decode_text :: proc(d: Decoder, add: Add) -> (v: Text, err: Decode_Error) { + return (Text)(_decode_bytes(d, add, .Text) or_return), nil } _encode_text :: proc(e: Encoder, val: Text) -> Encode_Error { return _encode_bytes(e, transmute([]byte)val, .Text) } -_decode_array_ptr :: proc(r: io.Reader, add: Add) -> (v: ^Array, err: Decode_Error) { +_decode_array_ptr :: proc(d: Decoder, add: Add) -> (v: ^Array, err: Decode_Error) { v = new(Array) or_return defer if err != nil { free(v) } - v^ = _decode_array(r, add) or_return + v^ = _decode_array(d, add) or_return return } -_decode_array :: proc(r: io.Reader, add: Add) -> (v: Array, err: Decode_Error) { - _n_items, length_is_unknown := _decode_container_length(r, add) or_return - n_items := _n_items.? 
or_else INITIAL_STREAMED_CONTAINER_CAPACITY - - array := make([dynamic]Value, 0, n_items) or_return +_decode_array :: proc(d: Decoder, add: Add) -> (v: Array, err: Decode_Error) { + n, scap := _decode_len_container(d, add) or_return + array := make([dynamic]Value, 0, scap) or_return defer if err != nil { for entry in array { destroy(entry) } delete(array) } - for i := 0; length_is_unknown || i < n_items; i += 1 { - val, verr := decode(r) - if length_is_unknown && verr == .Break { + for i := 0; n == -1 || i < n; i += 1 { + val, verr := _decode_from_decoder(d) + if n == -1 && verr == .Break { break } else if verr != nil { err = verr @@ -428,8 +471,9 @@ _decode_array :: proc(r: io.Reader, add: Add) -> (v: Array, err: Decode_Error) { append(&array, val) or_return } + + if .Shrink_Excess in d.flags { shrink(&array) } - shrink(&array) v = array[:] return } @@ -443,19 +487,17 @@ _encode_array :: proc(e: Encoder, arr: Array) -> Encode_Error { return nil } -_decode_map_ptr :: proc(r: io.Reader, add: Add) -> (v: ^Map, err: Decode_Error) { +_decode_map_ptr :: proc(d: Decoder, add: Add) -> (v: ^Map, err: Decode_Error) { v = new(Map) or_return defer if err != nil { free(v) } - v^ = _decode_map(r, add) or_return + v^ = _decode_map(d, add) or_return return } -_decode_map :: proc(r: io.Reader, add: Add) -> (v: Map, err: Decode_Error) { - _n_items, length_is_unknown := _decode_container_length(r, add) or_return - n_items := _n_items.? or_else INITIAL_STREAMED_CONTAINER_CAPACITY - - items := make([dynamic]Map_Entry, 0, n_items) or_return +_decode_map :: proc(d: Decoder, add: Add) -> (v: Map, err: Decode_Error) { + n, scap := _decode_len_container(d, add) or_return + items := make([dynamic]Map_Entry, 0, scap) or_return defer if err != nil { for entry in items { destroy(entry.key) @@ -464,23 +506,24 @@ _decode_map :: proc(r: io.Reader, add: Add) -> (v: Map, err: Decode_Error) { delete(items) } - for i := 0; length_is_unknown || i < n_items; i += 1 { - key, kerr := decode(r) - if length_is_unknown && kerr == .Break { + for i := 0; n == -1 || i < n; i += 1 { + key, kerr := _decode_from_decoder(d) + if n == -1 && kerr == .Break { break } else if kerr != nil { return nil, kerr } - value := decode(r) or_return + value := decode_from_decoder(d) or_return append(&items, Map_Entry{ key = key, value = value, }) or_return } + + if .Shrink_Excess in d.flags { shrink(&items) } - shrink(&items) v = items[:] return } @@ -537,8 +580,8 @@ _encode_map :: proc(e: Encoder, m: Map) -> (err: Encode_Error) { return nil } -_decode_tag_ptr :: proc(r: io.Reader, add: Add) -> (v: Value, err: Decode_Error) { - tag := _decode_tag(r, add) or_return +_decode_tag_ptr :: proc(d: Decoder, add: Add) -> (v: Value, err: Decode_Error) { + tag := _decode_tag(d, add) or_return if t, ok := tag.?; ok { defer if err != nil { destroy(t.value) } tp := new(Tag) or_return @@ -547,11 +590,11 @@ _decode_tag_ptr :: proc(r: io.Reader, add: Add) -> (v: Value, err: Decode_Error) } // no error, no tag, this was the self described CBOR tag, skip it. - return decode(r) + return _decode_from_decoder(d) } -_decode_tag :: proc(r: io.Reader, add: Add) -> (v: Maybe(Tag), err: Decode_Error) { - num := _decode_tag_nr(r, add) or_return +_decode_tag :: proc(d: Decoder, add: Add) -> (v: Maybe(Tag), err: Decode_Error) { + num := _decode_uint_as_u64(d.reader, add) or_return // CBOR can be wrapped in a tag that decoders can use to see/check if the binary data is CBOR. // We can ignore it here. 
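A minimal usage sketch of the decoder flags introduced above (the procedure name, the input
bytes and the 512-byte cap are illustrative only, not part of this patch); the same `flags`
parameter is also accepted by `unmarshal`:

	package example

	import "core:encoding/cbor"
	import "core:strings"

	decode_untrusted_sketch :: proc() {
		input := "\x83\x01\x02\x03" // CBOR encoding of the array [1, 2, 3].

		// String entry point: pass flags directly and keep the default `max_pre_alloc`.
		val, err := cbor.decode(input, cbor.Decoder_Flags{.Disallow_Streaming, .Shrink_Excess})
		assert(err == nil)
		defer cbor.destroy(val)

		// Full control: build a `Decoder` and cap pre-allocations at 512 bytes.
		r: strings.Reader
		strings.reader_init(&r, input)
		val2, err2 := cbor.decode_from_decoder(cbor.Decoder{
			max_pre_alloc = 512,
			flags         = {.Disallow_Streaming},
			reader        = strings.reader_to_stream(&r),
		})
		assert(err2 == nil)
		defer cbor.destroy(val2)
	}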
@@ -561,7 +604,7 @@ _decode_tag :: proc(r: io.Reader, add: Add) -> (v: Maybe(Tag), err: Decode_Error t := Tag{ number = num, - value = decode(r) or_return, + value = _decode_from_decoder(d) or_return, } if nested, ok := t.value.(^Tag); ok { @@ -572,7 +615,7 @@ _decode_tag :: proc(r: io.Reader, add: Add) -> (v: Maybe(Tag), err: Decode_Error return t, nil } -_decode_tag_nr :: proc(r: io.Reader, add: Add) -> (nr: Tag_Number, err: Decode_Error) { +_decode_uint_as_u64 :: proc(r: io.Reader, add: Add) -> (nr: u64, err: Decode_Error) { #partial switch add { case .One_Byte: return u64(_decode_u8(r) or_return), nil case .Two_Bytes: return u64(_decode_u16(r) or_return), nil @@ -719,30 +762,50 @@ encode_stream_map_entry :: proc(e: Encoder, key: Value, val: Value) -> Encode_Er return encode(e, val) } -// - -_decode_container_length :: proc(r: io.Reader, add: Add) -> (length: Maybe(int), is_unknown: bool, err: Decode_Error) { - if add == Add.Length_Unknown { return nil, true, nil } - #partial switch add { - case .One_Byte: length = int(_decode_u8(r) or_return) - case .Two_Bytes: length = int(_decode_u16(r) or_return) - case .Four_Bytes: - big_length := _decode_u32(r) or_return - if u64(big_length) > u64(max(int)) { - err = .Length_Too_Big - return +// For `Bytes` and `Text` strings: Decodes the number of items the header says follows. +// If the number is not specified -1 is returned and streaming should be initiated. +// A suitable starting capacity is also returned for a buffer that is allocated up the stack. +_decode_len_str :: proc(d: Decoder, add: Add) -> (n: int, scap: int, err: Decode_Error) { + if add == .Length_Unknown { + if .Disallow_Streaming in d.flags { + return -1, -1, .Disallowed_Streaming } - length = int(big_length) - case .Eight_Bytes: - big_length := _decode_u64(r) or_return - if big_length > u64(max(int)) { - err = .Length_Too_Big - return + return -1, INITIAL_STREAMED_BYTES_CAPACITY, nil + } + + _n := _decode_uint_as_u64(d.reader, add) or_return + if _n > u64(max(int)) { return -1, -1, .Length_Too_Big } + n = int(_n) + + scap = n + 1 // Space for zero byte. + if .Trusted_Input not_in d.flags { + scap = min(d.max_pre_alloc, scap) + } + + return +} + +// For `Array` and `Map` types: Decodes the number of items the header says follows. +// If the number is not specified -1 is returned and streaming should be initiated. +// A suitable starting capacity is also returned for a buffer that is allocated up the stack. +_decode_len_container :: proc(d: Decoder, add: Add) -> (n: int, scap: int, err: Decode_Error) { + if add == .Length_Unknown { + if .Disallow_Streaming in d.flags { + return -1, -1, .Disallowed_Streaming } - length = int(big_length) - case: - length = int(_decode_tiny_u8(add) or_return) + return -1, INITIAL_STREAMED_CONTAINER_CAPACITY, nil + } + + _n := _decode_uint_as_u64(d.reader, add) or_return + if _n > u64(max(int)) { return -1, -1, .Length_Too_Big } + n = int(_n) + + scap = n + if .Trusted_Input not_in d.flags { + // NOTE: if this is a map it will be twice this. + scap = min(d.max_pre_alloc / size_of(Value), scap) } + return } diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index 509896d22..d2867e7be 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -55,7 +55,7 @@ Tag_Implementation :: struct { } // Procedure responsible for umarshalling the tag out of the reader into the given `any`. 
-Tag_Unmarshal_Proc :: #type proc(self: ^Tag_Implementation, r: io.Reader, tag_nr: Tag_Number, v: any) -> Unmarshal_Error +Tag_Unmarshal_Proc :: #type proc(self: ^Tag_Implementation, d: Decoder, tag_nr: Tag_Number, v: any) -> Unmarshal_Error // Procedure responsible for marshalling the tag in the given `any` into the given encoder. Tag_Marshal_Proc :: #type proc(self: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error @@ -121,30 +121,30 @@ tags_register_defaults :: proc() { // // See RFC 8949 section 3.4.2. @(private) -tag_time_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> (err: Unmarshal_Error) { - hdr := _decode_header(r) or_return +tag_time_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number, v: any) -> (err: Unmarshal_Error) { + hdr := _decode_header(d.reader) or_return #partial switch hdr { case .U8, .U16, .U32, .U64, .Neg_U8, .Neg_U16, .Neg_U32, .Neg_U64: switch &dst in v { case time.Time: i: i64 - _unmarshal_any_ptr(r, &i, hdr) or_return + _unmarshal_any_ptr(d, &i, hdr) or_return dst = time.unix(i64(i), 0) return case: - return _unmarshal_value(r, v, hdr) + return _unmarshal_value(d, v, hdr) } case .F16, .F32, .F64: switch &dst in v { case time.Time: f: f64 - _unmarshal_any_ptr(r, &f, hdr) or_return + _unmarshal_any_ptr(d, &f, hdr) or_return whole, fract := math.modf(f) dst = time.unix(i64(whole), i64(fract * 1e9)) return case: - return _unmarshal_value(r, v, hdr) + return _unmarshal_value(d, v, hdr) } case: @@ -182,8 +182,8 @@ tag_time_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_ } @(private) -tag_big_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, tnr: Tag_Number, v: any) -> (err: Unmarshal_Error) { - hdr := _decode_header(r) or_return +tag_big_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, tnr: Tag_Number, v: any) -> (err: Unmarshal_Error) { + hdr := _decode_header(d.reader) or_return maj, add := _header_split(hdr) if maj != .Bytes { // Only bytes are supported in this tag. 
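A minimal sketch (not part of the patch) of how the epoch-time tag handled above surfaces to
callers; it assumes the default tag implementations are registered (the default outside of JS
and WASI) and that epoch time is tag number 1, and the byte string and timestamp below are made
up for the example:

	package example

	import "core:encoding/cbor"
	import "core:time"

	epoch_tag_sketch :: proc() {
		// Tag 1 (epoch-based time) wrapping the unsigned 32-bit value 1_700_000_000.
		data := "\xc1\x1a\x65\x53\xf1\x00"

		timestamp: time.Time
		err := cbor.unmarshal(data, &timestamp)
		assert(err == nil)
		assert(timestamp == time.unix(1_700_000_000, 0))
	}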
@@ -192,7 +192,7 @@ tag_big_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, tnr: Tag_Number, switch &dst in v { case big.Int: - bytes := err_conv(_decode_bytes(r, add)) or_return + bytes := err_conv(_decode_bytes(d, add)) or_return defer delete(bytes) if err := big.int_from_bytes_big(&dst, bytes); err != nil { @@ -246,13 +246,13 @@ tag_big_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_E } @(private) -tag_cbor_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> Unmarshal_Error { - hdr := _decode_header(r) or_return +tag_cbor_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number, v: any) -> Unmarshal_Error { + hdr := _decode_header(d.reader) or_return major, add := _header_split(hdr) #partial switch major { case .Bytes: ti := reflect.type_info_base(type_info_of(v.id)) - return _unmarshal_bytes(r, v, ti, hdr, add) + return _unmarshal_bytes(d, v, ti, hdr, add) case: return .Bad_Tag_Value } @@ -283,8 +283,8 @@ tag_cbor_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_ } @(private) -tag_base64_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> (err: Unmarshal_Error) { - hdr := _decode_header(r) or_return +tag_base64_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number, v: any) -> (err: Unmarshal_Error) { + hdr := _decode_header(d.reader) or_return major, add := _header_split(hdr) ti := reflect.type_info_base(type_info_of(v.id)) @@ -294,7 +294,7 @@ tag_base64_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number bytes: string; { context.allocator = context.temp_allocator - bytes = string(err_conv(_decode_bytes(r, add)) or_return) + bytes = string(err_conv(_decode_bytes(d, add)) or_return) } defer delete(bytes, context.temp_allocator) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index c3ab6f908..2df99ca71 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -15,25 +15,56 @@ Types that require allocation are allocated using the given allocator. Some temporary allocations are done on the `context.temp_allocator`, but, if you want to, this can be set to a "normal" allocator, because the necessary `delete` and `free` calls are still made. This is helpful when the CBOR size is so big that you don't want to collect all the temporary allocations until the end. + +Disable streaming/indeterminate lengths with the `.Disallow_Streaming` flag. + +Shrink excess bytes in buffers and containers with the `.Shrink_Excess` flag. + +Mark the input as trusted input with the `.Trusted_Input` flag, this turns off the safety feature +of not pre-allocating more than `max_pre_alloc` bytes before reading into the bytes. You should only +do this when you own both sides of the encoding and are sure there can't be malicious bytes used as +an input. */ unmarshal :: proc { unmarshal_from_reader, unmarshal_from_string, } -// Unmarshals from a reader, see docs on the proc group `Unmarshal` for more info. 
-unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, allocator := context.allocator) -> Unmarshal_Error { - return _unmarshal_any_ptr(r, ptr, allocator=allocator) +unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator) -> (err: Unmarshal_Error) { + err = unmarshal_from_decoder(Decoder{ DEFAULT_MAX_PRE_ALLOC, flags, r }, ptr, allocator=allocator) + + // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. + if err == .EOF { err = .Unexpected_EOF } + return } // Unmarshals from a string, see docs on the proc group `Unmarshal` for more info. -unmarshal_from_string :: proc(s: string, ptr: ^$T, allocator := context.allocator) -> Unmarshal_Error { +unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator) -> (err: Unmarshal_Error) { sr: strings.Reader r := strings.to_reader(&sr, s) - return _unmarshal_any_ptr(r, ptr, allocator=allocator) + + err = unmarshal_from_reader(r, ptr, flags, allocator) + + // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. + if err == .EOF { err = .Unexpected_EOF } + return } -_unmarshal_any_ptr :: proc(r: io.Reader, v: any, hdr: Maybe(Header) = nil, allocator := context.allocator) -> Unmarshal_Error { +unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator) -> (err: Unmarshal_Error) { + d := d + if d.max_pre_alloc <= 0 { + d.max_pre_alloc = DEFAULT_MAX_PRE_ALLOC + } + + err = _unmarshal_any_ptr(d, ptr, allocator=allocator) + + // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. + if err == .EOF { err = .Unexpected_EOF } + return + +} + +_unmarshal_any_ptr :: proc(d: Decoder, v: any, hdr: Maybe(Header) = nil, allocator := context.allocator) -> Unmarshal_Error { context.allocator = allocator v := v @@ -48,12 +79,13 @@ _unmarshal_any_ptr :: proc(r: io.Reader, v: any, hdr: Maybe(Header) = nil, alloc } data := any{(^rawptr)(v.data)^, ti.variant.(reflect.Type_Info_Pointer).elem.id} - return _unmarshal_value(r, data, hdr.? or_else (_decode_header(r) or_return)) + return _unmarshal_value(d, data, hdr.? or_else (_decode_header(d.reader) or_return)) } -_unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_Error) { +_unmarshal_value :: proc(d: Decoder, v: any, hdr: Header) -> (err: Unmarshal_Error) { v := v ti := reflect.type_info_base(type_info_of(v.id)) + r := d.reader // If it's a union with only one variant, then treat it as that variant if u, ok := ti.variant.(reflect.Type_Info_Union); ok && len(u.variants) == 1 { @@ -73,7 +105,7 @@ _unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_E // Allow generic unmarshal by doing it into a `Value`. 
switch &dst in v { case Value: - dst = err_conv(decode(r, hdr)) or_return + dst = err_conv(_decode_from_decoder(d, hdr)) or_return return } @@ -253,7 +285,7 @@ _unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_E case .Tag: switch &dst in v { case ^Tag: - tval := err_conv(_decode_tag_ptr(r, add)) or_return + tval := err_conv(_decode_tag_ptr(d, add)) or_return if t, is_tag := tval.(^Tag); is_tag { dst = t return @@ -262,7 +294,7 @@ _unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_E destroy(tval) return .Bad_Tag_Value case Tag: - t := err_conv(_decode_tag(r, add)) or_return + t := err_conv(_decode_tag(d, add)) or_return if t, is_tag := t.?; is_tag { dst = t return @@ -271,33 +303,33 @@ _unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_E return .Bad_Tag_Value } - nr := err_conv(_decode_tag_nr(r, add)) or_return + nr := err_conv(_decode_uint_as_u64(r, add)) or_return // Custom tag implementations. if impl, ok := _tag_implementations_nr[nr]; ok { - return impl->unmarshal(r, nr, v) + return impl->unmarshal(d, nr, v) } else if nr == TAG_OBJECT_TYPE { - return _unmarshal_union(r, v, ti, hdr) + return _unmarshal_union(d, v, ti, hdr) } else { // Discard the tag info and unmarshal as its value. - return _unmarshal_value(r, v, _decode_header(r) or_return) + return _unmarshal_value(d, v, _decode_header(r) or_return) } return _unsupported(v, hdr, add) - case .Bytes: return _unmarshal_bytes(r, v, ti, hdr, add) - case .Text: return _unmarshal_string(r, v, ti, hdr, add) - case .Array: return _unmarshal_array(r, v, ti, hdr, add) - case .Map: return _unmarshal_map(r, v, ti, hdr, add) + case .Bytes: return _unmarshal_bytes(d, v, ti, hdr, add) + case .Text: return _unmarshal_string(d, v, ti, hdr, add) + case .Array: return _unmarshal_array(d, v, ti, hdr, add) + case .Map: return _unmarshal_map(d, v, ti, hdr, add) case: return .Bad_Major } } -_unmarshal_bytes :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { +_unmarshal_bytes :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { #partial switch t in ti.variant { case reflect.Type_Info_String: - bytes := err_conv(_decode_bytes(r, add)) or_return + bytes := err_conv(_decode_bytes(d, add)) or_return if t.is_cstring { raw := (^cstring)(v.data) @@ -316,7 +348,7 @@ _unmarshal_bytes :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head if elem_base.id != byte { return _unsupported(v, hdr) } - bytes := err_conv(_decode_bytes(r, add)) or_return + bytes := err_conv(_decode_bytes(d, add)) or_return raw := (^mem.Raw_Slice)(v.data) raw^ = transmute(mem.Raw_Slice)bytes return @@ -326,7 +358,7 @@ _unmarshal_bytes :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head if elem_base.id != byte { return _unsupported(v, hdr) } - bytes := err_conv(_decode_bytes(r, add)) or_return + bytes := err_conv(_decode_bytes(d, add)) or_return raw := (^mem.Raw_Dynamic_Array)(v.data) raw.data = raw_data(bytes) raw.len = len(bytes) @@ -339,11 +371,9 @@ _unmarshal_bytes :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head if elem_base.id != byte { return _unsupported(v, hdr) } - bytes: []byte; { - context.allocator = context.temp_allocator - bytes = err_conv(_decode_bytes(r, add)) or_return - } - defer delete(bytes, context.temp_allocator) + context.allocator = context.temp_allocator + bytes := err_conv(_decode_bytes(d, add)) or_return + defer delete(bytes) if len(bytes) > 
t.count { return _unsupported(v, hdr) } @@ -357,10 +387,10 @@ _unmarshal_bytes :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head return _unsupported(v, hdr) } -_unmarshal_string :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { +_unmarshal_string :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { #partial switch t in ti.variant { case reflect.Type_Info_String: - text := err_conv(_decode_text(r, add)) or_return + text := err_conv(_decode_text(d, add)) or_return if t.is_cstring { raw := (^cstring)(v.data) @@ -376,8 +406,8 @@ _unmarshal_string :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Hea // Enum by its variant name. case reflect.Type_Info_Enum: context.allocator = context.temp_allocator - text := err_conv(_decode_text(r, add)) or_return - defer delete(text, context.temp_allocator) + text := err_conv(_decode_text(d, add)) or_return + defer delete(text) for name, i in t.names { if name == text { @@ -388,8 +418,8 @@ _unmarshal_string :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Hea case reflect.Type_Info_Rune: context.allocator = context.temp_allocator - text := err_conv(_decode_text(r, add)) or_return - defer delete(text, context.temp_allocator) + text := err_conv(_decode_text(d, add)) or_return + defer delete(text) r := (^rune)(v.data) dr, n := utf8.decode_rune(text) @@ -404,21 +434,19 @@ _unmarshal_string :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Hea return _unsupported(v, hdr) } -_unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { - +_unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { assign_array :: proc( - r: io.Reader, + d: Decoder, da: ^mem.Raw_Dynamic_Array, elemt: ^reflect.Type_Info, - _length: Maybe(int), + length: int, growable := true, ) -> (out_of_space: bool, err: Unmarshal_Error) { - length, has_length := _length.? - for idx: uintptr = 0; !has_length || idx < uintptr(length); idx += 1 { + for idx: uintptr = 0; length == -1 || idx < uintptr(length); idx += 1 { elem_ptr := rawptr(uintptr(da.data) + idx*uintptr(elemt.size)) elem := any{elem_ptr, elemt.id} - hdr := _decode_header(r) or_return + hdr := _decode_header(d.reader) or_return // Double size if out of capacity. if da.cap <= da.len { @@ -432,8 +460,8 @@ _unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head if !ok { return false, .Out_Of_Memory } } - err = _unmarshal_value(r, elem, hdr) - if !has_length && err == .Break { break } + err = _unmarshal_value(d, elem, hdr) + if length == -1 && err == .Break { break } if err != nil { return } da.len += 1 @@ -445,26 +473,25 @@ _unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head // Allow generically storing the values array. switch &dst in v { case ^Array: - dst = err_conv(_decode_array_ptr(r, add)) or_return + dst = err_conv(_decode_array_ptr(d, add)) or_return return case Array: - dst = err_conv(_decode_array(r, add)) or_return + dst = err_conv(_decode_array(d, add)) or_return return } #partial switch t in ti.variant { case reflect.Type_Info_Slice: - _length, unknown := err_conv(_decode_container_length(r, add)) or_return - length := _length.? 
or_else INITIAL_STREAMED_CONTAINER_CAPACITY + length, scap := err_conv(_decode_len_container(d, add)) or_return - data := mem.alloc_bytes_non_zeroed(t.elem.size * length, t.elem.align) or_return + data := mem.alloc_bytes_non_zeroed(t.elem.size * scap, t.elem.align) or_return defer if err != nil { mem.free_bytes(data) } da := mem.Raw_Dynamic_Array{raw_data(data), 0, length, context.allocator } - assign_array(r, &da, t.elem, _length) or_return + assign_array(d, &da, t.elem, length) or_return - if da.len < da.cap { + if .Shrink_Excess in d.flags { // Ignoring an error here, but this is not critical to succeed. _ = runtime.__dynamic_array_shrink(&da, t.elem.size, t.elem.align, da.len) } @@ -475,54 +502,58 @@ _unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head return case reflect.Type_Info_Dynamic_Array: - _length, unknown := err_conv(_decode_container_length(r, add)) or_return - length := _length.? or_else INITIAL_STREAMED_CONTAINER_CAPACITY + length, scap := err_conv(_decode_len_container(d, add)) or_return - data := mem.alloc_bytes_non_zeroed(t.elem.size * length, t.elem.align) or_return + data := mem.alloc_bytes_non_zeroed(t.elem.size * scap, t.elem.align) or_return defer if err != nil { mem.free_bytes(data) } - raw := (^mem.Raw_Dynamic_Array)(v.data) - raw.data = raw_data(data) - raw.len = 0 - raw.cap = length - raw.allocator = context.allocator + raw := (^mem.Raw_Dynamic_Array)(v.data) + raw.data = raw_data(data) + raw.len = 0 + raw.cap = length + raw.allocator = context.allocator + + _ = assign_array(d, raw, t.elem, length) or_return - _ = assign_array(r, raw, t.elem, _length) or_return + if .Shrink_Excess in d.flags { + // Ignoring an error here, but this is not critical to succeed. + _ = runtime.__dynamic_array_shrink(raw, t.elem.size, t.elem.align, raw.len) + } return case reflect.Type_Info_Array: - _length, unknown := err_conv(_decode_container_length(r, add)) or_return - length := _length.? or_else t.count + _length, scap := err_conv(_decode_len_container(d, add)) or_return + length := min(scap, t.count) - if !unknown && length > t.count { + if length > t.count { return _unsupported(v, hdr) } da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, context.allocator } - out_of_space := assign_array(r, &da, t.elem, _length, growable=false) or_return + out_of_space := assign_array(d, &da, t.elem, length, growable=false) or_return if out_of_space { return _unsupported(v, hdr) } return case reflect.Type_Info_Enumerated_Array: - _length, unknown := err_conv(_decode_container_length(r, add)) or_return - length := _length.? or_else t.count + _length, scap := err_conv(_decode_len_container(d, add)) or_return + length := min(scap, t.count) - if !unknown && length > t.count { + if length > t.count { return _unsupported(v, hdr) } da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, context.allocator } - out_of_space := assign_array(r, &da, t.elem, _length, growable=false) or_return + out_of_space := assign_array(d, &da, t.elem, length, growable=false) or_return if out_of_space { return _unsupported(v, hdr) } return case reflect.Type_Info_Complex: - _length, unknown := err_conv(_decode_container_length(r, add)) or_return - length := _length.? 
or_else 2 + _length, scap := err_conv(_decode_len_container(d, add)) or_return + length := min(scap, 2) - if !unknown && length > 2 { + if length > 2 { return _unsupported(v, hdr) } @@ -536,15 +567,15 @@ _unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head case: unreachable() } - out_of_space := assign_array(r, &da, info, 2, growable=false) or_return + out_of_space := assign_array(d, &da, info, 2, growable=false) or_return if out_of_space { return _unsupported(v, hdr) } return case reflect.Type_Info_Quaternion: - _length, unknown := err_conv(_decode_container_length(r, add)) or_return - length := _length.? or_else 4 + _length, scap := err_conv(_decode_len_container(d, add)) or_return + length := min(scap, 4) - if !unknown && length > 4 { + if length > 4 { return _unsupported(v, hdr) } @@ -558,7 +589,7 @@ _unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head case: unreachable() } - out_of_space := assign_array(r, &da, info, 4, growable=false) or_return + out_of_space := assign_array(d, &da, info, 4, growable=false) or_return if out_of_space { return _unsupported(v, hdr) } return @@ -566,17 +597,17 @@ _unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head } } -_unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { - - decode_key :: proc(r: io.Reader, v: any) -> (k: string, err: Unmarshal_Error) { - entry_hdr := _decode_header(r) or_return +_unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { + r := d.reader + decode_key :: proc(d: Decoder, v: any) -> (k: string, err: Unmarshal_Error) { + entry_hdr := _decode_header(d.reader) or_return entry_maj, entry_add := _header_split(entry_hdr) #partial switch entry_maj { case .Text: - k = err_conv(_decode_text(r, entry_add)) or_return + k = err_conv(_decode_text(d, entry_add)) or_return return case .Bytes: - bytes := err_conv(_decode_bytes(r, entry_add)) or_return + bytes := err_conv(_decode_bytes(d, entry_add)) or_return k = string(bytes) return case: @@ -588,10 +619,10 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header // Allow generically storing the map array. switch &dst in v { case ^Map: - dst = err_conv(_decode_map_ptr(r, add)) or_return + dst = err_conv(_decode_map_ptr(d, add)) or_return return case Map: - dst = err_conv(_decode_map(r, add)) or_return + dst = err_conv(_decode_map(d, add)) or_return return } @@ -601,14 +632,15 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header return _unsupported(v, hdr) } - length, unknown := err_conv(_decode_container_length(r, add)) or_return + length, scap := err_conv(_decode_len_container(d, add)) or_return + unknown := length == -1 fields := reflect.struct_fields_zipped(ti.id) - for idx := 0; unknown || idx < length.?; idx += 1 { + for idx := 0; idx < len(fields) && (unknown || idx < length); idx += 1 { // Decode key, keys can only be strings. 
key: string; { context.allocator = context.temp_allocator - if keyv, kerr := decode_key(r, v); unknown && kerr == .Break { + if keyv, kerr := decode_key(d, v); unknown && kerr == .Break { break } else if kerr != nil { err = kerr @@ -641,11 +673,11 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header } } - field := fields[use_field_idx] - name := field.name - ptr := rawptr(uintptr(v.data) + field.offset) - fany := any{ptr, field.type.id} - _unmarshal_value(r, fany, _decode_header(r) or_return) or_return + field := fields[use_field_idx] + name := field.name + ptr := rawptr(uintptr(v.data) + field.offset) + fany := any{ptr, field.type.id} + _unmarshal_value(d, fany, _decode_header(r) or_return) or_return } return @@ -654,6 +686,8 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header return _unsupported(v, hdr) } + // TODO: shrink excess. + raw_map := (^mem.Raw_Map)(v.data) if raw_map.allocator.procedure == nil { raw_map.allocator = context.allocator @@ -663,10 +697,11 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header _ = runtime.map_free_dynamic(raw_map^, t.map_info) } - length, unknown := err_conv(_decode_container_length(r, add)) or_return + length, scap := err_conv(_decode_len_container(d, add)) or_return + unknown := length == -1 if !unknown { // Reserve space before setting so we can return allocation errors and be efficient on big maps. - new_len := uintptr(runtime.map_len(raw_map^)+length.?) + new_len := uintptr(min(scap, runtime.map_len(raw_map^)+length)) runtime.map_reserve_dynamic(raw_map, t.map_info, new_len) or_return } @@ -676,10 +711,10 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header map_backing_value := any{raw_data(elem_backing), t.value.id} - for idx := 0; unknown || idx < length.?; idx += 1 { + for idx := 0; unknown || idx < length; idx += 1 { // Decode key, keys can only be strings. key: string - if keyv, kerr := decode_key(r, v); unknown && kerr == .Break { + if keyv, kerr := decode_key(d, v); unknown && kerr == .Break { break } else if kerr != nil { err = kerr @@ -688,14 +723,14 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header key = keyv } - if unknown { + if unknown || idx > scap { // Reserve space for new element so we can return allocator errors. new_len := uintptr(runtime.map_len(raw_map^)+1) runtime.map_reserve_dynamic(raw_map, t.map_info, new_len) or_return } mem.zero_slice(elem_backing) - _unmarshal_value(r, map_backing_value, _decode_header(r) or_return) or_return + _unmarshal_value(d, map_backing_value, _decode_header(r) or_return) or_return key_ptr := rawptr(&key) key_cstr: cstring @@ -709,6 +744,10 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header // We already reserved space for it, so this shouldn't fail. assert(set_ptr != nil) } + + if .Shrink_Excess in d.flags { + _, _ = runtime.map_shrink_dynamic(raw_map, t.map_info) + } return case: @@ -719,7 +758,8 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header // Unmarshal into a union, based on the `TAG_OBJECT_TYPE` tag of the spec, it denotes a tag which // contains an array of exactly two elements, the first is a textual representation of the following // CBOR value's type. 
-_unmarshal_union :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header) -> (err: Unmarshal_Error) { +_unmarshal_union :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header) -> (err: Unmarshal_Error) { + r := d.reader #partial switch t in ti.variant { case reflect.Type_Info_Union: idhdr: Header @@ -731,8 +771,8 @@ _unmarshal_union :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head return .Bad_Tag_Value } - n_items, unknown := err_conv(_decode_container_length(r, vadd)) or_return - if unknown || n_items != 2 { + n_items, _ := err_conv(_decode_len_container(d, vadd)) or_return + if n_items != 2 { return .Bad_Tag_Value } @@ -743,7 +783,7 @@ _unmarshal_union :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head } context.allocator = context.temp_allocator - target_name = err_conv(_decode_text(r, idadd)) or_return + target_name = err_conv(_decode_text(d, idadd)) or_return } defer delete(target_name, context.temp_allocator) @@ -757,7 +797,7 @@ _unmarshal_union :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head case reflect.Type_Info_Named: if vti.name == target_name { reflect.set_union_variant_raw_tag(v, tag) - return _unmarshal_value(r, any{v.data, variant.id}, _decode_header(r) or_return) + return _unmarshal_value(d, any{v.data, variant.id}, _decode_header(r) or_return) } case: @@ -769,7 +809,7 @@ _unmarshal_union :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head if variant_name == target_name { reflect.set_union_variant_raw_tag(v, tag) - return _unmarshal_value(r, any{v.data, variant.id}, _decode_header(r) or_return) + return _unmarshal_value(d, any{v.data, variant.id}, _decode_header(r) or_return) } } } diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index 06b96c915..23bfbd3d8 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -4,6 +4,7 @@ import "core:bytes" import "core:encoding/cbor" import "core:fmt" import "core:intrinsics" +import "core:io" import "core:math/big" import "core:mem" import "core:os" @@ -61,7 +62,9 @@ main :: proc() { test_marshalling_maybe(&t) test_marshalling_nil_maybe(&t) - test_cbor_marshalling_union(&t) + test_marshalling_union(&t) + + test_lying_length_array(&t) test_decode_unsigned(&t) test_encode_unsigned(&t) @@ -202,7 +205,7 @@ test_marshalling :: proc(t: ^testing.T) { ev(t, err, nil) defer delete(data) - decoded, derr := cbor.decode_string(string(data)) + decoded, derr := cbor.decode(string(data)) ev(t, derr, nil) defer cbor.destroy(decoded) @@ -398,7 +401,7 @@ test_marshalling_nil_maybe :: proc(t: ^testing.T) { } @(test) -test_cbor_marshalling_union :: proc(t: ^testing.T) { +test_marshalling_union :: proc(t: ^testing.T) { My_Distinct :: distinct string My_Enum :: enum { @@ -457,6 +460,14 @@ test_cbor_marshalling_union :: proc(t: ^testing.T) { } } +@(test) +test_lying_length_array :: proc(t: ^testing.T) { + // Input says this is an array of length max(u64), this should not allocate that amount. + input := []byte{0x9B, 0x00, 0x00, 0x42, 0xFA, 0x42, 0xFA, 0x42, 0xFA, 0x42} + _, err := cbor.decode(string(input)) + expect_value(t, err, io.Error.Unexpected_EOF) // .Out_Of_Memory would be bad. 
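+	// Only a handful of bytes follow the advertised length, so with the default `max_pre_alloc`
+	// cap the decoder reserves only a small buffer and then runs out of input, instead of
+	// allocating the promised space up front.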
+}
+
 @(test)
 test_decode_unsigned :: proc(t: ^testing.T) {
 	expect_decoding(t, "\x00", "0", u8)
-- 
cgit v1.2.3


From 7283b5e75ccecf7dbf28072456a137b29ff983af Mon Sep 17 00:00:00 2001
From: Laytan Laats
Date: Sat, 16 Dec 2023 23:44:40 +0100
Subject: encoding/cbor: minor things

---
 core/encoding/cbor/cbor.odin      | 6 +++++-
 core/encoding/cbor/tags.odin      | 2 +-
 core/encoding/cbor/unmarshal.odin | 2 --
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'core/encoding')

diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin
index 9c4bb0e4e..ddbd53c8d 100644
--- a/core/encoding/cbor/cbor.odin
+++ b/core/encoding/cbor/cbor.odin
@@ -1,3 +1,7 @@
+// Package cbor encodes, decodes, marshals and unmarshals types from/into RFC 8949 compatible CBOR binary.
+// Also provided are conversions to and from JSON and the CBOR diagnostic format.
+//
+// You can additionally provide custom CBOR tag implementations for your use cases.
 package cbor
 
 import "core:encoding/json"
@@ -163,7 +167,7 @@ Decode_Data_Error :: enum {
 	Nested_Tag,           // When a tag's value is another tag, this is not allowed.
 	Length_Too_Big,       // When the length of a container (map, array, bytes, string) is more than `max(int)`.
 	Disallowed_Streaming, // When the `.Disallow_Streaming` flag is set and a streaming header is encountered.
-	Break,
+	Break, // When the `break` header was found without any stream to break off.
 }
 
 Encode_Data_Error :: enum {
diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin
index d2867e7be..cdb7227ef 100644
--- a/core/encoding/cbor/tags.odin
+++ b/core/encoding/cbor/tags.odin
@@ -89,7 +89,7 @@ tag_register_number :: proc(impl: Tag_Implementation, nr: Tag_Number, id: string
 
 // Controls initialization of default tag implementations.
 // JS and WASI default to a panic allocator so we don't want to do it on those.
-INITIALIZE_DEFAULT_TAGS :: #config(CBOR_INITIALIZE_DEFAULT_TAGS, ODIN_OS != .JS && ODIN_OS != .WASI)
+INITIALIZE_DEFAULT_TAGS :: #config(CBOR_INITIALIZE_DEFAULT_TAGS, !ODIN_DEFAULT_TO_NIL_ALLOCATOR && ODIN_OS != .JS && ODIN_OS != .WASI)
 
 @(private, init, disabled=!INITIALIZE_DEFAULT_TAGS)
 tags_initialize_defaults :: proc() {
diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin
index 2df99ca71..dea4b749c 100644
--- a/core/encoding/cbor/unmarshal.odin
+++ b/core/encoding/cbor/unmarshal.odin
@@ -686,8 +686,6 @@ _unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, 
 		return _unsupported(v, hdr)
 	}
 
-	// TODO: shrink excess.
- raw_map := (^mem.Raw_Map)(v.data) if raw_map.allocator.procedure == nil { raw_map.allocator = context.allocator -- cgit v1.2.3 From 46b58ad48d2e326c9592654e96efdf2e927dc876 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Wed, 20 Dec 2023 14:29:34 +0100 Subject: encoding/cbor: don't zero bytes we are going to write/read to/from anyway --- core/encoding/cbor/coding.odin | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index e39519e01..5d99aa6d2 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -266,7 +266,7 @@ encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { } _decode_header :: proc(r: io.Reader) -> (hdr: Header, err: io.Error) { - buf: [1]byte + buf: [1]byte = --- io.read_full(r, buf[:]) or_return return Header(buf[0]), nil } @@ -276,7 +276,7 @@ _header_split :: proc(hdr: Header) -> (Major, Add) { } _decode_u8 :: proc(r: io.Reader) -> (v: u8, err: io.Error) { - byte: [1]byte + byte: [1]byte = --- io.read_full(r, byte[:]) or_return return byte[0], nil } @@ -310,7 +310,7 @@ _decode_tiny_u8 :: proc(additional: Add) -> (u8, Decode_Data_Error) { } _decode_u16 :: proc(r: io.Reader) -> (v: u16, err: io.Error) { - bytes: [2]byte + bytes: [2]byte = --- io.read_full(r, bytes[:]) or_return return endian.unchecked_get_u16be(bytes[:]), nil } @@ -323,7 +323,7 @@ _encode_u16 :: proc(e: Encoder, v: u16, major: Major = .Unsigned) -> Encode_Erro } _encode_u16_exact :: proc(w: io.Writer, v: u16, major: Major = .Unsigned) -> (err: io.Error) { - bytes: [3]byte + bytes: [3]byte = --- bytes[0] = (u8(major) << 5) | u8(Add.Two_Bytes) endian.unchecked_put_u16be(bytes[1:], v) _, err = io.write_full(w, bytes[:]) @@ -331,7 +331,7 @@ _encode_u16_exact :: proc(w: io.Writer, v: u16, major: Major = .Unsigned) -> (er } _decode_u32 :: proc(r: io.Reader) -> (v: u32, err: io.Error) { - bytes: [4]byte + bytes: [4]byte = --- io.read_full(r, bytes[:]) or_return return endian.unchecked_get_u32be(bytes[:]), nil } @@ -344,7 +344,7 @@ _encode_u32 :: proc(e: Encoder, v: u32, major: Major = .Unsigned) -> Encode_Erro } _encode_u32_exact :: proc(w: io.Writer, v: u32, major: Major = .Unsigned) -> (err: io.Error) { - bytes: [5]byte + bytes: [5]byte = --- bytes[0] = (u8(major) << 5) | u8(Add.Four_Bytes) endian.unchecked_put_u32be(bytes[1:], v) _, err = io.write_full(w, bytes[:]) @@ -352,7 +352,7 @@ _encode_u32_exact :: proc(w: io.Writer, v: u32, major: Major = .Unsigned) -> (er } _decode_u64 :: proc(r: io.Reader) -> (v: u64, err: io.Error) { - bytes: [8]byte + bytes: [8]byte = --- io.read_full(r, bytes[:]) or_return return endian.unchecked_get_u64be(bytes[:]), nil } @@ -365,7 +365,7 @@ _encode_u64 :: proc(e: Encoder, v: u64, major: Major = .Unsigned) -> Encode_Erro } _encode_u64_exact :: proc(w: io.Writer, v: u64, major: Major = .Unsigned) -> (err: io.Error) { - bytes: [9]byte + bytes: [9]byte = --- bytes[0] = (u8(major) << 5) | u8(Add.Eight_Bytes) endian.unchecked_put_u64be(bytes[1:], v) _, err = io.write_full(w, bytes[:]) @@ -556,7 +556,7 @@ _encode_map :: proc(e: Encoder, m: Map) -> (err: Encode_Error) { for &entry, i in entries { entry.entry = m[i] - buf := strings.builder_make(0, 8, context.temp_allocator) or_return + buf := strings.builder_make(context.temp_allocator) or_return ke := e ke.writer = strings.to_stream(&buf) @@ -631,7 +631,7 @@ _encode_tag :: proc(e: Encoder, val: Tag) -> Encode_Error { } _decode_simple :: proc(r: 
io.Reader) -> (v: Simple, err: io.Error) { - buf: [1]byte + buf: [1]byte = --- io.read_full(r, buf[:]) or_return return Simple(buf[0]), nil } @@ -661,14 +661,14 @@ _decode_tiny_simple :: proc(add: Add) -> (Simple, Decode_Data_Error) { } _decode_f16 :: proc(r: io.Reader) -> (v: f16, err: io.Error) { - bytes: [2]byte + bytes: [2]byte = --- io.read_full(r, bytes[:]) or_return n := endian.unchecked_get_u16be(bytes[:]) return transmute(f16)n, nil } _encode_f16 :: proc(w: io.Writer, v: f16) -> (err: io.Error) { - bytes: [3]byte + bytes: [3]byte = --- bytes[0] = u8(Header.F16) endian.unchecked_put_u16be(bytes[1:], transmute(u16)v) _, err = io.write_full(w, bytes[:]) @@ -676,7 +676,7 @@ _encode_f16 :: proc(w: io.Writer, v: f16) -> (err: io.Error) { } _decode_f32 :: proc(r: io.Reader) -> (v: f32, err: io.Error) { - bytes: [4]byte + bytes: [4]byte = --- io.read_full(r, bytes[:]) or_return n := endian.unchecked_get_u32be(bytes[:]) return transmute(f32)n, nil @@ -690,7 +690,7 @@ _encode_f32 :: proc(e: Encoder, v: f32) -> io.Error { } _encode_f32_exact :: proc(w: io.Writer, v: f32) -> (err: io.Error) { - bytes: [5]byte + bytes: [5]byte = --- bytes[0] = u8(Header.F32) endian.unchecked_put_u32be(bytes[1:], transmute(u32)v) _, err = io.write_full(w, bytes[:]) @@ -698,7 +698,7 @@ _encode_f32_exact :: proc(w: io.Writer, v: f32) -> (err: io.Error) { } _decode_f64 :: proc(r: io.Reader) -> (v: f64, err: io.Error) { - bytes: [8]byte + bytes: [8]byte = --- io.read_full(r, bytes[:]) or_return n := endian.unchecked_get_u64be(bytes[:]) return transmute(f64)n, nil @@ -712,7 +712,7 @@ _encode_f64 :: proc(e: Encoder, v: f64) -> io.Error { } _encode_f64_exact :: proc(w: io.Writer, v: f64) -> (err: io.Error) { - bytes: [9]byte + bytes: [9]byte = --- bytes[0] = u8(Header.F64) endian.unchecked_put_u64be(bytes[1:], transmute(u64)v) _, err = io.write_full(w, bytes[:]) -- cgit v1.2.3 From cb8bb8bfd8df311f13d40bfc19018f70e105a1cf Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Wed, 20 Dec 2023 15:29:55 +0100 Subject: encoding/cbor: cleanup default temp allocator --- core/encoding/cbor/cbor.odin | 5 +++ core/encoding/cbor/coding.odin | 87 +++++++++++++++++++++++++++++++-------- core/encoding/cbor/marshal.odin | 15 +------ core/encoding/cbor/unmarshal.odin | 5 +-- 4 files changed, 77 insertions(+), 35 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index ddbd53c8d..9df4dfa51 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -238,6 +238,7 @@ negative_u64_to_int :: #force_inline proc(u: Negative_U64) -> i128 { // Utility for converting between the different errors when they are subsets of the other. 
err_conv :: proc { encode_to_marshal_err, + encode_to_marshal_err_p2, decode_to_unmarshal_err, decode_to_unmarshal_err_p, decode_to_unmarshal_err_p2, @@ -253,6 +254,10 @@ encode_to_marshal_err :: #force_inline proc(err: Encode_Error) -> Marshal_Error } } +encode_to_marshal_err_p2 :: #force_inline proc(v: $T, v2: $T2, err: Encode_Error) -> (T, T2, Marshal_Error) { + return v, v2, err_conv(err) +} + decode_to_unmarshal_err :: #force_inline proc(err: Decode_Error) -> Unmarshal_Error { switch e in err { case nil: return nil diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index 5d99aa6d2..1e77a35c8 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -4,6 +4,7 @@ import "core:bytes" import "core:encoding/endian" import "core:intrinsics" import "core:io" +import "core:runtime" import "core:slice" import "core:strings" @@ -54,6 +55,9 @@ Decoder_Flag :: enum { // Makes the decoder shrink of excess capacity from allocated buffers/containers before returning. Shrink_Excess, + + // Internal flag to do initialization. + _In_Progress, } Decoder_Flags :: bit_set[Decoder_Flag] @@ -117,9 +121,8 @@ decode_from_decoder :: proc(d: Decoder, allocator := context.allocator) -> (v: V context.allocator = allocator d := d - if d.max_pre_alloc <= 0 { - d.max_pre_alloc = DEFAULT_MAX_PRE_ALLOC - } + + DECODE_PROGRESS_GUARD(&d) v, err = _decode_from_decoder(d) // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. @@ -225,21 +228,9 @@ encode_into_writer :: proc(w: io.Writer, v: Value, flags := ENCODE_SMALL) -> Enc // See the docs on the proc group `encode_into` for more info. encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { e := e - - outer: bool - defer if outer { - e.flags &~= {._In_Progress} - } - - if ._In_Progress not_in e.flags { - outer = true - e.flags |= {._In_Progress} - - if .Self_Described_CBOR in e.flags { - _encode_u64(e, TAG_SELF_DESCRIBED_CBOR, .Tag) or_return - } - } + ENCODE_PROGRESS_GUARD(&e) or_return + switch v_spec in v { case u8: return _encode_u8(e.writer, v_spec, .Unsigned) case u16: return _encode_u16(e, v_spec, .Unsigned) @@ -265,6 +256,66 @@ encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { } } +@(deferred_in_out=_decode_progress_end) +DECODE_PROGRESS_GUARD :: proc(d: ^Decoder) -> (is_begin: bool, tmp: runtime.Arena_Temp) { + if ._In_Progress in d.flags { + return + } + is_begin = true + + incl_elem(&d.flags, Decoder_Flag._In_Progress) + + if context.allocator != context.temp_allocator { + tmp = runtime.default_temp_allocator_temp_begin() + } + + if d.max_pre_alloc <= 0 { + d.max_pre_alloc = DEFAULT_MAX_PRE_ALLOC + } + + return +} + +_decode_progress_end :: proc(d: ^Decoder, is_begin: bool, tmp: runtime.Arena_Temp) { + if !is_begin { + return + } + + excl_elem(&d.flags, Decoder_Flag._In_Progress) + + runtime.default_temp_allocator_temp_end(tmp) +} + +@(deferred_in_out=_encode_progress_end) +ENCODE_PROGRESS_GUARD :: proc(e: ^Encoder) -> (is_begin: bool, tmp: runtime.Arena_Temp, err: Encode_Error) { + if ._In_Progress in e.flags { + return + } + is_begin = true + + incl_elem(&e.flags, Encoder_Flag._In_Progress) + + if context.allocator != context.temp_allocator { + tmp = runtime.default_temp_allocator_temp_begin() + } + + if .Self_Described_CBOR in e.flags { + _encode_u64(e^, TAG_SELF_DESCRIBED_CBOR, .Tag) or_return + } + + return +} + +_encode_progress_end :: proc(e: ^Encoder, is_begin: bool, tmp: runtime.Arena_Temp, err: Encode_Error) { + if !is_begin || 
err != nil { + return + } + + excl_elem(&e.flags, Encoder_Flag._In_Progress) + + runtime.default_temp_allocator_temp_end(tmp) +} + _decode_header :: proc(r: io.Reader) -> (hdr: Header, err: io.Error) { buf: [1]byte = --- io.read_full(r, buf[:]) or_return @@ -514,7 +565,7 @@ _decode_map :: proc(d: Decoder, add: Add) -> (v: Map, err: Decode_Error) { return nil, kerr } - value := decode_from_decoder(d) or_return + value := _decode_from_decoder(d) or_return append(&items, Map_Entry{ key = key, diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index a5d5efb3e..898371adf 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -77,21 +77,8 @@ marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL) -> Mars // See docs on the `marshal_into` proc group for more info. marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { e := e - - init: bool - defer if init { - e.flags &~= {._In_Progress} - } - - // If not in progress we do initialization and set in progress. - if ._In_Progress not_in e.flags { - init = true - e.flags |= {._In_Progress} - if .Self_Described_CBOR in e.flags { - err_conv(_encode_u64(e, TAG_SELF_DESCRIBED_CBOR, .Tag)) or_return - } - } + err_conv(ENCODE_PROGRESS_GUARD(&e)) or_return if v == nil { return _encode_nil(e.writer) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index dea4b749c..c7de2d87a 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -52,9 +52,8 @@ unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, all unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator) -> (err: Unmarshal_Error) { d := d - if d.max_pre_alloc <= 0 { - d.max_pre_alloc = DEFAULT_MAX_PRE_ALLOC - } + + DECODE_PROGRESS_GUARD(&d) err = _unmarshal_any_ptr(d, ptr, allocator=allocator) -- cgit v1.2.3 From 85f1a60cf301abab292e1dab65e19c61c5612e8e Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Wed, 20 Dec 2023 16:08:27 +0100 Subject: encoding/cbor: cleanup comments about tags --- core/encoding/cbor/tags.odin | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index cdb7227ef..38649f634 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -13,29 +13,35 @@ import "core:time" // Tags defined in RFC 7049 that we provide implementations for. // UTC time in seconds, unmarshalled into a `core:time` `time.Time` or integer. +// Use the struct tag `cbor_tag:"1"` or `cbor_tag:"epoch"` to have your `time.Time` field en/decoded as epoch time. TAG_EPOCH_TIME_NR :: 1 TAG_EPOCH_TIME_ID :: "epoch" // Using `core:math/big`, big integers are properly encoded and decoded during marshal and unmarshal. -TAG_UNSIGNED_BIG_NR :: 2 +// These fields use this tag by default, no struct tag required. +TAG_UNSIGNED_BIG_NR :: 2 // Using `core:math/big`, big integers are properly encoded and decoded during marshal and unmarshal. -TAG_NEGATIVE_BIG_NR :: 3 +// These fields use this tag by default, no struct tag required. +TAG_NEGATIVE_BIG_NR :: 3 // TAG_DECIMAL_FRACTION :: 4 // NOTE: We could probably implement this with `math/fixed`. // Sometimes it is beneficial to carry an embedded CBOR data item that is not meant to be decoded // immediately at the time the enclosing data item is being decoded. 
Tag number 24 (CBOR data item) // can be used to tag the embedded byte string as a single data item encoded in CBOR format. +// Use the struct tag `cbor_tag:"24"` or `cbor_tag:"cbor"` to keep a non-decoded field (string or bytes) of raw CBOR. TAG_CBOR_NR :: 24 TAG_CBOR_ID :: "cbor" // The contents of this tag are base64 encoded during marshal and decoded during unmarshal. +// Use the struct tag `cbor_tag:"34"` or `cbor_tag:"base64"` to have your field string or bytes field en/decoded as base64. TAG_BASE64_NR :: 34 TAG_BASE64_ID :: "base64" // A tag that is used to detect the contents of a binary buffer (like a file) are CBOR. // This tag would wrap everything else, decoders can then check for this header and see if the // given content is definitely CBOR. +// Added by the encoder if it has the flag `.Self_Described_CBOR`, decoded by default. TAG_SELF_DESCRIBED_CBOR :: 55799 // A tag that is used to assign a textual type to the object following it. @@ -99,19 +105,14 @@ tags_initialize_defaults :: proc() { // Registers tags that have implementations provided by this package. // This is done by default and can be controlled with the `CBOR_INITIALIZE_DEFAULT_TAGS` define. tags_register_defaults :: proc() { - // NOTE: Not registering this the other way around, user can opt-in using the `cbor_tag:"1"` struct - // tag instead, it would lose precision and marshalling the `time.Time` struct normally is valid. - tag_register_number({nil, tag_time_unmarshal, tag_time_marshal}, TAG_EPOCH_TIME_NR, TAG_EPOCH_TIME_ID) - - // Use the struct tag `cbor_tag:"34"` to have your field encoded in a base64. - tag_register_number({nil, tag_base64_unmarshal, tag_base64_marshal}, TAG_BASE64_NR, TAG_BASE64_ID) - - // Use the struct tag `cbor_tag:"24"` to keep a non-decoded field of raw CBOR. - tag_register_number({nil, tag_cbor_unmarshal, tag_cbor_marshal}, TAG_CBOR_NR, TAG_CBOR_ID) + tag_register_number({nil, tag_time_unmarshal, tag_time_marshal}, TAG_EPOCH_TIME_NR, TAG_EPOCH_TIME_ID) + tag_register_number({nil, tag_base64_unmarshal, tag_base64_marshal}, TAG_BASE64_NR, TAG_BASE64_ID) + tag_register_number({nil, tag_cbor_unmarshal, tag_cbor_marshal}, TAG_CBOR_NR, TAG_CBOR_ID) // These following tags are registered at the type level and don't require an opt-in struct tag. // Encoding these types on its own make no sense or no data is lost to encode it. - + + // En/Decoding of `big.Int` fields by default. 
tag_register_type({nil, tag_big_unmarshal, tag_big_marshal}, TAG_UNSIGNED_BIG_NR, big.Int) tag_register_type({nil, tag_big_unmarshal, tag_big_marshal}, TAG_NEGATIVE_BIG_NR, big.Int) } -- cgit v1.2.3 From 3fccc77829d6479b972026c5fee7ef0f34ac589e Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Wed, 20 Dec 2023 16:20:33 +0100 Subject: encoding/cbor: clean and fixup some allocations --- core/encoding/cbor/coding.odin | 8 ++++--- core/encoding/cbor/tags.odin | 5 +---- core/encoding/cbor/unmarshal.odin | 44 +++++++++++++++++---------------------- 3 files changed, 25 insertions(+), 32 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index 1e77a35c8..32ecf52bc 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -431,7 +431,9 @@ _decode_bytes_ptr :: proc(d: Decoder, add: Add, type: Major = .Bytes) -> (v: ^By return } -_decode_bytes :: proc(d: Decoder, add: Add, type: Major = .Bytes) -> (v: Bytes, err: Decode_Error) { +_decode_bytes :: proc(d: Decoder, add: Add, type: Major = .Bytes, allocator := context.allocator) -> (v: Bytes, err: Decode_Error) { + context.allocator = allocator + n, scap := _decode_len_str(d, add) or_return buf := strings.builder_make(0, scap) or_return @@ -487,8 +489,8 @@ _decode_text_ptr :: proc(d: Decoder, add: Add) -> (v: ^Text, err: Decode_Error) return } -_decode_text :: proc(d: Decoder, add: Add) -> (v: Text, err: Decode_Error) { - return (Text)(_decode_bytes(d, add, .Text) or_return), nil +_decode_text :: proc(d: Decoder, add: Add, allocator := context.temp_allocator) -> (v: Text, err: Decode_Error) { + return (Text)(_decode_bytes(d, add, .Text, allocator) or_return), nil } _encode_text :: proc(e: Encoder, val: Text) -> Encode_Error { diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index 38649f634..efe724f8c 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -293,10 +293,7 @@ tag_base64_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number, return .Bad_Tag_Value } - bytes: string; { - context.allocator = context.temp_allocator - bytes = string(err_conv(_decode_bytes(d, add)) or_return) - } + bytes := string(err_conv(_decode_bytes(d, add, allocator=context.temp_allocator)) or_return) defer delete(bytes, context.temp_allocator) #partial switch t in ti.variant { diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index c7de2d87a..ae7f97c98 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -370,9 +370,8 @@ _unmarshal_bytes :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header if elem_base.id != byte { return _unsupported(v, hdr) } - context.allocator = context.temp_allocator - bytes := err_conv(_decode_bytes(d, add)) or_return - defer delete(bytes) + bytes := err_conv(_decode_bytes(d, add, allocator=context.temp_allocator)) or_return + defer delete(bytes, context.temp_allocator) if len(bytes) > t.count { return _unsupported(v, hdr) } @@ -404,9 +403,8 @@ _unmarshal_string :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Heade // Enum by its variant name. 
case reflect.Type_Info_Enum: - context.allocator = context.temp_allocator - text := err_conv(_decode_text(d, add)) or_return - defer delete(text) + text := err_conv(_decode_text(d, add, allocator=context.temp_allocator)) or_return + defer delete(text, context.temp_allocator) for name, i in t.names { if name == text { @@ -416,9 +414,8 @@ _unmarshal_string :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Heade } case reflect.Type_Info_Rune: - context.allocator = context.temp_allocator - text := err_conv(_decode_text(d, add)) or_return - defer delete(text) + text := err_conv(_decode_text(d, add, allocator=context.temp_allocator)) or_return + defer delete(text, context.temp_allocator) r := (^rune)(v.data) dr, n := utf8.decode_rune(text) @@ -585,7 +582,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header case quaternion64: info = type_info_of(f16) case quaternion128: info = type_info_of(f32) case quaternion256: info = type_info_of(f64) - case: unreachable() + case: unreachable() } out_of_space := assign_array(d, &da, info, 4, growable=false) or_return @@ -598,15 +595,15 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header _unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { r := d.reader - decode_key :: proc(d: Decoder, v: any) -> (k: string, err: Unmarshal_Error) { + decode_key :: proc(d: Decoder, v: any, allocator := context.allocator) -> (k: string, err: Unmarshal_Error) { entry_hdr := _decode_header(d.reader) or_return entry_maj, entry_add := _header_split(entry_hdr) #partial switch entry_maj { case .Text: - k = err_conv(_decode_text(d, entry_add)) or_return + k = err_conv(_decode_text(d, entry_add, allocator)) or_return return case .Bytes: - bytes := err_conv(_decode_bytes(d, entry_add)) or_return + bytes := err_conv(_decode_bytes(d, entry_add, allocator=allocator)) or_return k = string(bytes) return case: @@ -637,16 +634,14 @@ _unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, for idx := 0; idx < len(fields) && (unknown || idx < length); idx += 1 { // Decode key, keys can only be strings. 
- key: string; { - context.allocator = context.temp_allocator - if keyv, kerr := decode_key(d, v); unknown && kerr == .Break { - break - } else if kerr != nil { - err = kerr - return - } else { - key = keyv - } + key: string + if keyv, kerr := decode_key(d, v, context.temp_allocator); unknown && kerr == .Break { + break + } else if kerr != nil { + err = kerr + return + } else { + key = keyv } defer delete(key, context.temp_allocator) @@ -779,8 +774,7 @@ _unmarshal_union :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header return .Bad_Tag_Value } - context.allocator = context.temp_allocator - target_name = err_conv(_decode_text(d, idadd)) or_return + target_name = err_conv(_decode_text(d, idadd, context.temp_allocator)) or_return } defer delete(target_name, context.temp_allocator) -- cgit v1.2.3 From 154e0d41c6f77feb8a11ff8a6cb4449c11dd767e Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 23 Dec 2023 18:11:52 +0100 Subject: encoding/cbor: fix wrong allocator bug --- core/encoding/cbor/coding.odin | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index 32ecf52bc..ee928f68e 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -100,10 +100,9 @@ decode :: decode_from // Decodes the given string as CBOR. // See docs on the proc group `decode` for more information. decode_from_string :: proc(s: string, flags: Decoder_Flags = {}, allocator := context.allocator) -> (v: Value, err: Decode_Error) { - context.allocator = allocator r: strings.Reader strings.reader_init(&r, s) - return decode_from_reader(strings.reader_to_stream(&r), flags) + return decode_from_reader(strings.reader_to_stream(&r), flags, allocator) } // Reads a CBOR value from the given reader. 
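[Usage sketch, not part of the patch: the `decode_from_string` hunk above now forwards the caller's allocator instead of overriding `context.allocator`. A minimal call mirroring the tests in this series, where `input` is a placeholder string of CBOR bytes:]

    val, derr := cbor.decode(input)   // or cbor.decode(input, {}, my_allocator) to choose the allocator explicitly
    defer cbor.destroy(val)
    if derr != nil {
        // handle the Decode_Error
    }
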
@@ -489,7 +488,7 @@ _decode_text_ptr :: proc(d: Decoder, add: Add) -> (v: ^Text, err: Decode_Error) return } -_decode_text :: proc(d: Decoder, add: Add, allocator := context.temp_allocator) -> (v: Text, err: Decode_Error) { +_decode_text :: proc(d: Decoder, add: Add, allocator := context.allocator) -> (v: Text, err: Decode_Error) { return (Text)(_decode_bytes(d, add, .Text, allocator) or_return), nil } -- cgit v1.2.3 From 72d5b87b52fd4a1fb92819121e7f17b9118dac99 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 23 Dec 2023 18:12:13 +0100 Subject: encoding/cbor: clean --- core/encoding/cbor/coding.odin | 5 ++--- tests/core/encoding/cbor/test_core_cbor.odin | 23 +++++++---------------- 2 files changed, 9 insertions(+), 19 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index ee928f68e..9dd6d2639 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -316,9 +316,8 @@ _encode_progress_end :: proc(e: ^Encoder, is_begin: bool, tmp: runtime.Arena_Tem } _decode_header :: proc(r: io.Reader) -> (hdr: Header, err: io.Error) { - buf: [1]byte = --- - io.read_full(r, buf[:]) or_return - return Header(buf[0]), nil + hdr = Header(_decode_u8(r) or_return) + return } _header_split :: proc(hdr: Header) -> (Major, Add) { diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index 23bfbd3d8..0fb8b521f 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -799,15 +799,8 @@ test_encode_tags :: proc(t: ^testing.T) { // Helpers -buf: bytes.Buffer -stream := bytes.buffer_to_stream(&buf) -encoder := cbor.Encoder{cbor.ENCODE_FULLY_DETERMINISTIC, stream} - expect_decoding :: proc(t: ^testing.T, encoded: string, decoded: string, type: typeid, loc := #caller_location) { - bytes.buffer_reset(&buf) - bytes.buffer_write_string(&buf, encoded) - - res, err := cbor.decode(stream) + res, err := cbor.decode(encoded) defer cbor.destroy(res) expect_value(t, reflect.union_variant_typeid(res), type, loc) @@ -820,10 +813,7 @@ expect_decoding :: proc(t: ^testing.T, encoded: string, decoded: string, type: t } expect_tag :: proc(t: ^testing.T, encoded: string, nr: cbor.Tag_Number, value_decoded: string, loc := #caller_location) { - bytes.buffer_reset(&buf) - bytes.buffer_write_string(&buf, encoded) - - res, err := cbor.decode(stream) + res, err := cbor.decode(encoded) defer cbor.destroy(res) expect_value(t, err, nil, loc) @@ -841,10 +831,7 @@ expect_tag :: proc(t: ^testing.T, encoded: string, nr: cbor.Tag_Number, value_de } expect_float :: proc(t: ^testing.T, encoded: string, expected: $T, loc := #caller_location) where intrinsics.type_is_float(T) { - bytes.buffer_reset(&buf) - bytes.buffer_write_string(&buf, encoded) - - res, err := cbor.decode(stream) + res, err := cbor.decode(encoded) defer cbor.destroy(res) expect_value(t, reflect.union_variant_typeid(res), typeid_of(T), loc) @@ -862,6 +849,10 @@ expect_float :: proc(t: ^testing.T, encoded: string, expected: $T, loc := #calle } } +buf: bytes.Buffer +stream := bytes.buffer_to_stream(&buf) +encoder := cbor.Encoder{cbor.ENCODE_FULLY_DETERMINISTIC, stream} + expect_encoding :: proc(t: ^testing.T, val: cbor.Value, encoded: string, loc := #caller_location) { bytes.buffer_reset(&buf) -- cgit v1.2.3 From 759d095548e7135bbfeb68ac6b0a21857af49527 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 23 Dec 2023 18:52:53 +0100 Subject: encoding/cbor: ignore struct fields with 
`cbor:"-"` --- core/encoding/cbor/marshal.odin | 37 ++++++++++++++++++++-------- core/encoding/cbor/unmarshal.odin | 4 +++ tests/core/encoding/cbor/test_core_cbor.odin | 3 +++ 3 files changed, 34 insertions(+), 10 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index 898371adf..deb7ba020 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -422,7 +422,13 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { case Tag: return err_conv(_encode_tag(e, vv)) } - err_conv(_encode_u16(e, u16(len(info.names)), .Map)) or_return + field_name :: #force_inline proc(info: runtime.Type_Info_Struct, i: int) -> string { + if cbor_name := string(reflect.struct_tag_get(reflect.Struct_Tag(info.tags[i]), "cbor")); cbor_name != "" { + return cbor_name + } else { + return info.names[i] + } + } marshal_entry :: #force_inline proc(e: Encoder, info: runtime.Type_Info_Struct, v: any, name: string, i: int) -> Marshal_Error { err_conv(_encode_text(e, name)) or_return @@ -448,13 +454,14 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { return marshal_into(e, field_any) } - - field_name :: #force_inline proc(info: runtime.Type_Info_Struct, i: int) -> string { - if cbor_name := string(reflect.struct_tag_get(reflect.Struct_Tag(info.tags[i]), "cbor")); cbor_name != "" { - return cbor_name - } else { - return info.names[i] + + n: u64; { + for _, i in info.names { + if field_name(info, i) != "-" { + n += 1 + } } + err_conv(_encode_u64(e, n, .Map)) or_return } if .Deterministic_Map_Sorting in e.flags { @@ -462,11 +469,16 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { name: string, field: int, } - entries := make([dynamic]Name, 0, len(info.names), context.temp_allocator) or_return + entries := make([dynamic]Name, 0, n, context.temp_allocator) or_return defer delete(entries) for name, i in info.names { - append(&entries, Name{field_name(info, i), i}) or_return + fname := field_name(info, i) + if fname == "-" { + continue + } + + append(&entries, Name{fname, i}) or_return } // Sort lexicographic on the bytes of the key. 
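[To make the field skipping and renaming handled above concrete, a hypothetical struct; the field names are illustrative, only the `cbor:` tag behaviour comes from this patch:]

    Session :: struct {
        user:     string,
        password: string `cbor:"-"`,     // never marshalled, and ignored on unmarshal
        token:    string `cbor:"jwt"`,   // encoded and decoded under the key "jwt" instead of "token"
    }
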
@@ -479,7 +491,12 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { } } else { for name, i in info.names { - marshal_entry(e, info, v, field_name(info, i), i) or_return + fname := field_name(info, i) + if fname == "-" { + continue + } + + marshal_entry(e, info, v, fname, i) or_return } } return diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index ae7f97c98..9ad25a38d 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -650,6 +650,10 @@ _unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, { for field, field_idx in fields { tag_value := string(reflect.struct_tag_get(field.tag, "cbor")) + if tag_value == "-" { + continue + } + if key == tag_value { use_field_idx = field_idx break diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index 0fb8b521f..daf31c277 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -130,6 +130,7 @@ Foo :: struct { small_onetwenty: i128, biggest: big.Int, smallest: big.Int, + ignore_this: ^Foo `cbor:"-"`, } FooBar :: enum { @@ -189,6 +190,7 @@ test_marshalling :: proc(t: ^testing.T) { smallie = cbor.Negative_U64(max(u64)), onetwenty = i128(12345), small_onetwenty = -i128(max(u64)), + ignore_this = &Foo{}, } big.atoi(&f.biggest, "1234567891011121314151617181920") @@ -343,6 +345,7 @@ test_marshalling :: proc(t: ^testing.T) { ev(t, backf.smallie, f.smallie) ev(t, backf.onetwenty, f.onetwenty) ev(t, backf.small_onetwenty, f.small_onetwenty) + ev(t, backf.ignore_this, nil) s_equals, s_err := big.equals(&backf.smallest, &f.smallest) ev(t, s_err, nil) -- cgit v1.2.3 From 317931a3c5179e10db941157a994c8e89b7080c2 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 23 Dec 2023 19:22:33 +0100 Subject: encoding/cbor: deterministically store bit sets as big endian --- core/encoding/cbor/cbor.odin | 18 ------------------ core/encoding/cbor/marshal.odin | 3 ++- core/encoding/cbor/unmarshal.odin | 13 ++++++------- core/reflect/reflect.odin | 21 +++++++++++++++++++++ tests/core/encoding/cbor/test_core_cbor.odin | 2 +- 5 files changed, 30 insertions(+), 27 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index 9df4dfa51..3ab493b4b 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -675,21 +675,3 @@ _i128_to_uint :: proc(v: i128) -> (u: u64, m: Major, err: Encode_Data_Error) { u, err = _u128_to_u64(u128(v)) return } - -@(private) -is_bit_set_different_endian_to_platform :: proc(ti: ^runtime.Type_Info) -> bool { - if ti == nil { - return false - } - t := runtime.type_info_base(ti) - #partial switch info in t.variant { - case runtime.Type_Info_Integer: - switch info.endianness { - case .Platform: return false - case .Little: return ODIN_ENDIAN != .Little - case .Big: return ODIN_ENDIAN != .Big - } - } - return false -} - diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index deb7ba020..b7c47f252 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -541,7 +541,8 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { return marshal_into(e, any{v.data, info.base.id}) case runtime.Type_Info_Bit_Set: - do_byte_swap := is_bit_set_different_endian_to_platform(info.underlying) + // Store bit_set as big endian just like the protocol. 
+ do_byte_swap := !reflect.bit_set_is_big_endian(v) switch ti.size * 8 { case 0: return _encode_u8(e.writer, 0) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 9ad25a38d..98ef06635 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -856,12 +856,11 @@ _assign_int :: proc(val: any, i: $T) -> bool { case uintptr: dst = uintptr(i) case: ti := type_info_of(v.id) - do_byte_swap := is_bit_set_different_endian_to_platform(ti) - #partial switch info in ti.variant { - case runtime.Type_Info_Bit_Set: + if _, ok := ti.variant.(runtime.Type_Info_Bit_Set); ok { + do_byte_swap := !reflect.bit_set_is_big_endian(v) switch ti.size * 8 { - case 0: - case 8: + case 0: // no-op. + case 8: x := (^u8)(v.data) x^ = u8(i) case 16: @@ -876,9 +875,9 @@ _assign_int :: proc(val: any, i: $T) -> bool { case: panic("unknown bit_size size") } - case: - return false + return true } + return false } return true } diff --git a/core/reflect/reflect.odin b/core/reflect/reflect.odin index de5dec2e3..de7379ecc 100644 --- a/core/reflect/reflect.odin +++ b/core/reflect/reflect.odin @@ -934,6 +934,27 @@ set_union_value :: proc(dst: any, value: any) -> bool { panic("expected a union to reflect.set_union_variant_typeid") } +@(require_results) +bit_set_is_big_endian :: proc(value: any, loc := #caller_location) -> bool { + if value == nil { return ODIN_ENDIAN == .Big } + + ti := runtime.type_info_base(type_info_of(value.id)) + if info, ok := ti.variant.(runtime.Type_Info_Bit_Set); ok { + if info.underlying == nil { return ODIN_ENDIAN == .Big } + + underlying_ti := runtime.type_info_base(info.underlying) + if underlying_info, uok := underlying_ti.variant.(runtime.Type_Info_Integer); uok { + switch underlying_info.endianness { + case .Platform: return ODIN_ENDIAN == .Big + case .Little: return false + case .Big: return true + } + } + + return ODIN_ENDIAN == .Big + } + panic("expected a bit_set to reflect.bit_set_is_big_endian", loc) +} @(require_results) diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index daf31c277..691a0a5ec 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -251,7 +251,7 @@ test_marshalling :: proc(t: ^testing.T) { ], "cstr": "Hellnope", "ennie": 0, - "ennieb": 2, + "ennieb": 512, "iamint": -256, "important": "!", "my_bytes": h'', -- cgit v1.2.3 From c1cf6c1a95bb489525e329280be735d7a5ce966b Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 23 Dec 2023 20:02:04 +0100 Subject: encoding/cbor: add general docs and example --- core/encoding/cbor/cbor.odin | 5 -- core/encoding/cbor/coding.odin | 8 +-- core/encoding/cbor/doc.odin | 143 ++++++++++++++++++++++++++++++++++++++ core/encoding/cbor/marshal.odin | 2 +- core/encoding/cbor/unmarshal.odin | 2 +- 5 files changed, 149 insertions(+), 11 deletions(-) create mode 100644 core/encoding/cbor/doc.odin (limited to 'core/encoding') diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index 3ab493b4b..7e0f4ea1a 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -1,14 +1,9 @@ -// Package cbor encodes, decodes, marshals and unmarshals types from/into RCF 8949 compatible CBOR binary. -// Also provided are conversion to and from JSON and the CBOR diagnostic format. -// -// You can additionally provide custom CBOR tag implementations for your use cases. 
package cbor import "core:encoding/json" import "core:intrinsics" import "core:io" import "core:mem" -import "core:runtime" import "core:strconv" import "core:strings" diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index 9dd6d2639..a9bb6e408 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -121,7 +121,7 @@ decode_from_decoder :: proc(d: Decoder, allocator := context.allocator) -> (v: V d := d - DECODE_PROGRESS_GUARD(&d) + _DECODE_PROGRESS_GUARD(&d) v, err = _decode_from_decoder(d) // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. @@ -228,7 +228,7 @@ encode_into_writer :: proc(w: io.Writer, v: Value, flags := ENCODE_SMALL) -> Enc encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { e := e - ENCODE_PROGRESS_GUARD(&e) or_return + _ENCODE_PROGRESS_GUARD(&e) or_return switch v_spec in v { case u8: return _encode_u8(e.writer, v_spec, .Unsigned) @@ -256,7 +256,7 @@ encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { } @(deferred_in_out=_decode_progress_end) -DECODE_PROGRESS_GUARD :: proc(d: ^Decoder) -> (is_begin: bool, tmp: runtime.Arena_Temp) { +_DECODE_PROGRESS_GUARD :: proc(d: ^Decoder) -> (is_begin: bool, tmp: runtime.Arena_Temp) { if ._In_Progress in d.flags { return } @@ -286,7 +286,7 @@ _decode_progress_end :: proc(d: ^Decoder, is_begin: bool, tmp: runtime.Arena_Tem } @(deferred_in_out=_encode_progress_end) -ENCODE_PROGRESS_GUARD :: proc(e: ^Encoder) -> (is_begin: bool, tmp: runtime.Arena_Temp, err: Encode_Error) { +_ENCODE_PROGRESS_GUARD :: proc(e: ^Encoder) -> (is_begin: bool, tmp: runtime.Arena_Temp, err: Encode_Error) { if ._In_Progress in e.flags { return } diff --git a/core/encoding/cbor/doc.odin b/core/encoding/cbor/doc.odin new file mode 100644 index 000000000..efcad5c9e --- /dev/null +++ b/core/encoding/cbor/doc.odin @@ -0,0 +1,143 @@ +/* +Package cbor encodes, decodes, marshals and unmarshals types from/into RCF 8949 compatible CBOR binary. +Also provided are conversion to and from JSON and the CBOR diagnostic format. + +**Allocations:** + +In general, when in the following table it says allocations are done on the `context.temp_allocator`, these allocations +are still attempted to be deallocated. +This allows you to use an allocator with freeing implemented as the `context.temp_allocator` which is handy with big CBOR. + +If you use the default `context.temp_allocator` it will be returned back to its state when the process (en/decoding, (un)marshal) started. + +- *Encoding*: If the `.Deterministic_Map_Sorting` flag is set on the encoder, this allocates on `context.temp_allocator` + some space for the keys of maps in order to sort them and then write them. + Other than that there are no allocations (only for the final bytes if you use `cbor.encode_into_bytes`. + +- *Decoding*: Allocates everything on the given allocator and input given can be deleted after decoding. + *No* allocations are done on the `context.temp_allocator`. + +- *Marshal*: Same allocation strategy as encoding. + +- *Unmarshal*: Allocates everything on the given allocator and input given can be deleted after unmarshalling. + Some temporary allocations are done on the `context.temp_allocator`. + +**Determinism:** + +CBOR defines a deterministic en/decoder, which among other things uses the smallest type possible for integers and floats, +and sorts map keys by their (encoded) lexical bytewise order. 
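[A minimal sketch of opting in via the flag constants described in the next paragraph; `v` is a placeholder for any marshallable value:]

    small,  _ := cbor.marshal(v, cbor.ENCODE_SMALL)               // smallest int/float widths, unsorted map keys
    strict, _ := cbor.marshal(v, cbor.ENCODE_FULLY_DETERMINISTIC) // additionally sorts map keys bytewise
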
+ +You can enable this behaviour using a combination of flags, also available as the `cbor.ENCODE_FULLY_DETERMINISTIC` constant. +If you just want the small size that comes with this, but not the map sorting (which has a performance cost) you can use the +`cbor.ENCODE_SMALL` constant for the flags. + +A deterministic float is a float in the smallest type (f16, f32, f64) that hasn't changed after conversion. +A deterministic integer is an integer in the smallest representation (u8, u16, u32, u64) it fits in. + +**Untrusted Input:** + +By default input is treated as untrusted, this means the sizes that are encoded in the CBOR are not blindly trusted. +If you were to trust these sizes, and allocate space for them an attacker would be able to cause massive allocations with small payloads. + +The decoder has a `max_pre_alloc` field that specifies the maximum amount of bytes (roughly) to pre allocate, a KiB by default. + +This does mean reallocations are more common though, you can, if you know the input is trusted, add the `.Trusted_Input` flag to the decoder. + +**Tags:** + +CBOR describes tags that you can wrap values with to assign a number to describe what type of data will follow. + +More information and a list of default tags can be found here: [[RFC 8949 Section 3.4;https://www.rfc-editor.org/rfc/rfc8949.html#name-tagging-of-items]]. + +A list of registered extension types can be found here: [[IANA CBOR assignments;https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml]]. + +Tags can either be assigned to a distinct Odin type (used by default), +or be used with struct tags (`cbor_tag:"base64"`, or `cbor_tag:"1"` for example). + +By default, the following tags are supported/provided by this implementation: + +- *1/epoch*: Assign this tag to `time.Time` or integer fields to use the defined seconds since epoch format. + +- *24/cbor*: Assign this tag to string or byte fields to store encoded CBOR (not decoding it). + +- *34/base64*: Assign this tag to string or byte fields to store and decode the contents in base64. + +- *2 & 3*: Used automatically by the implementation to encode and decode big numbers into/from `core:math/big`. + +- *55799*: Self described CBOR, used when `.Self_Described_CBOR` flag is used to wrap the entire binary. + This shows other implementations that we are dealing with CBOR by just looking at the first byte of input. + +- *1010*: An extension tag that defines a string type followed by its value, this is used by this implementation to support Odin's unions. + +Users can provide their own tag implementations using the `cbor.tag_register_type(...)` to register a tag for a distinct Odin type +used automatically when it is encountered during marshal and unmarshal. +Or with `cbor.tag_register_number(...)` to register a tag number along with an identifier for convenience that can be used with struct tags, +e.g. `cbor_tag:"69"` or `cbor_tag:"my_tag"`. + +You can look at the default tags provided for pointers on how these implementations work. + +Example: + package main + + import "core:encoding/cbor" + import "core:fmt" + import "core:time" + + Possibilities :: union { + string, + int, + } + + Data :: struct { + str: string, + neg: cbor.Negative_U16, // Store a CBOR value directly. + now: time.Time `cbor_tag:"epoch"`, // Wrapped in the epoch tag. + ignore_this: ^Data `cbor:"-"`, // Ignored by implementation. + renamed: f32 `cbor:"renamed :)"`, // Renamed when encoded. + my_union: Possibilities, // Union support. 
+ } + + main :: proc() { + now := time.Time{_nsec = 1701117968 * 1e9} + + data := Data{ + str = "Hello, World!", + neg = 300, + now = now, + ignore_this = &Data{}, + renamed = 123123.125, + my_union = 3, + } + + // Marshal the struct into binary CBOR. + binary, err := cbor.marshal(data, cbor.ENCODE_FULLY_DETERMINISTIC) + assert(err == nil) + defer delete(binary) + + // Decode the binary data into a `cbor.Value`. + decoded, derr := cbor.decode(string(binary)) + assert(derr == nil) + defer cbor.destroy(decoded) + + // Turn the CBOR into a human readable representation. + diagnosis, eerr := cbor.diagnose(decoded) + assert(eerr == nil) + defer delete(diagnosis) + + fmt.println(diagnosis) + } + +Output: + { + "my_union": 1010([ + "int", + 3 + ]), + "neg": -301, + "now": 1(1701117968), + "renamed :)": 123123.12500000, + "str": "Hello, World!" + } +*/ +package cbor + diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index b7c47f252..4a0619c04 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -78,7 +78,7 @@ marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL) -> Mars marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { e := e - err_conv(ENCODE_PROGRESS_GUARD(&e)) or_return + err_conv(_ENCODE_PROGRESS_GUARD(&e)) or_return if v == nil { return _encode_nil(e.writer) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 98ef06635..0acb48083 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -53,7 +53,7 @@ unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, all unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator) -> (err: Unmarshal_Error) { d := d - DECODE_PROGRESS_GUARD(&d) + _DECODE_PROGRESS_GUARD(&d) err = _unmarshal_any_ptr(d, ptr, allocator=allocator) -- cgit v1.2.3 From c4e45d509a25ad1d341a5519606ddff59bfeb64e Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Wed, 17 Jan 2024 00:03:35 +0100 Subject: encoding/cbor: adhere to new quaternion rules of master --- core/encoding/cbor/unmarshal.odin | 6 +++--- tests/core/encoding/cbor/test_core_cbor.odin | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 0acb48083..eec999c12 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -903,9 +903,9 @@ _assign_float :: proc(val: any, f: $T) -> bool { case complex64: dst = complex(f32(f), 0) case complex128: dst = complex(f64(f), 0) - case quaternion64: dst = quaternion(f16(f), 0, 0, 0) - case quaternion128: dst = quaternion(f32(f), 0, 0, 0) - case quaternion256: dst = quaternion(f64(f), 0, 0, 0) + case quaternion64: dst = quaternion(w=f16(f), x=0, y=0, z=0) + case quaternion128: dst = quaternion(w=f32(f), x=0, y=0, z=0) + case quaternion256: dst = quaternion(w=f64(f), x=0, y=0, z=0) case: return false } diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index 691a0a5ec..e7a3ef419 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -177,7 +177,7 @@ test_marshalling :: proc(t: ^testing.T) { ennie = .EFoo, ennieb = {.EBar}, - quat = quaternion(16, 17, 18, 19), + quat = quaternion(w=16, x=17, y=18, z=19), comp = complex(32, 33), important = '!', -- cgit v1.2.3 From a664d9804f64f7f9d6cb4a8bbe2e618297663c60 Mon Sep 
17 00:00:00 2001 From: Laytan Date: Tue, 6 Feb 2024 19:17:07 +0100 Subject: encoding/cbor: remove usage of incl_elem and excl_elem --- core/encoding/cbor/coding.odin | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index a9bb6e408..a5f21af1f 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -262,7 +262,7 @@ _DECODE_PROGRESS_GUARD :: proc(d: ^Decoder) -> (is_begin: bool, tmp: runtime.Are } is_begin = true - incl_elem(&d.flags, Decoder_Flag._In_Progress) + d.flags |= { ._In_Progress } if context.allocator != context.temp_allocator { tmp = runtime.default_temp_allocator_temp_begin() @@ -280,7 +280,7 @@ _decode_progress_end :: proc(d: ^Decoder, is_begin: bool, tmp: runtime.Arena_Tem return } - excl_elem(&d.flags, Decoder_Flag._In_Progress) + d.flags &~= { ._In_Progress } runtime.default_temp_allocator_temp_end(tmp) } @@ -292,7 +292,7 @@ _ENCODE_PROGRESS_GUARD :: proc(e: ^Encoder) -> (is_begin: bool, tmp: runtime.Are } is_begin = true - incl_elem(&e.flags, Encoder_Flag._In_Progress) + e.flags |= { ._In_Progress } if context.allocator != context.temp_allocator { tmp = runtime.default_temp_allocator_temp_begin() @@ -310,7 +310,7 @@ _encode_progress_end :: proc(e: ^Encoder, is_begin: bool, tmp: runtime.Arena_Tem return } - excl_elem(&e.flags, Encoder_Flag._In_Progress) + e.flags &~= { ._In_Progress } runtime.default_temp_allocator_temp_end(tmp) } -- cgit v1.2.3 From 0076c07076783e5256a501e9dc37a803757ea577 Mon Sep 17 00:00:00 2001 From: Laytan Date: Tue, 6 Feb 2024 19:20:18 +0100 Subject: encoding/cbor: core -> base --- core/encoding/cbor/cbor.odin | 3 ++- core/encoding/cbor/coding.odin | 5 +++-- core/encoding/cbor/marshal.odin | 5 +++-- core/encoding/cbor/tags.odin | 3 ++- core/encoding/cbor/unmarshal.odin | 5 +++-- tests/core/encoding/base64/base64.odin | 3 ++- tests/core/encoding/cbor/test_core_cbor.odin | 3 ++- 7 files changed, 17 insertions(+), 10 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index 7e0f4ea1a..f879a11aa 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -1,7 +1,8 @@ package cbor +import "base:intrinsics" + import "core:encoding/json" -import "core:intrinsics" import "core:io" import "core:mem" import "core:strconv" diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index a5f21af1f..5719078c7 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -1,10 +1,11 @@ package cbor +import "base:intrinsics" +import "base:runtime" + import "core:bytes" import "core:encoding/endian" -import "core:intrinsics" import "core:io" -import "core:runtime" import "core:slice" import "core:strings" diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index 4a0619c04..7d93088cb 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -1,11 +1,12 @@ package cbor +import "base:intrinsics" +import "base:runtime" + import "core:bytes" -import "core:intrinsics" import "core:io" import "core:mem" import "core:reflect" -import "core:runtime" import "core:slice" import "core:strconv" import "core:strings" diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index efe724f8c..c9ddaed56 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -1,12 +1,13 @@ package cbor +import "base:runtime" + import 
"core:encoding/base64" import "core:io" import "core:math" import "core:math/big" import "core:mem" import "core:reflect" -import "core:runtime" import "core:strings" import "core:time" diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index eec999c12..eef5d3d99 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -1,10 +1,11 @@ package cbor -import "core:intrinsics" +import "base:intrinsics" +import "base:runtime" + import "core:io" import "core:mem" import "core:reflect" -import "core:runtime" import "core:strings" import "core:unicode/utf8" diff --git a/tests/core/encoding/base64/base64.odin b/tests/core/encoding/base64/base64.odin index 41dbba683..e48eea020 100644 --- a/tests/core/encoding/base64/base64.odin +++ b/tests/core/encoding/base64/base64.odin @@ -1,8 +1,9 @@ package test_encoding_base64 +import "base:intrinsics" + import "core:encoding/base64" import "core:fmt" -import "core:intrinsics" import "core:os" import "core:reflect" import "core:testing" diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index e7a3ef419..8262e5da4 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -1,9 +1,10 @@ package test_encoding_cbor +import "base:intrinsics" + import "core:bytes" import "core:encoding/cbor" import "core:fmt" -import "core:intrinsics" import "core:io" import "core:math/big" import "core:mem" -- cgit v1.2.3 From b11d839fb6dab106a557cf65257e31644a84725d Mon Sep 17 00:00:00 2001 From: Laytan Date: Tue, 6 Feb 2024 20:13:30 +0100 Subject: encoding/cbor: make temp allocations more explicit --- core/encoding/cbor/coding.odin | 108 +++++++-------------------- core/encoding/cbor/doc.odin | 12 ++- core/encoding/cbor/marshal.odin | 37 +++++---- core/encoding/cbor/unmarshal.odin | 19 +++-- tests/core/encoding/cbor/test_core_cbor.odin | 2 +- 5 files changed, 63 insertions(+), 115 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index 5719078c7..abb832ccf 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -26,9 +26,6 @@ Encoder_Flag :: enum { // NOTE: In order to do this, all keys of a map have to be pre-computed, sorted, and // then written, this involves temporary allocations for the keys and a copy of the map itself. Deterministic_Map_Sorting, - - // Internal flag to do initialization. - _In_Progress, } Encoder_Flags :: bit_set[Encoder_Flag] @@ -40,8 +37,9 @@ ENCODE_FULLY_DETERMINISTIC :: Encoder_Flags{.Deterministic_Int_Size, .Determinis ENCODE_SMALL :: Encoder_Flags{.Deterministic_Int_Size, .Deterministic_Float_Size} Encoder :: struct { - flags: Encoder_Flags, - writer: io.Writer, + flags: Encoder_Flags, + writer: io.Writer, + temp_allocator: runtime.Allocator, } Decoder_Flag :: enum { @@ -56,9 +54,6 @@ Decoder_Flag :: enum { // Makes the decoder shrink of excess capacity from allocated buffers/containers before returning. Shrink_Excess, - - // Internal flag to do initialization. - _In_Progress, } Decoder_Flags :: bit_set[Decoder_Flag] @@ -122,7 +117,9 @@ decode_from_decoder :: proc(d: Decoder, allocator := context.allocator) -> (v: V d := d - _DECODE_PROGRESS_GUARD(&d) + if d.max_pre_alloc <= 0 { + d.max_pre_alloc = DEFAULT_MAX_PRE_ALLOC + } v, err = _decode_from_decoder(d) // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. 
@@ -191,7 +188,7 @@ have to be precomputed, sorted and only then written to the output. Empty flags will do nothing extra to the value. -The allocations for the `.Deterministic_Map_Sorting` flag are done using the `context.temp_allocator` +The allocations for the `.Deterministic_Map_Sorting` flag are done using the given temp_allocator. but are followed by the necessary `delete` and `free` calls if the allocator supports them. This is helpful when the CBOR size is so big that you don't want to collect all the temporary allocations until the end. @@ -206,22 +203,22 @@ encode :: encode_into // Encodes the CBOR value into binary CBOR allocated on the given allocator. // See the docs on the proc group `encode_into` for more info. -encode_into_bytes :: proc(v: Value, flags := ENCODE_SMALL, allocator := context.allocator) -> (data: []byte, err: Encode_Error) { +encode_into_bytes :: proc(v: Value, flags := ENCODE_SMALL, allocator := context.allocator, temp_allocator := context.temp_allocator) -> (data: []byte, err: Encode_Error) { b := strings.builder_make(allocator) or_return - encode_into_builder(&b, v, flags) or_return + encode_into_builder(&b, v, flags, temp_allocator) or_return return b.buf[:], nil } // Encodes the CBOR value into binary CBOR written to the given builder. // See the docs on the proc group `encode_into` for more info. -encode_into_builder :: proc(b: ^strings.Builder, v: Value, flags := ENCODE_SMALL) -> Encode_Error { - return encode_into_writer(strings.to_stream(b), v, flags) +encode_into_builder :: proc(b: ^strings.Builder, v: Value, flags := ENCODE_SMALL, temp_allocator := context.temp_allocator) -> Encode_Error { + return encode_into_writer(strings.to_stream(b), v, flags, temp_allocator) } // Encodes the CBOR value into binary CBOR written to the given writer. // See the docs on the proc group `encode_into` for more info. -encode_into_writer :: proc(w: io.Writer, v: Value, flags := ENCODE_SMALL) -> Encode_Error { - return encode_into_encoder(Encoder{flags, w}, v) +encode_into_writer :: proc(w: io.Writer, v: Value, flags := ENCODE_SMALL, temp_allocator := context.temp_allocator) -> Encode_Error { + return encode_into_encoder(Encoder{flags, w, temp_allocator}, v) } // Encodes the CBOR value into binary CBOR written to the given encoder. 
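[Two usage sketches tied to the decoder hunk above and the reworked `encode_into` signatures just above. The `Decoder` field names `flags`, `reader`, and `max_pre_alloc` appear in this patch; `r` (an io.Reader), `val` (a cbor.Value), `my_temp_allocator`, and the chosen limit are placeholders:]

    d := cbor.Decoder{
        flags         = {.Shrink_Excess},  // add .Trusted_Input only for known-good input
        reader        = r,
        max_pre_alloc = 1 << 20,           // cap speculative allocations at ~1 MiB; <= 0 falls back to DEFAULT_MAX_PRE_ALLOC
    }
    val, derr := cbor.decode_from_decoder(d)

    // Route the encoder's scratch space (used by .Deterministic_Map_Sorting) through a caller-owned allocator:
    bin, eerr := cbor.encode_into_bytes(val, cbor.ENCODE_FULLY_DETERMINISTIC, context.allocator, my_temp_allocator)
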
@@ -229,8 +226,15 @@ encode_into_writer :: proc(w: io.Writer, v: Value, flags := ENCODE_SMALL) -> Enc encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { e := e - _ENCODE_PROGRESS_GUARD(&e) or_return - + if e.temp_allocator.procedure == nil { + e.temp_allocator = context.temp_allocator + } + + if .Self_Described_CBOR in e.flags { + _encode_u64(e, TAG_SELF_DESCRIBED_CBOR, .Tag) or_return + e.flags &~= { .Self_Described_CBOR } + } + switch v_spec in v { case u8: return _encode_u8(e.writer, v_spec, .Unsigned) case u16: return _encode_u16(e, v_spec, .Unsigned) @@ -256,66 +260,6 @@ encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { } } -@(deferred_in_out=_decode_progress_end) -_DECODE_PROGRESS_GUARD :: proc(d: ^Decoder) -> (is_begin: bool, tmp: runtime.Arena_Temp) { - if ._In_Progress in d.flags { - return - } - is_begin = true - - d.flags |= { ._In_Progress } - - if context.allocator != context.temp_allocator { - tmp = runtime.default_temp_allocator_temp_begin() - } - - if d.max_pre_alloc <= 0 { - d.max_pre_alloc = DEFAULT_MAX_PRE_ALLOC - } - - return -} - -_decode_progress_end :: proc(d: ^Decoder, is_begin: bool, tmp: runtime.Arena_Temp) { - if !is_begin { - return - } - - d.flags &~= { ._In_Progress } - - runtime.default_temp_allocator_temp_end(tmp) -} - -@(deferred_in_out=_encode_progress_end) -_ENCODE_PROGRESS_GUARD :: proc(e: ^Encoder) -> (is_begin: bool, tmp: runtime.Arena_Temp, err: Encode_Error) { - if ._In_Progress in e.flags { - return - } - is_begin = true - - e.flags |= { ._In_Progress } - - if context.allocator != context.temp_allocator { - tmp = runtime.default_temp_allocator_temp_begin() - } - - if .Self_Described_CBOR in e.flags { - _encode_u64(e^, TAG_SELF_DESCRIBED_CBOR, .Tag) or_return - } - - return -} - -_encode_progress_end :: proc(e: ^Encoder, is_begin: bool, tmp: runtime.Arena_Temp, err: Encode_Error) { - if !is_begin || err != nil { - return - } - - e.flags &~= { ._In_Progress } - - runtime.default_temp_allocator_temp_end(tmp) -} - _decode_header :: proc(r: io.Reader) -> (hdr: Header, err: io.Error) { hdr = Header(_decode_u8(r) or_return) return @@ -602,13 +546,13 @@ _encode_map :: proc(e: Encoder, m: Map) -> (err: Encode_Error) { entry: Map_Entry, } - entries := make([]Map_Entry_With_Key, len(m), context.temp_allocator) or_return - defer delete(entries, context.temp_allocator) + entries := make([]Map_Entry_With_Key, len(m), e.temp_allocator) or_return + defer delete(entries, e.temp_allocator) for &entry, i in entries { entry.entry = m[i] - buf := strings.builder_make(context.temp_allocator) or_return + buf := strings.builder_make(e.temp_allocator) or_return ke := e ke.writer = strings.to_stream(&buf) @@ -624,7 +568,7 @@ _encode_map :: proc(e: Encoder, m: Map) -> (err: Encode_Error) { for entry in entries { io.write_full(e.writer, entry.encoded_key) or_return - delete(entry.encoded_key, context.temp_allocator) + delete(entry.encoded_key, e.temp_allocator) encode(e, entry.entry.value) or_return } diff --git a/core/encoding/cbor/doc.odin b/core/encoding/cbor/doc.odin index efcad5c9e..ee8ba23a0 100644 --- a/core/encoding/cbor/doc.odin +++ b/core/encoding/cbor/doc.odin @@ -4,23 +4,21 @@ Also provided are conversion to and from JSON and the CBOR diagnostic format. 
**Allocations:** -In general, when in the following table it says allocations are done on the `context.temp_allocator`, these allocations +In general, when in the following table it says allocations are done on the `temp_allocator`, these allocations are still attempted to be deallocated. -This allows you to use an allocator with freeing implemented as the `context.temp_allocator` which is handy with big CBOR. +This allows you to use an allocator with freeing implemented as the `temp_allocator` which is handy with big CBOR. -If you use the default `context.temp_allocator` it will be returned back to its state when the process (en/decoding, (un)marshal) started. - -- *Encoding*: If the `.Deterministic_Map_Sorting` flag is set on the encoder, this allocates on `context.temp_allocator` +- *Encoding*: If the `.Deterministic_Map_Sorting` flag is set on the encoder, this allocates on the given `temp_allocator` some space for the keys of maps in order to sort them and then write them. Other than that there are no allocations (only for the final bytes if you use `cbor.encode_into_bytes`. - *Decoding*: Allocates everything on the given allocator and input given can be deleted after decoding. - *No* allocations are done on the `context.temp_allocator`. + *No* temporary allocations are done. - *Marshal*: Same allocation strategy as encoding. - *Unmarshal*: Allocates everything on the given allocator and input given can be deleted after unmarshalling. - Some temporary allocations are done on the `context.temp_allocator`. + Some temporary allocations are done on the given `temp_allocator`. **Determinism:** diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index 7d93088cb..2ffb6b5b4 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -29,7 +29,7 @@ have to be precomputed, sorted and only then written to the output. Empty flags will do nothing extra to the value. -The allocations for the `.Deterministic_Map_Sorting` flag are done using the `context.temp_allocator` +The allocations for the `.Deterministic_Map_Sorting` flag are done using the given `temp_allocator`. but are followed by the necessary `delete` and `free` calls if the allocator supports them. This is helpful when the CBOR size is so big that you don't want to collect all the temporary allocations until the end. @@ -45,7 +45,7 @@ marshal :: marshal_into // Marshals the given value into a CBOR byte stream (allocated using the given allocator). // See docs on the `marshal_into` proc group for more info. -marshal_into_bytes :: proc(v: any, flags := ENCODE_SMALL, allocator := context.allocator) -> (bytes: []byte, err: Marshal_Error) { +marshal_into_bytes :: proc(v: any, flags := ENCODE_SMALL, allocator := context.allocator, temp_allocator := context.temp_allocator) -> (bytes: []byte, err: Marshal_Error) { b, alloc_err := strings.builder_make(allocator) // The builder as a stream also returns .EOF if it ran out of memory so this is consistent. if alloc_err != nil { @@ -54,7 +54,7 @@ marshal_into_bytes :: proc(v: any, flags := ENCODE_SMALL, allocator := context.a defer if err != nil { strings.builder_destroy(&b) } - if err = marshal_into_builder(&b, v, flags); err != nil { + if err = marshal_into_builder(&b, v, flags, temp_allocator); err != nil { return } @@ -63,14 +63,14 @@ marshal_into_bytes :: proc(v: any, flags := ENCODE_SMALL, allocator := context.a // Marshals the given value into a CBOR byte stream written to the given builder. 
// See docs on the `marshal_into` proc group for more info. -marshal_into_builder :: proc(b: ^strings.Builder, v: any, flags := ENCODE_SMALL) -> Marshal_Error { - return marshal_into_writer(strings.to_writer(b), v, flags) +marshal_into_builder :: proc(b: ^strings.Builder, v: any, flags := ENCODE_SMALL, temp_allocator := context.temp_allocator) -> Marshal_Error { + return marshal_into_writer(strings.to_writer(b), v, flags, temp_allocator) } // Marshals the given value into a CBOR byte stream written to the given writer. // See docs on the `marshal_into` proc group for more info. -marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL) -> Marshal_Error { - encoder := Encoder{flags, w} +marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL, temp_allocator := context.temp_allocator) -> Marshal_Error { + encoder := Encoder{flags, w, temp_allocator} return marshal_into_encoder(encoder, v) } @@ -79,7 +79,14 @@ marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL) -> Mars marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { e := e - err_conv(_ENCODE_PROGRESS_GUARD(&e)) or_return + if e.temp_allocator.procedure == nil { + e.temp_allocator = context.temp_allocator + } + + if .Self_Described_CBOR in e.flags { + err_conv(_encode_u64(e, TAG_SELF_DESCRIBED_CBOR, .Tag)) or_return + e.flags &~= { .Self_Described_CBOR } + } if v == nil { return _encode_nil(e.writer) @@ -321,7 +328,7 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { switch info.key.id { case string: - entries := make([dynamic]Encoded_Entry_Fast(^[]byte), 0, map_cap, context.temp_allocator) or_return + entries := make([dynamic]Encoded_Entry_Fast(^[]byte), 0, map_cap, e.temp_allocator) or_return defer delete(entries) for bucket_index in 0.. (err: Marshal_Error) { return case cstring: - entries := make([dynamic]Encoded_Entry_Fast(^cstring), 0, map_cap, context.temp_allocator) or_return + entries := make([dynamic]Encoded_Entry_Fast(^cstring), 0, map_cap, e.temp_allocator) or_return defer delete(entries) for bucket_index in 0.. (err: Marshal_Error) { return case: - entries := make([dynamic]Encoded_Entry, 0, map_cap, context.temp_allocator) or_return + entries := make([dynamic]Encoded_Entry, 0, map_cap, e.temp_allocator) or_return defer delete(entries) for bucket_index in 0.. (err: Marshal_Error) { name: string, field: int, } - entries := make([dynamic]Name, 0, n, context.temp_allocator) or_return + entries := make([dynamic]Name, 0, n, e.temp_allocator) or_return defer delete(entries) for name, i in info.names { @@ -530,7 +537,7 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { case reflect.Type_Info_Named: err_conv(_encode_text(e, vt.name)) or_return case: - builder := strings.builder_make(context.temp_allocator) or_return + builder := strings.builder_make(e.temp_allocator) or_return defer strings.builder_destroy(&builder) reflect.write_type(&builder, vti) err_conv(_encode_text(e, strings.to_string(builder))) or_return diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index eef5d3d99..6e7f3c0bb 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -13,7 +13,7 @@ import "core:unicode/utf8" Unmarshals the given CBOR into the given pointer using reflection. Types that require allocation are allocated using the given allocator. 
-Some temporary allocations are done on the `context.temp_allocator`, but, if you want to, +Some temporary allocations are done on the given `temp_allocator`, but, if you want to, this can be set to a "normal" allocator, because the necessary `delete` and `free` calls are still made. This is helpful when the CBOR size is so big that you don't want to collect all the temporary allocations until the end. @@ -31,8 +31,8 @@ unmarshal :: proc { unmarshal_from_string, } -unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator) -> (err: Unmarshal_Error) { - err = unmarshal_from_decoder(Decoder{ DEFAULT_MAX_PRE_ALLOC, flags, r }, ptr, allocator=allocator) +unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator, temp_allocator := context.temp_allocator) -> (err: Unmarshal_Error) { + err = unmarshal_from_decoder(Decoder{ DEFAULT_MAX_PRE_ALLOC, flags, r }, ptr, allocator, temp_allocator) // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. if err == .EOF { err = .Unexpected_EOF } @@ -40,23 +40,21 @@ unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, flags := Decoder_Flags{}, } // Unmarshals from a string, see docs on the proc group `Unmarshal` for more info. -unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator) -> (err: Unmarshal_Error) { +unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator, temp_allocator := context.temp_allocator) -> (err: Unmarshal_Error) { sr: strings.Reader r := strings.to_reader(&sr, s) - err = unmarshal_from_reader(r, ptr, flags, allocator) + err = unmarshal_from_reader(r, ptr, flags, allocator, temp_allocator) // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. if err == .EOF { err = .Unexpected_EOF } return } -unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator) -> (err: Unmarshal_Error) { +unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator, temp_allocator := context.temp_allocator) -> (err: Unmarshal_Error) { d := d - _DECODE_PROGRESS_GUARD(&d) - - err = _unmarshal_any_ptr(d, ptr, allocator=allocator) + err = _unmarshal_any_ptr(d, ptr, nil, allocator, temp_allocator) // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. 
if err == .EOF { err = .Unexpected_EOF } @@ -64,8 +62,9 @@ unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.alloca } -_unmarshal_any_ptr :: proc(d: Decoder, v: any, hdr: Maybe(Header) = nil, allocator := context.allocator) -> Unmarshal_Error { +_unmarshal_any_ptr :: proc(d: Decoder, v: any, hdr: Maybe(Header) = nil, allocator := context.allocator, temp_allocator := context.temp_allocator) -> Unmarshal_Error { context.allocator = allocator + context.temp_allocator = temp_allocator v := v if v == nil || v.id == nil { diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index 8262e5da4..60c122a69 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -855,7 +855,7 @@ expect_float :: proc(t: ^testing.T, encoded: string, expected: $T, loc := #calle buf: bytes.Buffer stream := bytes.buffer_to_stream(&buf) -encoder := cbor.Encoder{cbor.ENCODE_FULLY_DETERMINISTIC, stream} +encoder := cbor.Encoder{cbor.ENCODE_FULLY_DETERMINISTIC, stream, {}} expect_encoding :: proc(t: ^testing.T, val: cbor.Value, encoded: string, loc := #caller_location) { bytes.buffer_reset(&buf) -- cgit v1.2.3 From 2a39c60fe4988339a910828ba6dcb022e3086d7a Mon Sep 17 00:00:00 2001 From: Laytan Date: Tue, 6 Feb 2024 20:37:19 +0100 Subject: encoding/cbor: respect default to panic allocator --- core/encoding/cbor/tags.odin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index c9ddaed56..040ce2458 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -96,7 +96,7 @@ tag_register_number :: proc(impl: Tag_Implementation, nr: Tag_Number, id: string // Controls initialization of default tag implementations. // JS and WASI default to a panic allocator so we don't want to do it on those. -INITIALIZE_DEFAULT_TAGS :: #config(CBOR_INITIALIZE_DEFAULT_TAGS, !ODIN_DEFAULT_TO_NIL_ALLOCATOR && ODIN_OS != .JS && ODIN_OS != .WASI) +INITIALIZE_DEFAULT_TAGS :: #config(CBOR_INITIALIZE_DEFAULT_TAGS, !ODIN_DEFAULT_TO_PANIC_ALLOCATOR && !ODIN_DEFAULT_TO_NIL_ALLOCATOR) @(private, init, disabled=!INITIALIZE_DEFAULT_TAGS) tags_initialize_defaults :: proc() { -- cgit v1.2.3 From 04bd3cc525e5ef366043ace552bd0f3aa7cdd4b8 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Mon, 12 Feb 2024 20:17:24 +0100 Subject: encoding/cbor: rename `diagnose` to `to_diagnostic_format` to be clearer --- core/encoding/cbor/cbor.odin | 24 ++++++++++++------------ core/encoding/cbor/doc.odin | 4 ++-- tests/core/encoding/cbor/test_core_cbor.odin | 14 +++++++------- 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index f879a11aa..defae4163 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -307,23 +307,23 @@ destroy :: proc(val: Value, allocator := context.allocator) { } /* -diagnose either writes or returns a human-readable representation of the value, -optionally formatted, defined as the diagnostic format in section 8 of RFC 8949. +to_diagnostic_format either writes or returns a human-readable representation of the value, +optionally formatted, defined as the diagnostic format in [[RFC 8949 Section 8;https://www.rfc-editor.org/rfc/rfc8949.html#name-diagnostic-notation]]. 
Incidentally, if the CBOR does not contain any of the additional types defined on top of JSON this will also be valid JSON. */ -diagnose :: proc { - diagnostic_string, - diagnose_to_writer, +to_diagnostic_format :: proc { + to_diagnostic_format_string, + to_diagnostic_format_writer, } // Turns the given CBOR value into a human-readable string. // See docs on the proc group `diagnose` for more info. -diagnostic_string :: proc(val: Value, padding := 0, allocator := context.allocator) -> (string, mem.Allocator_Error) #optional_allocator_error { +to_diagnostic_format_string :: proc(val: Value, padding := 0, allocator := context.allocator) -> (string, mem.Allocator_Error) #optional_allocator_error { b := strings.builder_make(allocator) w := strings.to_stream(&b) - err := diagnose_to_writer(w, val, padding) + err := to_diagnostic_format_writer(w, val, padding) if err == .EOF { // The string builder stream only returns .EOF, and only if it can't write (out of memory). return "", .Out_Of_Memory @@ -335,7 +335,7 @@ diagnostic_string :: proc(val: Value, padding := 0, allocator := context.allocat // Writes the given CBOR value into the writer as human-readable text. // See docs on the proc group `diagnose` for more info. -diagnose_to_writer :: proc(w: io.Writer, val: Value, padding := 0) -> io.Error { +to_diagnostic_format_writer :: proc(w: io.Writer, val: Value, padding := 0) -> io.Error { @(require_results) indent :: proc(padding: int) -> int { padding := padding @@ -421,7 +421,7 @@ diagnose_to_writer :: proc(w: io.Writer, val: Value, padding := 0) -> io.Error { newline(w, padding) or_return for entry, i in v { - diagnose(w, entry, padding) or_return + to_diagnostic_format(w, entry, padding) or_return if i != len(v)-1 { comma(w, padding) or_return newline(w, padding) or_return @@ -444,9 +444,9 @@ diagnose_to_writer :: proc(w: io.Writer, val: Value, padding := 0) -> io.Error { newline(w, padding) or_return for entry, i in v { - diagnose(w, entry.key, padding) or_return + to_diagnostic_format(w, entry.key, padding) or_return io.write_string(w, ": ") or_return - diagnose(w, entry.value, padding) or_return + to_diagnostic_format(w, entry.value, padding) or_return if i != len(v)-1 { comma(w, padding) or_return newline(w, padding) or_return @@ -460,7 +460,7 @@ diagnose_to_writer :: proc(w: io.Writer, val: Value, padding := 0) -> io.Error { case ^Tag: io.write_u64(w, v.number) or_return io.write_string(w, "(") or_return - diagnose(w, v.value, padding) or_return + to_diagnostic_format(w, v.value, padding) or_return io.write_string(w, ")") or_return case Simple: io.write_string(w, "simple(") or_return diff --git a/core/encoding/cbor/doc.odin b/core/encoding/cbor/doc.odin index ee8ba23a0..77eac51cb 100644 --- a/core/encoding/cbor/doc.odin +++ b/core/encoding/cbor/doc.odin @@ -117,8 +117,8 @@ Example: assert(derr == nil) defer cbor.destroy(decoded) - // Turn the CBOR into a human readable representation. - diagnosis, eerr := cbor.diagnose(decoded) + // Turn the CBOR into a human readable representation defined as the diagnostic format in [[RFC 8949 Section 8;https://www.rfc-editor.org/rfc/rfc8949.html#name-diagnostic-notation]]. 
+ diagnosis, eerr := cbor.to_diagnostic_format(decoded) assert(eerr == nil) defer delete(diagnosis) diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index 60c122a69..72244e1d3 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -212,7 +212,7 @@ test_marshalling :: proc(t: ^testing.T) { ev(t, derr, nil) defer cbor.destroy(decoded) - diagnosis, eerr := cbor.diagnose(decoded) + diagnosis, eerr := cbor.to_diagnostic_format(decoded) ev(t, eerr, nil) defer delete(diagnosis) @@ -379,7 +379,7 @@ test_marshalling_maybe :: proc(t: ^testing.T) { val, derr := cbor.decode(string(data)) expect_value(t, derr, nil) - expect_value(t, cbor.diagnose(val), "1") + expect_value(t, cbor.to_diagnostic_format(val), "1") maybe_dest: Maybe(int) uerr := cbor.unmarshal(string(data), &maybe_dest) @@ -396,7 +396,7 @@ test_marshalling_nil_maybe :: proc(t: ^testing.T) { val, derr := cbor.decode(string(data)) expect_value(t, derr, nil) - expect_value(t, cbor.diagnose(val), "nil") + expect_value(t, cbor.to_diagnostic_format(val), "nil") maybe_dest: Maybe(int) uerr := cbor.unmarshal(string(data), &maybe_dest) @@ -432,7 +432,7 @@ test_marshalling_union :: proc(t: ^testing.T) { val, derr := cbor.decode(string(data)) expect_value(t, derr, nil) - expect_value(t, cbor.diagnose(val, -1), `1010(["My_Distinct", "Hello, World!"])`) + expect_value(t, cbor.to_diagnostic_format(val, -1), `1010(["My_Distinct", "Hello, World!"])`) dest: My_Union uerr := cbor.unmarshal(string(data), &dest) @@ -455,7 +455,7 @@ test_marshalling_union :: proc(t: ^testing.T) { val, derr := cbor.decode(string(data)) expect_value(t, derr, nil) - expect_value(t, cbor.diagnose(val, -1), `1010(["My_Struct", {"my_enum": 1}])`) + expect_value(t, cbor.to_diagnostic_format(val, -1), `1010(["My_Struct", {"my_enum": 1}])`) dest: My_Union_No_Nil uerr := cbor.unmarshal(string(data), &dest) @@ -810,7 +810,7 @@ expect_decoding :: proc(t: ^testing.T, encoded: string, decoded: string, type: t expect_value(t, reflect.union_variant_typeid(res), type, loc) expect_value(t, err, nil, loc) - str := cbor.diagnose(res, padding=-1) + str := cbor.to_diagnostic_format(res, padding=-1) defer delete(str) expect_value(t, str, decoded, loc) @@ -825,7 +825,7 @@ expect_tag :: proc(t: ^testing.T, encoded: string, nr: cbor.Tag_Number, value_de if tag, is_tag := res.(^cbor.Tag); is_tag { expect_value(t, tag.number, nr, loc) - str := cbor.diagnose(tag, padding=-1) + str := cbor.to_diagnostic_format(tag, padding=-1) defer delete(str) expect_value(t, str, value_decoded, loc) -- cgit v1.2.3 From 38c1fd58241ca3da4f539958b4cc10574b641138 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 15 Apr 2024 14:35:51 +0100 Subject: Keep -vet happy --- core/encoding/cbor/cbor.odin | 4 ++-- core/encoding/cbor/coding.odin | 6 ++++-- core/encoding/cbor/marshal.odin | 13 +++++-------- core/encoding/cbor/unmarshal.odin | 12 ++++++------ 4 files changed, 17 insertions(+), 18 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index defae4163..550cf87fd 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -428,7 +428,7 @@ to_diagnostic_format_writer :: proc(w: io.Writer, val: Value, padding := 0) -> i } } - padding := dedent(padding) + padding = dedent(padding) newline(w, padding) or_return io.write_string(w, "]") or_return @@ -453,7 +453,7 @@ to_diagnostic_format_writer :: proc(w: io.Writer, val: Value, padding := 
0) -> i } } - padding := dedent(padding) + padding = dedent(padding) newline(w, padding) or_return io.write_string(w, "}") or_return diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index abb832ccf..11db994da 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -377,6 +377,7 @@ _decode_bytes_ptr :: proc(d: Decoder, add: Add, type: Major = .Bytes) -> (v: ^By _decode_bytes :: proc(d: Decoder, add: Add, type: Major = .Bytes, allocator := context.allocator) -> (v: Bytes, err: Decode_Error) { context.allocator = allocator + add := add n, scap := _decode_len_str(d, add) or_return buf := strings.builder_make(0, scap) or_return @@ -385,8 +386,9 @@ _decode_bytes :: proc(d: Decoder, add: Add, type: Major = .Bytes, allocator := c if n == -1 { indefinite_loop: for { - header := _decode_header(d.reader) or_return - maj, add := _header_split(header) + header := _decode_header(d.reader) or_return + maj: Major + maj, add = _header_split(header) #partial switch maj { case type: iter_n, iter_cap := _decode_len_str(d, add) or_return diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index 2ffb6b5b4..87e91bbd8 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -208,7 +208,6 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { } case runtime.Type_Info_Boolean: - val: bool switch b in a { case bool: return _encode_bool(e.writer, b) case b8: return _encode_bool(e.writer, bool(b)) @@ -231,7 +230,7 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { return case runtime.Type_Info_Enumerated_Array: - index := runtime.type_info_base(info.index).variant.(runtime.Type_Info_Enum) + // index := runtime.type_info_base(info.index).variant.(runtime.Type_Info_Enum) err_conv(_encode_u64(e, u64(info.count), .Array)) or_return for i in 0.. 
(err: Marshal_Error) { entries := make([dynamic]Name, 0, n, e.temp_allocator) or_return defer delete(entries) - for name, i in info.names { + for _, i in info.names { fname := field_name(info, i) if fname == "-" { continue @@ -498,7 +497,7 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { marshal_entry(e, info, v, entry.name, entry.field) or_return } } else { - for name, i in info.names { + for _, i in info.names { fname := field_name(info, i) if fname == "-" { continue @@ -514,14 +513,12 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { case Value: return err_conv(encode(e, vv)) } - tag := reflect.get_union_variant_raw_tag(v) - if v.data == nil || tag <= 0 { + id := reflect.union_variant_typeid(v) + if v.data == nil || id == nil { return _encode_nil(e.writer) } - id := info.variants[tag-1].id if len(info.variants) == 1 { - id := info.variants[tag-1].id return marshal_into(e, any{v.data, id}) } diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 6e7f3c0bb..5480b9125 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -518,7 +518,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header return case reflect.Type_Info_Array: - _length, scap := err_conv(_decode_len_container(d, add)) or_return + _, scap := err_conv(_decode_len_container(d, add)) or_return length := min(scap, t.count) if length > t.count { @@ -532,7 +532,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header return case reflect.Type_Info_Enumerated_Array: - _length, scap := err_conv(_decode_len_container(d, add)) or_return + _, scap := err_conv(_decode_len_container(d, add)) or_return length := min(scap, t.count) if length > t.count { @@ -546,7 +546,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header return case reflect.Type_Info_Complex: - _length, scap := err_conv(_decode_len_container(d, add)) or_return + _, scap := err_conv(_decode_len_container(d, add)) or_return length := min(scap, 2) if length > 2 { @@ -568,7 +568,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header return case reflect.Type_Info_Quaternion: - _length, scap := err_conv(_decode_len_container(d, add)) or_return + _, scap := err_conv(_decode_len_container(d, add)) or_return length := min(scap, 4) if length > 4 { @@ -628,7 +628,7 @@ _unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, return _unsupported(v, hdr) } - length, scap := err_conv(_decode_len_container(d, add)) or_return + length, _ := err_conv(_decode_len_container(d, add)) or_return unknown := length == -1 fields := reflect.struct_fields_zipped(ti.id) @@ -672,7 +672,7 @@ _unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, } field := fields[use_field_idx] - name := field.name + // name := field.name ptr := rawptr(uintptr(v.data) + field.offset) fany := any{ptr, field.type.id} _unmarshal_value(d, fany, _decode_header(r) or_return) or_return -- cgit v1.2.3 From 334e08c750a1eac3042b867c0e460b7bc516743e Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 18 Apr 2024 12:17:39 +0100 Subject: Update numerous package declaration names --- core/compress/gzip/example.odin | 2 +- core/compress/gzip/gzip.odin | 2 +- core/compress/shoco/model.odin | 2 +- core/compress/shoco/shoco.odin | 2 +- core/compress/zlib/example.odin | 2 +- core/compress/zlib/zlib.odin | 2 +- core/container/bit_array/bit_array.odin 
| 2 +- core/container/bit_array/doc.odin | 2 +- core/encoding/base32/base32.odin | 2 +- core/encoding/base64/base64.odin | 2 +- core/encoding/cbor/cbor.odin | 2 +- core/encoding/cbor/coding.odin | 2 +- core/encoding/cbor/doc.odin | 2 +- core/encoding/cbor/marshal.odin | 2 +- core/encoding/cbor/tags.odin | 2 +- core/encoding/cbor/unmarshal.odin | 2 +- core/encoding/csv/reader.odin | 2 +- core/encoding/csv/writer.odin | 2 +- core/encoding/entity/entity.odin | 2 +- core/encoding/entity/generated.odin | 2 +- core/encoding/hex/hex.odin | 2 +- core/encoding/json/marshal.odin | 2 +- core/encoding/json/parser.odin | 2 +- core/encoding/json/tokenizer.odin | 2 +- core/encoding/json/types.odin | 2 +- core/encoding/json/unmarshal.odin | 2 +- core/encoding/json/validator.odin | 2 +- core/encoding/varint/doc.odin | 2 +- core/encoding/varint/leb128.odin | 2 +- core/encoding/xml/debug_print.odin | 2 +- core/encoding/xml/helpers.odin | 2 +- core/encoding/xml/tokenizer.odin | 2 +- core/encoding/xml/xml_reader.odin | 2 +- 33 files changed, 33 insertions(+), 33 deletions(-) (limited to 'core/encoding') diff --git a/core/compress/gzip/example.odin b/core/compress/gzip/example.odin index 635134e40..09540aafc 100644 --- a/core/compress/gzip/example.odin +++ b/core/compress/gzip/example.odin @@ -1,5 +1,5 @@ //+build ignore -package gzip +package compress_gzip /* Copyright 2021 Jeroen van Rijn . diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin index 50945fc77..57ed3c3c5 100644 --- a/core/compress/gzip/gzip.odin +++ b/core/compress/gzip/gzip.odin @@ -1,4 +1,4 @@ -package gzip +package compress_gzip /* Copyright 2021 Jeroen van Rijn . diff --git a/core/compress/shoco/model.odin b/core/compress/shoco/model.odin index bbc38903d..f62236c00 100644 --- a/core/compress/shoco/model.odin +++ b/core/compress/shoco/model.odin @@ -5,7 +5,7 @@ */ // package shoco is an implementation of the shoco short string compressor -package shoco +package compress_shoco DEFAULT_MODEL :: Shoco_Model { min_char = 39, diff --git a/core/compress/shoco/shoco.odin b/core/compress/shoco/shoco.odin index e65acb0bc..3c1f412ba 100644 --- a/core/compress/shoco/shoco.odin +++ b/core/compress/shoco/shoco.odin @@ -9,7 +9,7 @@ */ // package shoco is an implementation of the shoco short string compressor -package shoco +package compress_shoco import "base:intrinsics" import "core:compress" diff --git a/core/compress/zlib/example.odin b/core/compress/zlib/example.odin index 19017b279..fedd6671d 100644 --- a/core/compress/zlib/example.odin +++ b/core/compress/zlib/example.odin @@ -1,5 +1,5 @@ //+build ignore -package zlib +package compress_zlib /* Copyright 2021 Jeroen van Rijn . diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin index d4dc6e3d7..b7f381f2b 100644 --- a/core/compress/zlib/zlib.odin +++ b/core/compress/zlib/zlib.odin @@ -1,5 +1,5 @@ //+vet !using-param -package zlib +package compress_zlib /* Copyright 2021 Jeroen van Rijn . 
diff --git a/core/container/bit_array/bit_array.odin b/core/container/bit_array/bit_array.odin index dbd2e0d3a..a8720715c 100644 --- a/core/container/bit_array/bit_array.odin +++ b/core/container/bit_array/bit_array.odin @@ -1,4 +1,4 @@ -package dynamic_bit_array +package container_dynamic_bit_array import "base:intrinsics" import "core:mem" diff --git a/core/container/bit_array/doc.odin b/core/container/bit_array/doc.odin index 371f63f0e..77e1904a8 100644 --- a/core/container/bit_array/doc.odin +++ b/core/container/bit_array/doc.odin @@ -49,4 +49,4 @@ The Bit Array can be used in several ways: fmt.printf("Freed.\n") } */ -package dynamic_bit_array +package container_dynamic_bit_array diff --git a/core/encoding/base32/base32.odin b/core/encoding/base32/base32.odin index 7ab35afd0..962a3ead4 100644 --- a/core/encoding/base32/base32.odin +++ b/core/encoding/base32/base32.odin @@ -1,4 +1,4 @@ -package base32 +package encoding_base32 // @note(zh): Encoding utility for Base32 // A secondary param can be used to supply a custom alphabet to diff --git a/core/encoding/base64/base64.odin b/core/encoding/base64/base64.odin index 535d457d5..1013a7d0b 100644 --- a/core/encoding/base64/base64.odin +++ b/core/encoding/base64/base64.odin @@ -1,4 +1,4 @@ -package base64 +package encoding_base64 import "core:io" import "core:mem" diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index 550cf87fd..d0e406ab1 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -1,4 +1,4 @@ -package cbor +package encoding_cbor import "base:intrinsics" diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index 11db994da..0d276a7a1 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -1,4 +1,4 @@ -package cbor +package encoding_cbor import "base:intrinsics" import "base:runtime" diff --git a/core/encoding/cbor/doc.odin b/core/encoding/cbor/doc.odin index 77eac51cb..937b1b61b 100644 --- a/core/encoding/cbor/doc.odin +++ b/core/encoding/cbor/doc.odin @@ -137,5 +137,5 @@ Output: "str": "Hello, World!" } */ -package cbor +package encoding_cbor diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index 87e91bbd8..37c9dd180 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -1,4 +1,4 @@ -package cbor +package encoding_cbor import "base:intrinsics" import "base:runtime" diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index 040ce2458..3dc79a5dd 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -1,4 +1,4 @@ -package cbor +package encoding_cbor import "base:runtime" diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 5480b9125..a1524d9f4 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -1,4 +1,4 @@ -package cbor +package encoding_cbor import "base:intrinsics" import "base:runtime" diff --git a/core/encoding/csv/reader.odin b/core/encoding/csv/reader.odin index 44a9fdcc4..22eea9568 100644 --- a/core/encoding/csv/reader.odin +++ b/core/encoding/csv/reader.odin @@ -1,6 +1,6 @@ // package csv reads and writes comma-separated values (CSV) files. 
// This package supports the format described in RFC 4180 -package csv +package encoding_csv import "core:bufio" import "core:bytes" diff --git a/core/encoding/csv/writer.odin b/core/encoding/csv/writer.odin index d519104f2..46145ecc1 100644 --- a/core/encoding/csv/writer.odin +++ b/core/encoding/csv/writer.odin @@ -1,4 +1,4 @@ -package csv +package encoding_csv import "core:io" import "core:strings" diff --git a/core/encoding/entity/entity.odin b/core/encoding/entity/entity.odin index ec640c69f..cee6230ef 100644 --- a/core/encoding/entity/entity.odin +++ b/core/encoding/entity/entity.odin @@ -1,4 +1,4 @@ -package unicode_entity +package encoding_unicode_entity /* A unicode entity encoder/decoder diff --git a/core/encoding/entity/generated.odin b/core/encoding/entity/generated.odin index 3d1c02513..d2acde20d 100644 --- a/core/encoding/entity/generated.odin +++ b/core/encoding/entity/generated.odin @@ -1,4 +1,4 @@ -package unicode_entity +package encoding_unicode_entity /* ------ GENERATED ------ DO NOT EDIT ------ GENERATED ------ DO NOT EDIT ------ GENERATED ------ diff --git a/core/encoding/hex/hex.odin b/core/encoding/hex/hex.odin index ef0bab1d0..dbffe216b 100644 --- a/core/encoding/hex/hex.odin +++ b/core/encoding/hex/hex.odin @@ -1,4 +1,4 @@ -package hex +package encoding_hex import "core:strings" diff --git a/core/encoding/json/marshal.odin b/core/encoding/json/marshal.odin index 3d57316b3..04ef6d434 100644 --- a/core/encoding/json/marshal.odin +++ b/core/encoding/json/marshal.odin @@ -1,4 +1,4 @@ -package json +package encoding_json import "core:mem" import "core:math/bits" diff --git a/core/encoding/json/parser.odin b/core/encoding/json/parser.odin index 8bcef1339..3973725dc 100644 --- a/core/encoding/json/parser.odin +++ b/core/encoding/json/parser.odin @@ -1,4 +1,4 @@ -package json +package encoding_json import "core:mem" import "core:unicode/utf8" diff --git a/core/encoding/json/tokenizer.odin b/core/encoding/json/tokenizer.odin index a406a73a5..5c20a2cc3 100644 --- a/core/encoding/json/tokenizer.odin +++ b/core/encoding/json/tokenizer.odin @@ -1,4 +1,4 @@ -package json +package encoding_json import "core:unicode/utf8" diff --git a/core/encoding/json/types.odin b/core/encoding/json/types.odin index 20c806236..73e183615 100644 --- a/core/encoding/json/types.odin +++ b/core/encoding/json/types.odin @@ -1,4 +1,4 @@ -package json +package encoding_json import "core:strings" diff --git a/core/encoding/json/unmarshal.odin b/core/encoding/json/unmarshal.odin index b2052e43c..691303521 100644 --- a/core/encoding/json/unmarshal.odin +++ b/core/encoding/json/unmarshal.odin @@ -1,4 +1,4 @@ -package json +package encoding_json import "core:mem" import "core:math" diff --git a/core/encoding/json/validator.odin b/core/encoding/json/validator.odin index 961c2dc23..a6873319d 100644 --- a/core/encoding/json/validator.odin +++ b/core/encoding/json/validator.odin @@ -1,4 +1,4 @@ -package json +package encoding_json import "core:mem" diff --git a/core/encoding/varint/doc.odin b/core/encoding/varint/doc.odin index 5e4708a59..c0a09873c 100644 --- a/core/encoding/varint/doc.odin +++ b/core/encoding/varint/doc.odin @@ -25,4 +25,4 @@ ``` */ -package varint \ No newline at end of file +package encoding_varint \ No newline at end of file diff --git a/core/encoding/varint/leb128.odin b/core/encoding/varint/leb128.odin index 1cdbb81b0..ca6513f04 100644 --- a/core/encoding/varint/leb128.odin +++ b/core/encoding/varint/leb128.odin @@ -8,7 +8,7 @@ // package varint implements variable length integer encoding 
and decoding using // the LEB128 format as used by DWARF debug info, Android .dex and other file formats. -package varint +package encoding_varint // In theory we should use the bigint package. In practice, varints bigger than this indicate a corrupted file. // Instead we'll set limits on the values we'll encode/decode diff --git a/core/encoding/xml/debug_print.odin b/core/encoding/xml/debug_print.odin index 2607bec23..be958baaa 100644 --- a/core/encoding/xml/debug_print.odin +++ b/core/encoding/xml/debug_print.odin @@ -1,4 +1,4 @@ -package xml +package encoding_xml /* An XML 1.0 / 1.1 parser diff --git a/core/encoding/xml/helpers.odin b/core/encoding/xml/helpers.odin index 42a5258b3..a9d4ad493 100644 --- a/core/encoding/xml/helpers.odin +++ b/core/encoding/xml/helpers.odin @@ -1,4 +1,4 @@ -package xml +package encoding_xml /* An XML 1.0 / 1.1 parser diff --git a/core/encoding/xml/tokenizer.odin b/core/encoding/xml/tokenizer.odin index a223a75d6..0f87c366b 100644 --- a/core/encoding/xml/tokenizer.odin +++ b/core/encoding/xml/tokenizer.odin @@ -1,4 +1,4 @@ -package xml +package encoding_xml /* An XML 1.0 / 1.1 parser diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin index bf8646bc3..5b4b12948 100644 --- a/core/encoding/xml/xml_reader.odin +++ b/core/encoding/xml/xml_reader.odin @@ -24,7 +24,7 @@ MAYBE: List of contributors: - Jeroen van Rijn: Initial implementation. */ -package xml +package encoding_xml // An XML 1.0 / 1.1 parser import "core:bytes" -- cgit v1.2.3 From 3812d5e002fd2a2f4762b7732c72e49c1c6ee767 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Fri, 19 Apr 2024 00:19:02 +0100 Subject: Only override the comma value on `*_init` if it is "invalid" --- core/encoding/csv/reader.odin | 5 ++++- core/encoding/csv/writer.odin | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/csv/reader.odin b/core/encoding/csv/reader.odin index 22eea9568..5d3626b9f 100644 --- a/core/encoding/csv/reader.odin +++ b/core/encoding/csv/reader.odin @@ -91,7 +91,10 @@ DEFAULT_RECORD_BUFFER_CAPACITY :: 256 // reader_init initializes a new Reader from r reader_init :: proc(reader: ^Reader, r: io.Reader, buffer_allocator := context.allocator) { - reader.comma = ',' + switch reader.comma { + case '\x00', '\n', '\r', 0xfffd: + reader.comma = ',' + } context.allocator = buffer_allocator reserve(&reader.record_buffer, DEFAULT_RECORD_BUFFER_CAPACITY) diff --git a/core/encoding/csv/writer.odin b/core/encoding/csv/writer.odin index 46145ecc1..132fa0a51 100644 --- a/core/encoding/csv/writer.odin +++ b/core/encoding/csv/writer.odin @@ -17,7 +17,10 @@ Writer :: struct { // writer_init initializes a Writer that writes to w writer_init :: proc(writer: ^Writer, w: io.Writer) { - writer.comma = ',' + switch writer.comma { + case '\x00', '\n', '\r', 0xfffd: + writer.comma = ',' + } writer.w = w } -- cgit v1.2.3 From 20223345a4376c6490736ca952427b919c178985 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Fri, 19 Apr 2024 00:33:31 +0100 Subject: Return partial reads --- core/encoding/csv/reader.odin | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'core/encoding') diff --git a/core/encoding/csv/reader.odin b/core/encoding/csv/reader.odin index 5d3626b9f..f8c72c423 100644 --- a/core/encoding/csv/reader.odin +++ b/core/encoding/csv/reader.odin @@ -124,6 +124,7 @@ reader_destroy :: proc(r: ^Reader) { // read reads a single record (a slice of fields) from r // // All \r\n sequences are normalized to 
\n, including multi-line field +@(require_results) read :: proc(r: ^Reader, allocator := context.allocator) -> (record: []string, err: Error) { if r.reuse_record { record, err = _read_record(r, &r.last_record, allocator) @@ -136,6 +137,7 @@ read :: proc(r: ^Reader, allocator := context.allocator) -> (record: []string, e } // is_io_error checks where an Error is a specific io.Error kind +@(require_results) is_io_error :: proc(err: Error, io_err: io.Error) -> bool { if v, ok := err.(io.Error); ok { return v == io_err @@ -143,10 +145,10 @@ is_io_error :: proc(err: Error, io_err: io.Error) -> bool { return false } - // read_all reads all the remaining records from r. // Each record is a slice of fields. // read_all is defined to read until an EOF, and does not treat, and does not treat EOF as an error +@(require_results) read_all :: proc(r: ^Reader, allocator := context.allocator) -> ([][]string, Error) { context.allocator = allocator records: [dynamic][]string @@ -156,13 +158,18 @@ read_all :: proc(r: ^Reader, allocator := context.allocator) -> ([][]string, Err return records[:], nil } if rerr != nil { - return nil, rerr + // allow for a partial read + if record != nil { + append(&records, record) + } + return records[:], rerr } append(&records, record) } } // read reads a single record (a slice of fields) from the provided input. +@(require_results) read_from_string :: proc(input: string, record_allocator := context.allocator, buffer_allocator := context.allocator) -> (record: []string, n: int, err: Error) { ir: strings.Reader strings.reader_init(&ir, input) @@ -178,6 +185,7 @@ read_from_string :: proc(input: string, record_allocator := context.allocator, b // read_all reads all the remaining records from the provided input. +@(require_results) read_all_from_string :: proc(input: string, records_allocator := context.allocator, buffer_allocator := context.allocator) -> ([][]string, Error) { ir: strings.Reader strings.reader_init(&ir, input) @@ -189,7 +197,7 @@ read_all_from_string :: proc(input: string, records_allocator := context.allocat return read_all(&r, records_allocator) } -@private +@(private, require_results) is_valid_delim :: proc(r: rune) -> bool { switch r { case 0, '"', '\r', '\n', utf8.RUNE_ERROR: @@ -198,8 +206,9 @@ is_valid_delim :: proc(r: rune) -> bool { return utf8.valid_rune(r) } -@private +@(private, require_results) _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.allocator) -> ([]string, Error) { + @(require_results) read_line :: proc(r: ^Reader) -> ([]byte, io.Error) { if !r.multiline_fields { line, err := bufio.reader_read_slice(&r.r, '\n') @@ -269,6 +278,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all unreachable() } + @(require_results) length_newline :: proc(b: []byte) -> int { if len(b) > 0 && b[len(b)-1] == '\n' { return 1 @@ -276,6 +286,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all return 0 } + @(require_results) next_rune :: proc(b: []byte) -> rune { r, _ := utf8.decode_rune(b) return r -- cgit v1.2.3
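As a closing reference, a small sketch (illustrative names only, not taken from the test suite) of how the two csv changes above behave together: a delimiter assigned before `reader_init` is now respected, and `read_all` hands back any records it parsed before hitting an error:

import "core:encoding/csv"
import "core:fmt"
import "core:strings"

csv_example :: proc() {
	// Sketch only: `input` and all local names are illustrative.
	input := "a;b;c\n1;2;3\n"

	sr: strings.Reader
	r: csv.Reader
	r.comma = ';' // a valid delimiter, so reader_init now keeps it instead of forcing ','
	csv.reader_init(&r, strings.to_reader(&sr, input))
	defer csv.reader_destroy(&r)

	records, err := csv.read_all(&r)
	if err != nil {
		// read_all may still return the records parsed before the error (partial read).
		fmt.eprintln("csv error:", err)
	}
	for record in records {
		fmt.println(record)
	}
	// Cleanup of the returned records is omitted for brevity.
}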