diff options
| author | Laytan Laats <laytanlaats@hotmail.com> | 2023-12-23 20:02:04 +0100 |
|---|---|---|
| committer | Laytan Laats <laytanlaats@hotmail.com> | 2024-03-04 17:26:19 +0100 |
| commit | c1cf6c1a95bb489525e329280be735d7a5ce966b (patch) | |
| tree | 6775a9519a9332917c94a3dcdd0b6f43243b7799 /core/encoding/cbor | |
| parent | 317931a3c5179e10db941157a994c8e89b7080c2 (diff) | |
encoding/cbor: add general docs and example
Diffstat (limited to 'core/encoding/cbor')
| -rw-r--r-- | core/encoding/cbor/cbor.odin | 5 | ||||
| -rw-r--r-- | core/encoding/cbor/coding.odin | 8 | ||||
| -rw-r--r-- | core/encoding/cbor/doc.odin | 143 | ||||
| -rw-r--r-- | core/encoding/cbor/marshal.odin | 2 | ||||
| -rw-r--r-- | core/encoding/cbor/unmarshal.odin | 2 |
5 files changed, 149 insertions, 11 deletions
diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index 3ab493b4b..7e0f4ea1a 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -1,14 +1,9 @@ -// Package cbor encodes, decodes, marshals and unmarshals types from/into RCF 8949 compatible CBOR binary. -// Also provided are conversion to and from JSON and the CBOR diagnostic format. -// -// You can additionally provide custom CBOR tag implementations for your use cases. package cbor import "core:encoding/json" import "core:intrinsics" import "core:io" import "core:mem" -import "core:runtime" import "core:strconv" import "core:strings" diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index 9dd6d2639..a9bb6e408 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -121,7 +121,7 @@ decode_from_decoder :: proc(d: Decoder, allocator := context.allocator) -> (v: V d := d - DECODE_PROGRESS_GUARD(&d) + _DECODE_PROGRESS_GUARD(&d) v, err = _decode_from_decoder(d) // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. 
@@ -228,7 +228,7 @@ encode_into_writer :: proc(w: io.Writer, v: Value, flags := ENCODE_SMALL) -> Enc encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { e := e - ENCODE_PROGRESS_GUARD(&e) or_return + _ENCODE_PROGRESS_GUARD(&e) or_return switch v_spec in v { case u8: return _encode_u8(e.writer, v_spec, .Unsigned) @@ -256,7 +256,7 @@ encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { } @(deferred_in_out=_decode_progress_end) -DECODE_PROGRESS_GUARD :: proc(d: ^Decoder) -> (is_begin: bool, tmp: runtime.Arena_Temp) { +_DECODE_PROGRESS_GUARD :: proc(d: ^Decoder) -> (is_begin: bool, tmp: runtime.Arena_Temp) { if ._In_Progress in d.flags { return } @@ -286,7 +286,7 @@ _decode_progress_end :: proc(d: ^Decoder, is_begin: bool, tmp: runtime.Arena_Tem } @(deferred_in_out=_encode_progress_end) -ENCODE_PROGRESS_GUARD :: proc(e: ^Encoder) -> (is_begin: bool, tmp: runtime.Arena_Temp, err: Encode_Error) { +_ENCODE_PROGRESS_GUARD :: proc(e: ^Encoder) -> (is_begin: bool, tmp: runtime.Arena_Temp, err: Encode_Error) { if ._In_Progress in e.flags { return } diff --git a/core/encoding/cbor/doc.odin b/core/encoding/cbor/doc.odin new file mode 100644 index 000000000..efcad5c9e --- /dev/null +++ b/core/encoding/cbor/doc.odin @@ -0,0 +1,143 @@ +/* +Package cbor encodes, decodes, marshals and unmarshals types from/into RFC 8949 compatible CBOR binary. +Also provided are conversion to and from JSON and the CBOR diagnostic format. + +**Allocations:** + +In general, when in the following list it says allocations are done on the `context.temp_allocator`, these allocations +are still attempted to be deallocated. +This allows you to use an allocator with freeing implemented as the `context.temp_allocator` which is handy with big CBOR. + +If you use the default `context.temp_allocator` it will be returned back to its state when the process (en/decoding, (un)marshal) started. 
+ +- *Encoding*: If the `.Deterministic_Map_Sorting` flag is set on the encoder, this allocates on `context.temp_allocator` + some space for the keys of maps in order to sort them and then write them. + Other than that there are no allocations (only for the final bytes if you use `cbor.encode_into_bytes`). + +- *Decoding*: Allocates everything on the given allocator and input given can be deleted after decoding. + *No* allocations are done on the `context.temp_allocator`. + +- *Marshal*: Same allocation strategy as encoding. + +- *Unmarshal*: Allocates everything on the given allocator and input given can be deleted after unmarshalling. + Some temporary allocations are done on the `context.temp_allocator`. + +**Determinism:** + +CBOR defines a deterministic en/decoder, which among other things uses the smallest type possible for integers and floats, +and sorts map keys by their (encoded) lexical bytewise order. + +You can enable this behaviour using a combination of flags, also available as the `cbor.ENCODE_FULLY_DETERMINISTIC` constant. +If you just want the small size that comes with this, but not the map sorting (which has a performance cost) you can use the +`cbor.ENCODE_SMALL` constant for the flags. + +A deterministic float is a float in the smallest type (f16, f32, f64) that hasn't changed after conversion. +A deterministic integer is an integer in the smallest representation (u8, u16, u32, u64) it fits in. + +**Untrusted Input:** + +By default, input is treated as untrusted; this means the sizes that are encoded in the CBOR are not blindly trusted. +If you were to trust these sizes, and allocate space for them, an attacker would be able to cause massive allocations with small payloads. + +The decoder has a `max_pre_alloc` field that specifies the maximum number of bytes (roughly) to preallocate, a KiB by default. + +This does mean reallocations are more common, though; if you know the input is trusted, you can add the `.Trusted_Input` flag to the decoder. 
+ +**Tags:** + +CBOR describes tags that you can wrap values with to assign a number to describe what type of data will follow. + +More information and a list of default tags can be found here: [[RFC 8949 Section 3.4;https://www.rfc-editor.org/rfc/rfc8949.html#name-tagging-of-items]]. + +A list of registered extension types can be found here: [[IANA CBOR assignments;https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml]]. + +Tags can either be assigned to a distinct Odin type (used by default), +or be used with struct tags (`cbor_tag:"base64"`, or `cbor_tag:"1"` for example). + +By default, the following tags are supported/provided by this implementation: + +- *1/epoch*: Assign this tag to `time.Time` or integer fields to use the defined seconds since epoch format. + +- *24/cbor*: Assign this tag to string or byte fields to store encoded CBOR (not decoding it). + +- *34/base64*: Assign this tag to string or byte fields to store and decode the contents in base64. + +- *2 & 3*: Used automatically by the implementation to encode and decode big numbers into/from `core:math/big`. + +- *55799*: Self-described CBOR, used when the `.Self_Described_CBOR` flag is set to wrap the entire binary. + This shows other implementations that we are dealing with CBOR by just looking at the first bytes of input. + +- *1010*: An extension tag that defines a string type followed by its value; this is used by this implementation to support Odin's unions. + +Users can provide their own tag implementations using `cbor.tag_register_type(...)` to register a tag for a distinct Odin type +used automatically when it is encountered during marshal and unmarshal. +Or with `cbor.tag_register_number(...)` to register a tag number along with an identifier for convenience that can be used with struct tags, +e.g. `cbor_tag:"69"` or `cbor_tag:"my_tag"`. + +You can look at the default tags provided for pointers on how these implementations work. 
+ +Example: + package main + + import "core:encoding/cbor" + import "core:fmt" + import "core:time" + + Possibilities :: union { + string, + int, + } + + Data :: struct { + str: string, + neg: cbor.Negative_U16, // Store a CBOR value directly. + now: time.Time `cbor_tag:"epoch"`, // Wrapped in the epoch tag. + ignore_this: ^Data `cbor:"-"`, // Ignored by implementation. + renamed: f32 `cbor:"renamed :)"`, // Renamed when encoded. + my_union: Possibilities, // Union support. + } + + main :: proc() { + now := time.Time{_nsec = 1701117968 * 1e9} + + data := Data{ + str = "Hello, World!", + neg = 300, + now = now, + ignore_this = &Data{}, + renamed = 123123.125, + my_union = 3, + } + + // Marshal the struct into binary CBOR. + binary, err := cbor.marshal(data, cbor.ENCODE_FULLY_DETERMINISTIC) + assert(err == nil) + defer delete(binary) + + // Decode the binary data into a `cbor.Value`. + decoded, derr := cbor.decode(string(binary)) + assert(derr == nil) + defer cbor.destroy(decoded) + + // Turn the CBOR into a human readable representation. + diagnosis, eerr := cbor.diagnose(decoded) + assert(eerr == nil) + defer delete(diagnosis) + + fmt.println(diagnosis) + } + +Output: + { + "my_union": 1010([ + "int", + 3 + ]), + "neg": -301, + "now": 1(1701117968), + "renamed :)": 123123.12500000, + "str": "Hello, World!" 
+ } +*/ +package cbor + diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index b7c47f252..4a0619c04 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -78,7 +78,7 @@ marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL) -> Mars marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { e := e - err_conv(ENCODE_PROGRESS_GUARD(&e)) or_return + err_conv(_ENCODE_PROGRESS_GUARD(&e)) or_return if v == nil { return _encode_nil(e.writer) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 98ef06635..0acb48083 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -53,7 +53,7 @@ unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, all unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator) -> (err: Unmarshal_Error) { d := d - DECODE_PROGRESS_GUARD(&d) + _DECODE_PROGRESS_GUARD(&d) err = _unmarshal_any_ptr(d, ptr, allocator=allocator) |