diff options
| author | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2022-04-27 14:37:15 +0200 |
|---|---|---|
| committer | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2022-04-27 14:37:15 +0200 |
| commit | c4e0d1efa1ec655bae9134b95a0fcd060cc7bbea (patch) | |
| tree | c29bd0b78138e8d67aebe34ac689d13e32d9d15f /core/encoding | |
| parent | 6e61abc7d06f22129f93110a9f652c3eec21f0c6 (diff) | |
| parent | 9349dfba8fec53f52f77a0c8928e115ec93ff447 (diff) | |
Merge branch 'master' into xml
Diffstat (limited to 'core/encoding')
| -rw-r--r-- | core/encoding/hxa/doc.odin | 12 | ||||
| -rw-r--r-- | core/encoding/hxa/read.odin | 29 | ||||
| -rw-r--r-- | core/encoding/hxa/write.odin | 6 | ||||
| -rw-r--r-- | core/encoding/json/marshal.odin | 11 | ||||
| -rw-r--r-- | core/encoding/json/parser.odin | 6 | ||||
| -rw-r--r-- | core/encoding/json/unmarshal.odin | 50 | ||||
| -rw-r--r-- | core/encoding/varint/doc.odin | 28 | ||||
| -rw-r--r-- | core/encoding/varint/leb128.odin | 165 |
8 files changed, 264 insertions, 43 deletions
diff --git a/core/encoding/hxa/doc.odin b/core/encoding/hxa/doc.odin index 16b94a243..230d6ea66 100644 --- a/core/encoding/hxa/doc.odin +++ b/core/encoding/hxa/doc.odin @@ -27,7 +27,7 @@ // Construction history, or BSP trees would make the format too large to serve its purpose. // The facilities of the formats to store meta data should make the format flexible enough // for most uses. Adding HxA support should be something anyone can do in a days work. - +// // Structure: // ---------- // HxA is designed to be extremely simple to parse, and is therefore based around conventions. It has @@ -45,17 +45,17 @@ // of a number of named layers. All layers in the stack have the same number of elements. Each layer // describes one property of the primitive. Each layer can have multiple channels and each layer can // store data of a different type. - +// // HaX stores 3 kinds of nodes // - Pixel data. // - Polygon geometry data. // - Meta data only. - +// // Pixel Nodes stores pixels in a layer stack. A layer may store things like Albedo, Roughness, // Reflectance, Light maps, Masks, Normal maps, and Displacement. Layers use the channels of the // layers to store things like color. The length of the layer stack is determined by the type and // dimensions stored in the - +// // Geometry data is stored in 3 separate layer stacks for: vertex data, corner data and face data. The // vertex data stores things like verities, blend shapes, weight maps, and vertex colors. The first // layer in a vertex stack has to be a 3 channel layer named "position" describing the base position @@ -63,7 +63,7 @@ // for things like UV, normals, and adjacency. The first layer in a corner stack has to be a 1 channel // integer layer named "index" describing the vertices used to form polygons. The last value in each // polygon has a negative - 1 index to indicate the end of the polygon. - +// // Example: // A quad and a tri with the vertex index: // [0, 1, 2, 3] [1, 4, 2] @@ -72,7 +72,7 @@ // The face stack stores values per face. the length of the face stack has to match the number of // negative values in the index layer in the corner stack. The face stack can be used to store things // like material index. - +// // Storage // ------- // All data is stored in little endian byte order with no padding. The layout mirrors the structs diff --git a/core/encoding/hxa/read.odin b/core/encoding/hxa/read.odin index ef7edc8b7..abe295530 100644 --- a/core/encoding/hxa/read.odin +++ b/core/encoding/hxa/read.odin @@ -39,6 +39,9 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato read_value :: proc(r: ^Reader, $T: typeid) -> (value: T, err: Read_Error) { remaining := len(r.data) - r.offset if remaining < size_of(T) { + if r.print_error { + fmt.eprintf("file '%s' failed to read value at offset %v\n", r.filename, r.offset) + } err = .Short_Read return } @@ -51,6 +54,10 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato read_array :: proc(r: ^Reader, $T: typeid, count: int) -> (value: []T, err: Read_Error) { remaining := len(r.data) - r.offset if remaining < size_of(T)*count { + if r.print_error { + fmt.eprintf("file '%s' failed to read array of %d elements at offset %v\n", + r.filename, count, r.offset) + } err = .Short_Read return } @@ -82,7 +89,8 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato type := read_value(r, Meta_Value_Type) or_return if type > max(Meta_Value_Type) { if r.print_error { - fmt.eprintf("HxA Error: file '%s' has meta value type %d. Maximum value is ", r.filename, u8(type), u8(max(Meta_Value_Type))) + fmt.eprintf("HxA Error: file '%s' has meta value type %d. Maximum value is %d\n", + r.filename, u8(type), u8(max(Meta_Value_Type))) } err = .Invalid_Data return @@ -114,7 +122,8 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato type := read_value(r, Layer_Data_Type) or_return if type > max(type) { if r.print_error { - fmt.eprintf("HxA Error: file '%s' has layer data type %d. Maximum value is ", r.filename, u8(type), u8(max(Layer_Data_Type))) + fmt.eprintf("HxA Error: file '%s' has layer data type %d. Maximum value is %d\n", + r.filename, u8(type), u8(max(Layer_Data_Type))) } err = .Invalid_Data return @@ -134,13 +143,23 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato } if len(data) < size_of(Header) { + if print_error { + fmt.eprintf("HxA Error: file '%s' has no header\n", filename) + } + err = .Short_Read return } context.allocator = allocator header := cast(^Header)raw_data(data) - assert(header.magic_number == MAGIC_NUMBER) + if (header.magic_number != MAGIC_NUMBER) { + if print_error { + fmt.eprintf("HxA Error: file '%s' has invalid magic number 0x%x\n", filename, header.magic_number) + } + err = .Invalid_Data + return + } r := &Reader{ filename = filename, @@ -150,6 +169,7 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato } node_count := 0 + file.header = header^ file.nodes = make([]Node, header.internal_node_count) defer if err != nil { nodes_destroy(file.nodes) @@ -162,7 +182,8 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato type := read_value(r, Node_Type) or_return if type > max(Node_Type) { if r.print_error { - fmt.eprintf("HxA Error: file '%s' has node type %d. Maximum value is ", r.filename, u8(type), u8(max(Node_Type))) + fmt.eprintf("HxA Error: file '%s' has node type %d. Maximum value is %d\n", + r.filename, u8(type), u8(max(Node_Type))) } err = .Invalid_Data return diff --git a/core/encoding/hxa/write.odin b/core/encoding/hxa/write.odin index e774018b2..5bb950e81 100644 --- a/core/encoding/hxa/write.odin +++ b/core/encoding/hxa/write.odin @@ -84,7 +84,7 @@ write_internal :: proc(w: ^Writer, file: File) { write_metadata :: proc(w: ^Writer, meta_data: []Meta) { for m in meta_data { - name_len := max(len(m.name), 255) + name_len := min(len(m.name), 255) write_value(w, u8(name_len)) write_string(w, m.name[:name_len]) @@ -127,7 +127,7 @@ write_internal :: proc(w: ^Writer, file: File) { write_layer_stack :: proc(w: ^Writer, layers: Layer_Stack) { write_value(w, u32(len(layers))) for layer in layers { - name_len := max(len(layer.name), 255) + name_len := min(len(layer.name), 255) write_value(w, u8(name_len)) write_string(w, layer .name[:name_len]) @@ -152,7 +152,7 @@ write_internal :: proc(w: ^Writer, file: File) { return } - write_value(w, &Header{ + write_value(w, Header{ magic_number = MAGIC_NUMBER, version = LATEST_VERSION, internal_node_count = u32le(len(file.nodes)), diff --git a/core/encoding/json/marshal.odin b/core/encoding/json/marshal.odin index adbcb95be..9c54f35f0 100644 --- a/core/encoding/json/marshal.odin +++ b/core/encoding/json/marshal.odin @@ -8,17 +8,18 @@ import "core:strings" import "core:io" Marshal_Data_Error :: enum { + None, Unsupported_Type, } -Marshal_Error :: union { +Marshal_Error :: union #shared_nil { Marshal_Data_Error, io.Error, } marshal :: proc(v: any, allocator := context.allocator) -> (data: []byte, err: Marshal_Error) { b := strings.make_builder(allocator) - defer if err != .None { + defer if err != nil { strings.destroy_builder(&b) } @@ -27,7 +28,7 @@ marshal :: proc(v: any, allocator := context.allocator) -> (data: []byte, err: M if len(b.buf) != 0 { data = b.buf[:] } - return data, .None + return data, nil } marshal_to_builder :: proc(b: ^strings.Builder, v: any) -> Marshal_Error { @@ -285,8 +286,8 @@ marshal_to_writer :: proc(w: io.Writer, v: any) -> (err: Marshal_Error) { case runtime.Type_Info_Integer: switch info.endianness { case .Platform: return false - case .Little: return ODIN_ENDIAN != "little" - case .Big: return ODIN_ENDIAN != "big" + case .Little: return ODIN_ENDIAN != .Little + case .Big: return ODIN_ENDIAN != .Big } } return false diff --git a/core/encoding/json/parser.odin b/core/encoding/json/parser.odin index c682ec9bd..7bf88c565 100644 --- a/core/encoding/json/parser.odin +++ b/core/encoding/json/parser.odin @@ -354,6 +354,12 @@ unquote_string :: proc(token: Token, spec: Specification, allocator := context.a b := bytes_make(len(s) + 2*utf8.UTF_MAX, 1, allocator) or_return w := copy(b, s[0:i]) + + if len(b) == 0 && allocator.data == nil { + // `unmarshal_count_array` calls us with a nil allocator + return string(b[:w]), nil + } + loop: for i < len(s) { c := s[i] switch { diff --git a/core/encoding/json/unmarshal.odin b/core/encoding/json/unmarshal.odin index fe3137b7e..bd48011f1 100644 --- a/core/encoding/json/unmarshal.odin +++ b/core/encoding/json/unmarshal.odin @@ -52,11 +52,11 @@ unmarshal_any :: proc(data: []byte, v: any, spec := DEFAULT_SPECIFICATION, alloc if p.spec == .MJSON { #partial switch p.curr_token.kind { case .Ident, .String: - return unmarsal_object(&p, data, .EOF) + return unmarshal_object(&p, data, .EOF) } } - return unmarsal_value(&p, data) + return unmarshal_value(&p, data) } @@ -148,7 +148,7 @@ assign_float :: proc(val: any, f: $T) -> bool { @(private) -unmarsal_string :: proc(p: ^Parser, val: any, str: string, ti: ^reflect.Type_Info) -> bool { +unmarshal_string_token :: proc(p: ^Parser, val: any, str: string, ti: ^reflect.Type_Info) -> bool { val := val switch dst in &val { case string: @@ -198,7 +198,7 @@ unmarsal_string :: proc(p: ^Parser, val: any, str: string, ti: ^reflect.Type_Inf @(private) -unmarsal_value :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { +unmarshal_value :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token} token := p.curr_token @@ -257,7 +257,7 @@ unmarsal_value :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { case .Ident: advance_token(p) if p.spec == .MJSON { - if unmarsal_string(p, any{v.data, ti.id}, token.text, ti) { + if unmarshal_string_token(p, any{v.data, ti.id}, token.text, ti) { return nil } } @@ -266,7 +266,7 @@ unmarsal_value :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { case .String: advance_token(p) str := unquote_string(token, p.spec, p.allocator) or_return - if unmarsal_string(p, any{v.data, ti.id}, str, ti) { + if unmarshal_string_token(p, any{v.data, ti.id}, str, ti) { return nil } delete(str, p.allocator) @@ -274,10 +274,10 @@ unmarsal_value :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { case .Open_Brace: - return unmarsal_object(p, v, .Close_Brace) + return unmarshal_object(p, v, .Close_Brace) case .Open_Bracket: - return unmarsal_array(p, v) + return unmarshal_array(p, v) case: if p.spec != .JSON { @@ -312,16 +312,16 @@ unmarsal_value :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { @(private) -unmarsal_expect_token :: proc(p: ^Parser, kind: Token_Kind, loc := #caller_location) -> Token { +unmarshal_expect_token :: proc(p: ^Parser, kind: Token_Kind, loc := #caller_location) -> Token { prev := p.curr_token err := expect_token(p, kind) - assert(err == nil, "unmarsal_expect_token") + assert(err == nil, "unmarshal_expect_token") return prev } @(private) -unmarsal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unmarshal_Error) { +unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unmarshal_Error) { UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token} if end_token == .Close_Brace { @@ -342,7 +342,7 @@ unmarsal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unma key, _ := parse_object_key(p, p.allocator) defer delete(key, p.allocator) - unmarsal_expect_token(p, .Colon) + unmarshal_expect_token(p, .Colon) fields := reflect.struct_fields_zipped(ti.id) @@ -378,7 +378,7 @@ unmarsal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unma field_ptr := rawptr(uintptr(v.data) + offset) field := any{field_ptr, type.id} - unmarsal_value(p, field) or_return + unmarshal_value(p, field) or_return if parse_comma(p) { break struct_loop @@ -407,11 +407,11 @@ unmarsal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unma map_loop: for p.curr_token.kind != end_token { key, _ := parse_object_key(p, p.allocator) - unmarsal_expect_token(p, .Colon) + unmarshal_expect_token(p, .Colon) mem.zero_slice(elem_backing) - if err := unmarsal_value(p, map_backing_value); err != nil { + if err := unmarshal_value(p, map_backing_value); err != nil { delete(key, p.allocator) return err } @@ -443,7 +443,7 @@ unmarsal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unma enumerated_array_loop: for p.curr_token.kind != end_token { key, _ := parse_object_key(p, p.allocator) - unmarsal_expect_token(p, .Colon) + unmarshal_expect_token(p, .Colon) defer delete(key, p.allocator) index := -1 @@ -460,7 +460,7 @@ unmarsal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unma index_ptr := rawptr(uintptr(v.data) + uintptr(index*t.elem_size)) index_any := any{index_ptr, t.elem.id} - unmarsal_value(p, index_any) or_return + unmarshal_value(p, index_any) or_return if parse_comma(p) { break enumerated_array_loop @@ -480,10 +480,10 @@ unmarsal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unma @(private) -unmarsal_count_array :: proc(p: ^Parser) -> (length: uintptr) { +unmarshal_count_array :: proc(p: ^Parser) -> (length: uintptr) { p_backup := p^ p.allocator = mem.nil_allocator() - unmarsal_expect_token(p, .Open_Bracket) + unmarshal_expect_token(p, .Open_Bracket) array_length_loop: for p.curr_token.kind != .Close_Bracket { _, _ = parse_value(p) length += 1 @@ -497,9 +497,9 @@ unmarsal_count_array :: proc(p: ^Parser) -> (length: uintptr) { } @(private) -unmarsal_array :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { +unmarshal_array :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { assign_array :: proc(p: ^Parser, base: rawptr, elem: ^reflect.Type_Info, length: uintptr) -> Unmarshal_Error { - unmarsal_expect_token(p, .Open_Bracket) + unmarshal_expect_token(p, .Open_Bracket) for idx: uintptr = 0; p.curr_token.kind != .Close_Bracket; idx += 1 { assert(idx < length) @@ -507,14 +507,14 @@ unmarsal_array :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { elem_ptr := rawptr(uintptr(base) + idx*uintptr(elem.size)) elem := any{elem_ptr, elem.id} - unmarsal_value(p, elem) or_return + unmarshal_value(p, elem) or_return if parse_comma(p) { break } } - unmarsal_expect_token(p, .Close_Bracket) + unmarshal_expect_token(p, .Close_Bracket) return nil @@ -524,7 +524,7 @@ unmarsal_array :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { ti := reflect.type_info_base(type_info_of(v.id)) - length := unmarsal_count_array(p) + length := unmarshal_count_array(p) #partial switch t in ti.variant { case reflect.Type_Info_Slice: @@ -578,4 +578,4 @@ unmarsal_array :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { } return UNSUPPORTED_TYPE -}
\ No newline at end of file +} diff --git a/core/encoding/varint/doc.odin b/core/encoding/varint/doc.odin new file mode 100644 index 000000000..5e4708a59 --- /dev/null +++ b/core/encoding/varint/doc.odin @@ -0,0 +1,28 @@ +/* + Implementation of the LEB128 variable integer encoding as used by DWARF encoding and DEX files, among others. + + Author of this Odin package: Jeroen van Rijn + + Example: + ```odin + import "core:encoding/varint" + import "core:fmt" + + main :: proc() { + buf: [varint.LEB128_MAX_BYTES]u8 + + value := u128(42) + + encode_size, encode_err := varint.encode_uleb128(buf[:], value) + assert(encode_size == 1 && encode_err == .None) + + fmt.printf("Encoded as %v\n", buf[:encode_size]) + decoded_val, decode_size, decode_err := varint.decode_uleb128(buf[:]) + + assert(decoded_val == value && decode_size == encode_size && decode_err == .None) + fmt.printf("Decoded as %v, using %v byte%v\n", decoded_val, decode_size, "" if decode_size == 1 else "s") + } + ``` + +*/ +package varint
\ No newline at end of file diff --git a/core/encoding/varint/leb128.odin b/core/encoding/varint/leb128.odin new file mode 100644 index 000000000..f8fcc7de5 --- /dev/null +++ b/core/encoding/varint/leb128.odin @@ -0,0 +1,165 @@ +/* + Copyright 2022 Jeroen van Rijn <nom@duclavier.com>. + Made available under Odin's BSD-3 license. + + List of contributors: + Jeroen van Rijn: Initial implementation. +*/ + +// package varint implements variable length integer encoding and decoding using +// the LEB128 format as used by DWARF debug info, Android .dex and other file formats. +package varint + +// In theory we should use the bigint package. In practice, varints bigger than this indicate a corrupted file. +// Instead we'll set limits on the values we'll encode/decode +// 18 * 7 bits = 126, which means that a possible 19th byte may at most be `0b0000_0011`. +LEB128_MAX_BYTES :: 19 + +Error :: enum { + None = 0, + Buffer_Too_Small = 1, + Value_Too_Large = 2, +} + +// Decode a slice of bytes encoding an unsigned LEB128 integer into value and number of bytes used. +// Returns `size` == 0 for an invalid value, empty slice, or a varint > 18 bytes. +decode_uleb128_buffer :: proc(buf: []u8) -> (val: u128, size: int, err: Error) { + if len(buf) == 0 { + return 0, 0, .Buffer_Too_Small + } + + for v in buf { + val, size, err = decode_uleb128_byte(v, size, val) + if err != .Buffer_Too_Small { + return + } + } + + if err == .Buffer_Too_Small { + val, size = 0, 0 + } + return +} + +// Decodes an unsigned LEB128 integer into value a byte at a time. +// Returns `.None` when decoded properly, `.Value_Too_Large` when they value +// exceeds the limits of a u128, and `.Buffer_Too_Small` when it's not yet fully decoded. +decode_uleb128_byte :: proc(input: u8, offset: int, accumulator: u128) -> (val: u128, size: int, err: Error) { + size = offset + 1 + + // 18 * 7 bits = 126, which means that a possible 19th byte may at most be 0b0000_0011. + if size > LEB128_MAX_BYTES || size == LEB128_MAX_BYTES && input > 0b0000_0011 { + return 0, 0, .Value_Too_Large + } + + val = accumulator | u128(input & 0x7f) << uint(offset * 7) + + if input < 128 { + // We're done + return + } + + // If the buffer runs out before the number ends, return an error. + return val, size, .Buffer_Too_Small +} +decode_uleb128 :: proc {decode_uleb128_buffer, decode_uleb128_byte} + +// Decode a slice of bytes encoding a signed LEB128 integer into value and number of bytes used. +// Returns `size` == 0 for an invalid value, empty slice, or a varint > 18 bytes. +decode_ileb128_buffer :: proc(buf: []u8) -> (val: i128, size: int, err: Error) { + if len(buf) == 0 { + return 0, 0, .Buffer_Too_Small + } + + for v in buf { + val, size, err = decode_ileb128_byte(v, size, val) + if err != .Buffer_Too_Small { + return + } + } + + if err == .Buffer_Too_Small { + val, size = 0, 0 + } + return +} + +// Decode a a signed LEB128 integer into value and number of bytes used, one byte at a time. +// Returns `size` == 0 for an invalid value, empty slice, or a varint > 18 bytes. +decode_ileb128_byte :: proc(input: u8, offset: int, accumulator: i128) -> (val: i128, size: int, err: Error) { + size = offset + 1 + shift := uint(offset * 7) + + // 18 * 7 bits = 126, which including sign means we can have a 19th byte. + if size > LEB128_MAX_BYTES || size == LEB128_MAX_BYTES && input > 0x7f { + return 0, 0, .Value_Too_Large + } + + val = accumulator | i128(input & 0x7f) << shift + + if input < 128 { + if input & 0x40 == 0x40 { + val |= max(i128) << (shift + 7) + } + return val, size, .None + } + return val, size, .Buffer_Too_Small +} +decode_ileb128 :: proc{decode_ileb128_buffer, decode_ileb128_byte} + +// Encode `val` into `buf` as an unsigned LEB128 encoded series of bytes. +// `buf` must be appropriately sized. +encode_uleb128 :: proc(buf: []u8, val: u128) -> (size: int, err: Error) { + val := val + + for { + size += 1 + + if size > len(buf) { + return 0, .Buffer_Too_Small + } + + low := val & 0x7f + val >>= 7 + + if val > 0 { + low |= 0x80 // more bytes to follow + } + buf[size - 1] = u8(low) + + if val == 0 { break } + } + return +} + +@(private) +SIGN_MASK :: (i128(1) << 121) // sign extend mask + +// Encode `val` into `buf` as a signed LEB128 encoded series of bytes. +// `buf` must be appropriately sized. +encode_ileb128 :: proc(buf: []u8, val: i128) -> (size: int, err: Error) { + val := val + more := true + + for more { + size += 1 + + if size > len(buf) { + return 0, .Buffer_Too_Small + } + + low := val & 0x7f + val >>= 7 + + low = (low ~ SIGN_MASK) - SIGN_MASK + + if (val == 0 && low & 0x40 != 0x40) || (val == -1 && low & 0x40 == 0x40) { + more = false + } else { + low |= 0x80 + } + + buf[size - 1] = u8(low) + } + return +}
\ No newline at end of file |