diff options
| author | Courtney Strachan <courtney.strachan@gmail.com> | 2025-10-06 02:41:44 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-10-06 02:41:44 +0100 |
| commit | 6de2d6e8ca687c989bbb7806e5cbe8d791e425bf (patch) | |
| tree | 03a2e0a84c7c1530215f8e3f59a7f643b39b3677 /core/encoding | |
| parent | dbbe96ae5c343f0e803de6ee508207a62571534f (diff) | |
| parent | 0f97382fa3e46da80705c00dfe02f3deb9562e4f (diff) | |
Merge branch 'odin-lang:master' into master
Diffstat (limited to 'core/encoding')
| -rw-r--r-- | core/encoding/ansi/ansi.odin | 137 | ||||
| -rw-r--r-- | core/encoding/ansi/doc.odin | 20 | ||||
| -rw-r--r-- | core/encoding/base32/base32_test.odin | 1 | ||||
| -rw-r--r-- | core/encoding/cbor/cbor.odin | 6 | ||||
| -rw-r--r-- | core/encoding/cbor/marshal.odin | 36 | ||||
| -rw-r--r-- | core/encoding/cbor/tags.odin | 12 | ||||
| -rw-r--r-- | core/encoding/cbor/unmarshal.odin | 35 | ||||
| -rw-r--r-- | core/encoding/csv/doc.odin | 2 | ||||
| -rw-r--r-- | core/encoding/csv/reader.odin | 2 | ||||
| -rw-r--r-- | core/encoding/entity/entity.odin | 54 | ||||
| -rw-r--r-- | core/encoding/hxa/read.odin | 4 | ||||
| -rw-r--r-- | core/encoding/json/marshal.odin | 16 | ||||
| -rw-r--r-- | core/encoding/json/tokenizer.odin | 4 | ||||
| -rw-r--r-- | core/encoding/json/unmarshal.odin | 46 | ||||
| -rw-r--r-- | core/encoding/uuid/generation.odin | 4 | ||||
| -rw-r--r-- | core/encoding/xml/tokenizer.odin | 38 | ||||
| -rw-r--r-- | core/encoding/xml/xml_reader.odin | 106 |
17 files changed, 236 insertions, 287 deletions
diff --git a/core/encoding/ansi/ansi.odin b/core/encoding/ansi/ansi.odin deleted file mode 100644 index 5550a1671..000000000 --- a/core/encoding/ansi/ansi.odin +++ /dev/null @@ -1,137 +0,0 @@ -package ansi - -BEL :: "\a" // Bell -BS :: "\b" // Backspace -ESC :: "\e" // Escape - -// Fe Escape sequences - -CSI :: ESC + "[" // Control Sequence Introducer -OSC :: ESC + "]" // Operating System Command -ST :: ESC + "\\" // String Terminator - -// CSI sequences - -CUU :: "A" // Cursor Up -CUD :: "B" // Cursor Down -CUF :: "C" // Cursor Forward -CUB :: "D" // Cursor Back -CNL :: "E" // Cursor Next Line -CPL :: "F" // Cursor Previous Line -CHA :: "G" // Cursor Horizontal Absolute -CUP :: "H" // Cursor Position -ED :: "J" // Erase in Display -EL :: "K" // Erase in Line -SU :: "S" // Scroll Up -SD :: "T" // Scroll Down -HVP :: "f" // Horizontal Vertical Position -SGR :: "m" // Select Graphic Rendition -AUX_ON :: "5i" // AUX Port On -AUX_OFF :: "4i" // AUX Port Off -DSR :: "6n" // Device Status Report - -// CSI: private sequences - -SCP :: "s" // Save Current Cursor Position -RCP :: "u" // Restore Saved Cursor Position -DECAWM_ON :: "?7h" // Auto Wrap Mode (Enabled) -DECAWM_OFF :: "?7l" // Auto Wrap Mode (Disabled) -DECTCEM_SHOW :: "?25h" // Text Cursor Enable Mode (Visible) -DECTCEM_HIDE :: "?25l" // Text Cursor Enable Mode (Invisible) - -// SGR sequences - -RESET :: "0" -BOLD :: "1" -FAINT :: "2" -ITALIC :: "3" // Not widely supported. -UNDERLINE :: "4" -BLINK_SLOW :: "5" -BLINK_RAPID :: "6" // Not widely supported. -INVERT :: "7" // Also known as reverse video. -HIDE :: "8" // Not widely supported. -STRIKE :: "9" -FONT_PRIMARY :: "10" -FONT_ALT1 :: "11" -FONT_ALT2 :: "12" -FONT_ALT3 :: "13" -FONT_ALT4 :: "14" -FONT_ALT5 :: "15" -FONT_ALT6 :: "16" -FONT_ALT7 :: "17" -FONT_ALT8 :: "18" -FONT_ALT9 :: "19" -FONT_FRAKTUR :: "20" // Rarely supported. -UNDERLINE_DOUBLE :: "21" // May be interpreted as "disable bold." -NO_BOLD_FAINT :: "22" -NO_ITALIC_BLACKLETTER :: "23" -NO_UNDERLINE :: "24" -NO_BLINK :: "25" -PROPORTIONAL_SPACING :: "26" -NO_REVERSE :: "27" -NO_HIDE :: "28" -NO_STRIKE :: "29" - -FG_BLACK :: "30" -FG_RED :: "31" -FG_GREEN :: "32" -FG_YELLOW :: "33" -FG_BLUE :: "34" -FG_MAGENTA :: "35" -FG_CYAN :: "36" -FG_WHITE :: "37" -FG_COLOR :: "38" -FG_COLOR_8_BIT :: "38;5" // Followed by ";n" where n is in 0..=255 -FG_COLOR_24_BIT :: "38;2" // Followed by ";r;g;b" where r,g,b are in 0..=255 -FG_DEFAULT :: "39" - -BG_BLACK :: "40" -BG_RED :: "41" -BG_GREEN :: "42" -BG_YELLOW :: "43" -BG_BLUE :: "44" -BG_MAGENTA :: "45" -BG_CYAN :: "46" -BG_WHITE :: "47" -BG_COLOR :: "48" -BG_COLOR_8_BIT :: "48;5" // Followed by ";n" where n is in 0..=255 -BG_COLOR_24_BIT :: "48;2" // Followed by ";r;g;b" where r,g,b are in 0..=255 -BG_DEFAULT :: "49" - -NO_PROPORTIONAL_SPACING :: "50" -FRAMED :: "51" -ENCIRCLED :: "52" -OVERLINED :: "53" -NO_FRAME_ENCIRCLE :: "54" -NO_OVERLINE :: "55" - -// SGR: non-standard bright colors - -FG_BRIGHT_BLACK :: "90" // Also known as grey. -FG_BRIGHT_RED :: "91" -FG_BRIGHT_GREEN :: "92" -FG_BRIGHT_YELLOW :: "93" -FG_BRIGHT_BLUE :: "94" -FG_BRIGHT_MAGENTA :: "95" -FG_BRIGHT_CYAN :: "96" -FG_BRIGHT_WHITE :: "97" - -BG_BRIGHT_BLACK :: "100" // Also known as grey. -BG_BRIGHT_RED :: "101" -BG_BRIGHT_GREEN :: "102" -BG_BRIGHT_YELLOW :: "103" -BG_BRIGHT_BLUE :: "104" -BG_BRIGHT_MAGENTA :: "105" -BG_BRIGHT_CYAN :: "106" -BG_BRIGHT_WHITE :: "107" - -// Fp Escape sequences - -DECSC :: ESC + "7" // DEC Save Cursor -DECRC :: ESC + "8" // DEC Restore Cursor - -// OSC sequences - -WINDOW_TITLE :: "2" // Followed by ";<text>" ST. -HYPERLINK :: "8" // Followed by ";[params];<URI>" ST. Closed by OSC HYPERLINK ";;" ST. -CLIPBOARD :: "52" // Followed by ";c;<Base64-encoded string>" ST. diff --git a/core/encoding/ansi/doc.odin b/core/encoding/ansi/doc.odin deleted file mode 100644 index 966e6be00..000000000 --- a/core/encoding/ansi/doc.odin +++ /dev/null @@ -1,20 +0,0 @@ -/* -package ansi implements constant references to many widely-supported ANSI -escape codes, primarily used in terminal emulators for enhanced graphics, such -as colors, text styling, and animated displays. - -For example, you can print out a line of cyan text like this: - fmt.println(ansi.CSI + ansi.FG_CYAN + ansi.SGR + "Hellope!" + ansi.CSI + ansi.RESET + ansi.SGR) - -Multiple SGR (Select Graphic Rendition) codes can be joined by semicolons: - fmt.println(ansi.CSI + ansi.BOLD + ";" + ansi.FG_BLUE + ansi.SGR + "Hellope!" + ansi.CSI + ansi.RESET + ansi.SGR) - -If your terminal supports 24-bit true color mode, you can also do this: - fmt.println(ansi.CSI + ansi.FG_COLOR_24_BIT + ";0;255;255" + ansi.SGR + "Hellope!" + ansi.CSI + ansi.RESET + ansi.SGR) - -For more information, see: -- [[ https://en.wikipedia.org/wiki/ANSI_escape_code ]] -- [[ https://www.vt100.net/docs/vt102-ug/chapter5.html ]] -- [[ https://invisible-island.net/xterm/ctlseqs/ctlseqs.html ]] -*/ -package ansi diff --git a/core/encoding/base32/base32_test.odin b/core/encoding/base32/base32_test.odin index ea41ae36f..07d5c8080 100644 --- a/core/encoding/base32/base32_test.odin +++ b/core/encoding/base32/base32_test.odin @@ -1,3 +1,4 @@ +#+test package encoding_base32 import "core:testing" diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index 8eb829ed3..1fb7c34ab 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -385,17 +385,17 @@ to_diagnostic_format_writer :: proc(w: io.Writer, val: Value, padding := 0) -> i // which we want for the diagnostic format. case f16: buf: [64]byte - str := strconv.append_float(buf[:], f64(v), 'f', 2*size_of(f16), 8*size_of(f16)) + str := strconv.write_float(buf[:], f64(v), 'f', 2*size_of(f16), 8*size_of(f16)) if str[0] == '+' && str != "+Inf" { str = str[1:] } io.write_string(w, str) or_return case f32: buf: [128]byte - str := strconv.append_float(buf[:], f64(v), 'f', 2*size_of(f32), 8*size_of(f32)) + str := strconv.write_float(buf[:], f64(v), 'f', 2*size_of(f32), 8*size_of(f32)) if str[0] == '+' && str != "+Inf" { str = str[1:] } io.write_string(w, str) or_return case f64: buf: [256]byte - str := strconv.append_float(buf[:], f64(v), 'f', 2*size_of(f64), 8*size_of(f64)) + str := strconv.write_float(buf[:], f64(v), 'f', 2*size_of(f64), 8*size_of(f64)) if str[0] == '+' && str != "+Inf" { str = str[1:] } io.write_string(w, str) or_return diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index aca71deb2..b23087c90 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -612,6 +612,42 @@ _marshal_into_encoder :: proc(e: Encoder, v: any, ti: ^runtime.Type_Info) -> (er case: panic("unknown bit_size size") } + case runtime.Type_Info_Matrix: + count := info.column_count * info.elem_stride + err_conv(_encode_u64(e, u64(count), .Array)) or_return + + if impl, ok := _tag_implementations_type[info.elem.id]; ok { + for i in 0..<count { + data := uintptr(v.data) + uintptr(i*info.elem_size) + impl->marshal(e, any{rawptr(data), info.elem.id}) or_return + } + return + } + + elem_ti := runtime.type_info_core(type_info_of(info.elem.id)) + for i in 0..<count { + data := uintptr(v.data) + uintptr(i*info.elem_size) + _marshal_into_encoder(e, any{rawptr(data), info.elem.id}, elem_ti) or_return + } + return + + case runtime.Type_Info_Simd_Vector: + err_conv(_encode_u64(e, u64(info.count), .Array)) or_return + + if impl, ok := _tag_implementations_type[info.elem.id]; ok { + for i in 0..<info.count { + data := uintptr(v.data) + uintptr(i*info.elem_size) + impl->marshal(e, any{rawptr(data), info.elem.id}) or_return + } + return + } + + elem_ti := runtime.type_info_core(type_info_of(info.elem.id)) + for i in 0..<info.count { + data := uintptr(v.data) + uintptr(i*info.elem_size) + _marshal_into_encoder(e, any{rawptr(data), info.elem.id}, elem_ti) or_return + } + return } return _unsupported(v.id, nil) diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index 17420af46..be07b926a 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -82,14 +82,16 @@ _tag_implementations_id: map[string]Tag_Implementation _tag_implementations_type: map[typeid]Tag_Implementation // Register a custom tag implementation to be used when marshalling that type and unmarshalling that tag number. -tag_register_type :: proc(impl: Tag_Implementation, nr: Tag_Number, type: typeid) { +tag_register_type :: proc "contextless" (impl: Tag_Implementation, nr: Tag_Number, type: typeid) { + context = runtime.default_context() _tag_implementations_nr[nr] = impl _tag_implementations_type[type] = impl } // Register a custom tag implementation to be used when marshalling that tag number or marshalling // a field with the struct tag `cbor_tag:"nr"`. -tag_register_number :: proc(impl: Tag_Implementation, nr: Tag_Number, id: string) { +tag_register_number :: proc "contextless" (impl: Tag_Implementation, nr: Tag_Number, id: string) { + context = runtime.default_context() _tag_implementations_nr[nr] = impl _tag_implementations_id[id] = impl } @@ -98,13 +100,13 @@ tag_register_number :: proc(impl: Tag_Implementation, nr: Tag_Number, id: string INITIALIZE_DEFAULT_TAGS :: #config(CBOR_INITIALIZE_DEFAULT_TAGS, !ODIN_DEFAULT_TO_PANIC_ALLOCATOR && !ODIN_DEFAULT_TO_NIL_ALLOCATOR) @(private, init, disabled=!INITIALIZE_DEFAULT_TAGS) -tags_initialize_defaults :: proc() { +tags_initialize_defaults :: proc "contextless" () { tags_register_defaults() } // Registers tags that have implementations provided by this package. // This is done by default and can be controlled with the `CBOR_INITIALIZE_DEFAULT_TAGS` define. -tags_register_defaults :: proc() { +tags_register_defaults :: proc "contextless" () { tag_register_number({nil, tag_time_unmarshal, tag_time_marshal}, TAG_EPOCH_TIME_NR, TAG_EPOCH_TIME_ID) tag_register_number({nil, tag_base64_unmarshal, tag_base64_marshal}, TAG_BASE64_NR, TAG_BASE64_ID) tag_register_number({nil, tag_cbor_unmarshal, tag_cbor_marshal}, TAG_CBOR_NR, TAG_CBOR_ID) @@ -298,7 +300,7 @@ tag_base64_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number, #partial switch t in ti.variant { case reflect.Type_Info_String: - + assert(t.encoding == .UTF_8) if t.is_cstring { length := base64.decoded_len(bytes) builder := strings.builder_make(0, length+1) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index c39255d9d..043b2ec60 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -29,6 +29,7 @@ an input. unmarshal :: proc { unmarshal_from_reader, unmarshal_from_string, + unmarshal_from_bytes, } unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator, temp_allocator := context.temp_allocator, loc := #caller_location) -> (err: Unmarshal_Error) { @@ -51,6 +52,11 @@ unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, all return } +// Unmarshals from a slice of bytes, see docs on the proc group `Unmarshal` for more info. +unmarshal_from_bytes :: proc(bytes: []byte, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator, temp_allocator := context.temp_allocator, loc := #caller_location) -> (err: Unmarshal_Error) { + return unmarshal_from_string(string(bytes), ptr, flags, allocator, temp_allocator, loc) +} + unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator, temp_allocator := context.temp_allocator, loc := #caller_location) -> (err: Unmarshal_Error) { d := d @@ -329,6 +335,8 @@ _unmarshal_value :: proc(d: Decoder, v: any, hdr: Header, allocator := context.a _unmarshal_bytes :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add, allocator := context.allocator, loc := #caller_location) -> (err: Unmarshal_Error) { #partial switch t in ti.variant { case reflect.Type_Info_String: + assert(t.encoding == .UTF_8) + bytes := err_conv(_decode_bytes(d, add, allocator=allocator, loc=loc)) or_return if t.is_cstring { @@ -487,7 +495,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header data := mem.alloc_bytes_non_zeroed(t.elem.size * scap, t.elem.align, allocator=allocator, loc=loc) or_return defer if err != nil { mem.free_bytes(data, allocator=allocator, loc=loc) } - da := mem.Raw_Dynamic_Array{raw_data(data), 0, length, context.allocator } + da := mem.Raw_Dynamic_Array{raw_data(data), 0, scap, context.allocator } assign_array(d, &da, t.elem, length) or_return @@ -585,6 +593,31 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header if out_of_space { return _unsupported(v, hdr) } return + case reflect.Type_Info_Matrix: + count := t.column_count * t.elem_stride + length, _ := err_conv(_decode_len_container(d, add)) or_return + if length > count { + return _unsupported(v, hdr) + } + + da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, allocator } + + out_of_space := assign_array(d, &da, t.elem, length, growable=false) or_return + if out_of_space { return _unsupported(v, hdr) } + return + + case reflect.Type_Info_Simd_Vector: + length, _ := err_conv(_decode_len_container(d, add)) or_return + if length > t.count { + return _unsupported(v, hdr) + } + + da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, allocator } + + out_of_space := assign_array(d, &da, t.elem, length, growable=false) or_return + if out_of_space { return _unsupported(v, hdr) } + return + case: return _unsupported(v, hdr) } } diff --git a/core/encoding/csv/doc.odin b/core/encoding/csv/doc.odin index bfeadafd6..7abe2be49 100644 --- a/core/encoding/csv/doc.odin +++ b/core/encoding/csv/doc.odin @@ -63,8 +63,6 @@ Example: read_csv_from_string :: proc(filename: string) { r: csv.Reader r.trim_leading_space = true - r.reuse_record = true // Without it you have to delete(record) - r.reuse_record_buffer = true // Without it you have to each of the fields within it defer csv.reader_destroy(&r) csv_data, ok := os.read_entire_file(filename) diff --git a/core/encoding/csv/reader.odin b/core/encoding/csv/reader.odin index 5348624d5..577ef219d 100644 --- a/core/encoding/csv/reader.odin +++ b/core/encoding/csv/reader.odin @@ -130,7 +130,7 @@ reader_destroy :: proc(r: ^Reader) { for record, row_idx in csv.iterator_next(&r) { ... } TIP: If you process the results within the loop and don't need to own the results, - you can set the Reader's `reuse_record` and `reuse_record_reuse_record_buffer` to true; + you can set the Reader's `reuse_record` and `reuse_record_buffer` to true; you won't need to delete the record or its fields. */ iterator_next :: proc(r: ^Reader) -> (record: []string, idx: int, err: Error, more: bool) { diff --git a/core/encoding/entity/entity.odin b/core/encoding/entity/entity.odin index d2f1d46b2..cb8fa8611 100644 --- a/core/encoding/entity/entity.odin +++ b/core/encoding/entity/entity.odin @@ -108,7 +108,7 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator := it couldn't have been part of an XML tag body to be decoded here. Keep in mind that we could already *be* inside a CDATA tag. - If so, write `>` as a literal and continue. + If so, write `<` as a literal and continue. */ if in_data { write_rune(&builder, '<') @@ -119,11 +119,9 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator := case ']': // If we're unboxing _and_ decoding CDATA, we'll have to check for the end tag. if in_data { - if t.read_offset + len(CDATA_END) < len(t.src) { - if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END { - in_data = false - t.read_offset += len(CDATA_END) - 1 - } + if strings.has_prefix(t.src[t.offset:], CDATA_END) { + in_data = false + t.read_offset += len(CDATA_END) - 1 } continue } else { @@ -297,40 +295,40 @@ _handle_xml_special :: proc(t: ^Tokenizer, builder: ^strings.Builder, options: X assert(t != nil && t.r == '<') if t.read_offset + len(CDATA_START) >= len(t.src) { return false, .None } - if string(t.src[t.offset:][:len(CDATA_START)]) == CDATA_START { - t.read_offset += len(CDATA_START) - 1 - + s := string(t.src[t.offset:]) + if strings.has_prefix(s, CDATA_START) { if .Unbox_CDATA in options && .Decode_CDATA in options { // We're unboxing _and_ decoding CDATA + t.read_offset += len(CDATA_START) - 1 return true, .None } - // CDATA is passed through. - offset := t.offset - - // Scan until end of CDATA. + // CDATA is passed through. Scan until end of CDATA. + start_offset := t.offset + t.read_offset += len(CDATA_START) for { - advance(t) or_return - if t.r < 0 { return true, .CDATA_Not_Terminated } - - if t.read_offset + len(CDATA_END) < len(t.src) { - if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END { - t.read_offset += len(CDATA_END) - 1 + advance(t) + if t.r < 0 { + // error(t, offset, "[scan_string] CDATA was not terminated\n") + return true, .CDATA_Not_Terminated + } - cdata := string(t.src[offset : t.read_offset]) - - if .Unbox_CDATA in options { - cdata = cdata[len(CDATA_START):] - cdata = cdata[:len(cdata) - len(CDATA_END)] - } + // Scan until the end of a CDATA tag. + if s = string(t.src[t.read_offset:]); strings.has_prefix(s, CDATA_END) { + t.read_offset += len(CDATA_END) + cdata := string(t.src[start_offset:t.read_offset]) - write_string(builder, cdata) - return false, .None + if .Unbox_CDATA in options { + cdata = cdata[len(CDATA_START):] + cdata = cdata[:len(cdata) - len(CDATA_END)] } + write_string(builder, cdata) + return false, .None } } - } else if string(t.src[t.offset:][:len(COMMENT_START)]) == COMMENT_START { + + } else if strings.has_prefix(s, COMMENT_START) { t.read_offset += len(COMMENT_START) // Comment is passed through by default. offset := t.offset diff --git a/core/encoding/hxa/read.odin b/core/encoding/hxa/read.odin index a679946f8..6dde16848 100644 --- a/core/encoding/hxa/read.odin +++ b/core/encoding/hxa/read.odin @@ -79,7 +79,6 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato read_meta :: proc(r: ^Reader, capacity: u32le, allocator := context.allocator, loc := #caller_location) -> (meta_data: []Meta, err: Read_Error) { meta_data = make([]Meta, int(capacity), allocator=allocator) count := 0 - defer meta_data = meta_data[:count] for &m in meta_data { m.name = read_name(r) or_return @@ -105,6 +104,7 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato count += 1 } + meta_data = meta_data[:count] return } @@ -112,7 +112,6 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato stack_count := read_value(r, u32le) or_return layer_count := 0 layers = make(Layer_Stack, stack_count, allocator=allocator, loc=loc) - defer layers = layers[:layer_count] for &layer in layers { layer.name = read_name(r) or_return layer.components = read_value(r, u8) or_return @@ -136,6 +135,7 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato layer_count += 1 } + layers = layers[:layer_count] return } diff --git a/core/encoding/json/marshal.odin b/core/encoding/json/marshal.odin index ed6de2f52..2fb507edf 100644 --- a/core/encoding/json/marshal.odin +++ b/core/encoding/json/marshal.odin @@ -108,13 +108,13 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err: if opt.write_uint_as_hex && (opt.spec == .JSON5 || opt.spec == .MJSON) { switch i in a { case u8, u16, u32, u64, u128: - s = strconv.append_bits_128(buf[:], u, 16, info.signed, 8*ti.size, "0123456789abcdef", { .Prefix }) + s = strconv.write_bits_128(buf[:], u, 16, info.signed, 8*ti.size, "0123456789abcdef", { .Prefix }) case: - s = strconv.append_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil) + s = strconv.write_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil) } } else { - s = strconv.append_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil) + s = strconv.write_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil) } io.write_string(w, s) or_return @@ -292,7 +292,7 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err: case runtime.Type_Info_Integer: buf: [40]byte u := cast_any_int_to_u128(ka) - name = strconv.append_bits_128(buf[:], u, 10, info.signed, 8*kti.size, "0123456789", nil) + name = strconv.write_bits_128(buf[:], u, 10, info.signed, 8*kti.size, "0123456789", nil) opt_write_key(w, opt, name) or_return case: return .Unsupported_Type @@ -359,10 +359,10 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err: #partial switch info in ti.variant { case runtime.Type_Info_String: switch x in v { - case string: - return x == "" - case cstring: - return x == nil || x == "" + case string: return x == "" + case cstring: return x == nil || x == "" + case string16: return x == "" + case cstring16: return x == nil || x == "" } case runtime.Type_Info_Any: return v.(any) == nil diff --git a/core/encoding/json/tokenizer.odin b/core/encoding/json/tokenizer.odin index e46d879a7..ad928b7d9 100644 --- a/core/encoding/json/tokenizer.odin +++ b/core/encoding/json/tokenizer.odin @@ -101,7 +101,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { } } - scan_espace :: proc(t: ^Tokenizer) -> bool { + scan_escape :: proc(t: ^Tokenizer) -> bool { switch t.r { case '"', '\'', '\\', '/', 'b', 'n', 'r', 't', 'f': next_rune(t) @@ -310,7 +310,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { break } if r == '\\' { - scan_espace(t) + scan_escape(t) } } diff --git a/core/encoding/json/unmarshal.odin b/core/encoding/json/unmarshal.odin index 57371e360..0b65adaac 100644 --- a/core/encoding/json/unmarshal.odin +++ b/core/encoding/json/unmarshal.odin @@ -117,9 +117,25 @@ assign_int :: proc(val: any, i: $T) -> bool { case uint: dst = uint (i) case uintptr: dst = uintptr(i) case: + is_bit_set_different_endian_to_platform :: proc(ti: ^runtime.Type_Info) -> bool { + if ti == nil { + return false + } + t := runtime.type_info_base(ti) + #partial switch info in t.variant { + case runtime.Type_Info_Integer: + switch info.endianness { + case .Platform: return false + case .Little: return ODIN_ENDIAN != .Little + case .Big: return ODIN_ENDIAN != .Big + } + } + return false + } + ti := type_info_of(v.id) - if _, ok := ti.variant.(runtime.Type_Info_Bit_Set); ok { - do_byte_swap := !reflect.bit_set_is_big_endian(v) + if info, ok := ti.variant.(runtime.Type_Info_Bit_Set); ok { + do_byte_swap := is_bit_set_different_endian_to_platform(info.underlying) switch ti.size * 8 { case 0: // no-op. case 8: @@ -390,6 +406,9 @@ unmarshal_expect_token :: proc(p: ^Parser, kind: Token_Kind, loc := #caller_loca return prev } +// Struct tags can include not only the name of the JSON key, but also a tag such as `omitempty`. +// Example: `json:"key_name,omitempty"` +// This returns the first field as `json_name`, and the rest are returned as `extra`. @(private) json_name_from_tag_value :: proc(value: string) -> (json_name, extra: string) { json_name = value @@ -425,12 +444,6 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm defer delete(key, p.allocator) unmarshal_expect_token(p, .Colon) - - field_test :: #force_inline proc "contextless" (field_used: [^]byte, offset: uintptr) -> bool { - prev_set := field_used[offset/8] & byte(offset&7) != 0 - field_used[offset/8] |= byte(offset&7) - return prev_set - } field_used_bytes := (reflect.size_of_typeid(ti.id)+7)/8 field_used := intrinsics.alloca(field_used_bytes + 1, 1) // + 1 to not overflow on size_of 0 types. @@ -449,7 +462,9 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm if use_field_idx < 0 { for field, field_idx in fields { - if key == field.name { + tag_value := reflect.struct_tag_get(field.tag, "json") + json_name, _ := json_name_from_tag_value(tag_value) + if json_name == "" && key == field.name { use_field_idx = field_idx break } @@ -470,7 +485,9 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm } } - if field.name == key || (field.tag != "" && reflect.struct_tag_get(field.tag, "json") == key) { + tag_value := reflect.struct_tag_get(field.tag, "json") + json_name, _ := json_name_from_tag_value(tag_value) + if (json_name == "" && field.name == key) || json_name == key { offset = field.offset type = field.type found = true @@ -492,6 +509,11 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm } if field_found { + field_test :: #force_inline proc "contextless" (field_used: [^]byte, offset: uintptr) -> bool { + prev_set := field_used[offset/8] & byte(offset&7) != 0 + field_used[offset/8] |= byte(offset&7) + return prev_set + } if field_test(field_used, offset) { return .Multiple_Use_Field } @@ -548,7 +570,9 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm key_ptr: rawptr #partial switch tk in t.key.variant { - case runtime.Type_Info_String: + case runtime.Type_Info_String: + assert(tk.encoding == .UTF_8) + key_ptr = rawptr(&key) key_cstr: cstring if reflect.is_cstring(t.key) { diff --git a/core/encoding/uuid/generation.odin b/core/encoding/uuid/generation.odin index 7c9d4b80c..b210f6a52 100644 --- a/core/encoding/uuid/generation.odin +++ b/core/encoding/uuid/generation.odin @@ -240,7 +240,7 @@ Example: import "core:encoding/uuid" import "core:fmt" - main :: proc() { + generate_v8_hash_bytes_example :: proc() { my_uuid := uuid.generate_v8_hash(uuid.Namespace_DNS, "www.odin-lang.org", .SHA256) my_uuid_string := uuid.to_string(my_uuid, context.temp_allocator) fmt.println(my_uuid_string) @@ -306,7 +306,7 @@ Example: import "core:encoding/uuid" import "core:fmt" - main :: proc() { + generate_v8_hash_string_example :: proc() { my_uuid := uuid.generate_v8_hash(uuid.Namespace_DNS, "www.odin-lang.org", .SHA256) my_uuid_string := uuid.to_string(my_uuid, context.temp_allocator) fmt.println(my_uuid_string) diff --git a/core/encoding/xml/tokenizer.odin b/core/encoding/xml/tokenizer.odin index a2bbaf28e..3ef9a6388 100644 --- a/core/encoding/xml/tokenizer.odin +++ b/core/encoding/xml/tokenizer.odin @@ -16,6 +16,7 @@ package encoding_xml import "core:fmt" import "core:unicode" import "core:unicode/utf8" +import "core:strings" Error_Handler :: #type proc(pos: Pos, fmt: string, args: ..any) @@ -121,7 +122,7 @@ default_error_handler :: proc(pos: Pos, msg: string, args: ..any) { error :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) { pos := offset_to_pos(t, offset) if t.err != nil { - t.err(pos, msg, ..args) + t.err(pos=pos, fmt=msg, args=args) } t.error_count += 1 } @@ -268,32 +269,27 @@ scan_comment :: proc(t: ^Tokenizer) -> (comment: string, err: Error) { // Skip CDATA skip_cdata :: proc(t: ^Tokenizer) -> (err: Error) { - if t.read_offset + len(CDATA_START) >= len(t.src) { - // Can't be the start of a CDATA tag. + if s := string(t.src[t.offset:]); !strings.has_prefix(s, CDATA_START) { return .None } - if string(t.src[t.offset:][:len(CDATA_START)]) == CDATA_START { - t.read_offset += len(CDATA_START) - offset := t.offset + t.read_offset += len(CDATA_START) + offset := t.offset - cdata_scan: for { - advance_rune(t) - if t.ch < 0 { - error(t, offset, "[scan_string] CDATA was not terminated\n") - return .Premature_EOF - } + cdata_scan: for { + advance_rune(t) + if t.ch < 0 { + error(t, offset, "[scan_string] CDATA was not terminated\n") + return .Premature_EOF + } - // Scan until the end of a CDATA tag. - if t.read_offset + len(CDATA_END) < len(t.src) { - if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END { - t.read_offset += len(CDATA_END) - break cdata_scan - } - } + // Scan until the end of a CDATA tag. + if s := string(t.src[t.read_offset:]); strings.has_prefix(s, CDATA_END) { + t.read_offset += len(CDATA_END) + break cdata_scan } } - return + return .None } @(optimization_mode="favor_size") @@ -393,6 +389,8 @@ scan :: proc(t: ^Tokenizer, multiline_string := false) -> Token { case '/': kind = .Slash case '-': kind = .Dash case ':': kind = .Colon + case '[': kind = .Open_Bracket + case ']': kind = .Close_Bracket case '"', '\'': kind = .Invalid diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin index b8c8b13a4..707d2b3f3 100644 --- a/core/encoding/xml/xml_reader.odin +++ b/core/encoding/xml/xml_reader.odin @@ -56,7 +56,7 @@ Option_Flag :: enum { Option_Flags :: bit_set[Option_Flag; u16] Document :: struct { - elements: [dynamic]Element, + elements: [dynamic]Element `fmt:"v,element_count"`, element_count: Element_ID, prologue: Attributes, @@ -70,15 +70,15 @@ Document :: struct { // If we encounter comments before the root node, and the option to intern comments is given, this is where they'll live. // Otherwise they'll be in the element tree. - comments: [dynamic]string, + comments: [dynamic]string `fmt:"-"`, // Internal - tokenizer: ^Tokenizer, - allocator: mem.Allocator, + tokenizer: ^Tokenizer `fmt:"-"`, + allocator: mem.Allocator `fmt:"-"`, // Input. Either the original buffer, or a copy if `.Input_May_Be_Modified` isn't specified. - input: []u8, - strings_to_free: [dynamic]string, + input: []u8 `fmt:"-"`, + strings_to_free: [dynamic]string `fmt:"-"`, } Element :: struct { @@ -175,7 +175,7 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha data = bytes.clone(data) } - t := &Tokenizer{} + t := new(Tokenizer) init(t, string(data), path, error_handler) doc = new(Document) @@ -195,7 +195,6 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha loop: for { skip_whitespace(t) - // NOTE(Jeroen): This is faster as a switch. switch t.ch { case '<': // Consume peeked `<` @@ -306,9 +305,17 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha } } + case .Open_Bracket: + // This could be a CDATA tag part of a tag's body. Unread the `<![` + t.offset -= 3 + + // Instead of calling `parse_body` here, we could also `continue loop` + // and fall through to the `case:` at the bottom of the outer loop. + // This makes the intent clearer. + parse_body(doc, element, opts) or_return + case: - error(t, t.offset, "Invalid Token after <!. Expected .Ident, got %#v\n", next) - return + error(t, t.offset, "Unexpected Token after <!: %#v", next) } } else if open.kind == .Question { @@ -341,38 +348,7 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha case: // This should be a tag's body text. - body_text := scan_string(t, t.offset) or_return - needs_processing := .Unbox_CDATA in opts.flags - needs_processing |= .Decode_SGML_Entities in opts.flags - - if !needs_processing { - append(&doc.elements[element].value, body_text) - continue - } - - decode_opts := entity.XML_Decode_Options{} - if .Keep_Tag_Body_Comments not_in opts.flags { - decode_opts += { .Comment_Strip } - } - - if .Decode_SGML_Entities not_in opts.flags { - decode_opts += { .No_Entity_Decode } - } - - if .Unbox_CDATA in opts.flags { - decode_opts += { .Unbox_CDATA } - if .Decode_SGML_Entities in opts.flags { - decode_opts += { .Decode_CDATA } - } - } - - decoded, decode_err := entity.decode_xml(body_text, decode_opts) - if decode_err == .None { - append(&doc.elements[element].value, decoded) - append(&doc.strings_to_free, decoded) - } else { - append(&doc.elements[element].value, body_text) - } + parse_body(doc, element, opts) or_return } } @@ -427,6 +403,7 @@ destroy :: proc(doc: ^Document) { } delete(doc.strings_to_free) + free(doc.tokenizer) free(doc) } @@ -457,8 +434,6 @@ parse_attribute :: proc(doc: ^Document) -> (attr: Attribute, offset: int, err: E t := doc.tokenizer key := expect(t, .Ident) or_return - offset = t.offset - len(key.text) - _ = expect(t, .Eq) or_return value := expect(t, .String, multiline_string=true) or_return @@ -591,6 +566,47 @@ parse_doctype :: proc(doc: ^Document) -> (err: Error) { return .None } +parse_body :: proc(doc: ^Document, element: Element_ID, opts: Options) -> (err: Error) { + assert(doc != nil) + context.allocator = doc.allocator + t := doc.tokenizer + + body_text := scan_string(t, t.offset) or_return + needs_processing := .Unbox_CDATA in opts.flags + needs_processing |= .Decode_SGML_Entities in opts.flags + + if !needs_processing { + append(&doc.elements[element].value, body_text) + return + } + + decode_opts := entity.XML_Decode_Options{} + if .Keep_Tag_Body_Comments not_in opts.flags { + decode_opts += { .Comment_Strip } + } + + if .Decode_SGML_Entities not_in opts.flags { + decode_opts += { .No_Entity_Decode } + } + + if .Unbox_CDATA in opts.flags { + decode_opts += { .Unbox_CDATA } + if .Decode_SGML_Entities in opts.flags { + decode_opts += { .Decode_CDATA } + } + } + + decoded, decode_err := entity.decode_xml(body_text, decode_opts) + if decode_err == .None { + append(&doc.elements[element].value, decoded) + append(&doc.strings_to_free, decoded) + } else { + append(&doc.elements[element].value, body_text) + } + + return +} + Element_ID :: u32 new_element :: proc(doc: ^Document) -> (id: Element_ID) { @@ -609,4 +625,4 @@ new_element :: proc(doc: ^Document) -> (id: Element_ID) { cur := doc.element_count doc.element_count += 1 return cur -} +}
\ No newline at end of file |