aboutsummaryrefslogtreecommitdiff
path: root/core/encoding
diff options
context:
space:
mode:
author Courtney Strachan <courtney.strachan@gmail.com> 2025-10-06 02:41:44 +0100
committer GitHub <noreply@github.com> 2025-10-06 02:41:44 +0100
commit 6de2d6e8ca687c989bbb7806e5cbe8d791e425bf (patch)
tree 03a2e0a84c7c1530215f8e3f59a7f643b39b3677 /core/encoding
parent dbbe96ae5c343f0e803de6ee508207a62571534f (diff)
parent 0f97382fa3e46da80705c00dfe02f3deb9562e4f (diff)
Merge branch 'odin-lang:master' into master
Diffstat (limited to 'core/encoding')
-rw-r--r-- core/encoding/ansi/ansi.odin 137
-rw-r--r-- core/encoding/ansi/doc.odin 20
-rw-r--r-- core/encoding/base32/base32_test.odin 1
-rw-r--r-- core/encoding/cbor/cbor.odin 6
-rw-r--r-- core/encoding/cbor/marshal.odin 36
-rw-r--r-- core/encoding/cbor/tags.odin 12
-rw-r--r-- core/encoding/cbor/unmarshal.odin 35
-rw-r--r-- core/encoding/csv/doc.odin 2
-rw-r--r-- core/encoding/csv/reader.odin 2
-rw-r--r-- core/encoding/entity/entity.odin 54
-rw-r--r-- core/encoding/hxa/read.odin 4
-rw-r--r-- core/encoding/json/marshal.odin 16
-rw-r--r-- core/encoding/json/tokenizer.odin 4
-rw-r--r-- core/encoding/json/unmarshal.odin 46
-rw-r--r-- core/encoding/uuid/generation.odin 4
-rw-r--r-- core/encoding/xml/tokenizer.odin 38
-rw-r--r-- core/encoding/xml/xml_reader.odin 106
17 files changed, 236 insertions, 287 deletions
diff --git a/core/encoding/ansi/ansi.odin b/core/encoding/ansi/ansi.odin
deleted file mode 100644
index 5550a1671..000000000
--- a/core/encoding/ansi/ansi.odin
+++ /dev/null
@@ -1,137 +0,0 @@
-package ansi
-
-BEL :: "\a" // Bell
-BS :: "\b" // Backspace
-ESC :: "\e" // Escape
-
-// Fe Escape sequences
-
-CSI :: ESC + "[" // Control Sequence Introducer
-OSC :: ESC + "]" // Operating System Command
-ST :: ESC + "\\" // String Terminator
-
-// CSI sequences
-
-CUU :: "A" // Cursor Up
-CUD :: "B" // Cursor Down
-CUF :: "C" // Cursor Forward
-CUB :: "D" // Cursor Back
-CNL :: "E" // Cursor Next Line
-CPL :: "F" // Cursor Previous Line
-CHA :: "G" // Cursor Horizontal Absolute
-CUP :: "H" // Cursor Position
-ED :: "J" // Erase in Display
-EL :: "K" // Erase in Line
-SU :: "S" // Scroll Up
-SD :: "T" // Scroll Down
-HVP :: "f" // Horizontal Vertical Position
-SGR :: "m" // Select Graphic Rendition
-AUX_ON :: "5i" // AUX Port On
-AUX_OFF :: "4i" // AUX Port Off
-DSR :: "6n" // Device Status Report
-
-// CSI: private sequences
-
-SCP :: "s" // Save Current Cursor Position
-RCP :: "u" // Restore Saved Cursor Position
-DECAWM_ON :: "?7h" // Auto Wrap Mode (Enabled)
-DECAWM_OFF :: "?7l" // Auto Wrap Mode (Disabled)
-DECTCEM_SHOW :: "?25h" // Text Cursor Enable Mode (Visible)
-DECTCEM_HIDE :: "?25l" // Text Cursor Enable Mode (Invisible)
-
-// SGR sequences
-
-RESET :: "0"
-BOLD :: "1"
-FAINT :: "2"
-ITALIC :: "3" // Not widely supported.
-UNDERLINE :: "4"
-BLINK_SLOW :: "5"
-BLINK_RAPID :: "6" // Not widely supported.
-INVERT :: "7" // Also known as reverse video.
-HIDE :: "8" // Not widely supported.
-STRIKE :: "9"
-FONT_PRIMARY :: "10"
-FONT_ALT1 :: "11"
-FONT_ALT2 :: "12"
-FONT_ALT3 :: "13"
-FONT_ALT4 :: "14"
-FONT_ALT5 :: "15"
-FONT_ALT6 :: "16"
-FONT_ALT7 :: "17"
-FONT_ALT8 :: "18"
-FONT_ALT9 :: "19"
-FONT_FRAKTUR :: "20" // Rarely supported.
-UNDERLINE_DOUBLE :: "21" // May be interpreted as "disable bold."
-NO_BOLD_FAINT :: "22"
-NO_ITALIC_BLACKLETTER :: "23"
-NO_UNDERLINE :: "24"
-NO_BLINK :: "25"
-PROPORTIONAL_SPACING :: "26"
-NO_REVERSE :: "27"
-NO_HIDE :: "28"
-NO_STRIKE :: "29"
-
-FG_BLACK :: "30"
-FG_RED :: "31"
-FG_GREEN :: "32"
-FG_YELLOW :: "33"
-FG_BLUE :: "34"
-FG_MAGENTA :: "35"
-FG_CYAN :: "36"
-FG_WHITE :: "37"
-FG_COLOR :: "38"
-FG_COLOR_8_BIT :: "38;5" // Followed by ";n" where n is in 0..=255
-FG_COLOR_24_BIT :: "38;2" // Followed by ";r;g;b" where r,g,b are in 0..=255
-FG_DEFAULT :: "39"
-
-BG_BLACK :: "40"
-BG_RED :: "41"
-BG_GREEN :: "42"
-BG_YELLOW :: "43"
-BG_BLUE :: "44"
-BG_MAGENTA :: "45"
-BG_CYAN :: "46"
-BG_WHITE :: "47"
-BG_COLOR :: "48"
-BG_COLOR_8_BIT :: "48;5" // Followed by ";n" where n is in 0..=255
-BG_COLOR_24_BIT :: "48;2" // Followed by ";r;g;b" where r,g,b are in 0..=255
-BG_DEFAULT :: "49"
-
-NO_PROPORTIONAL_SPACING :: "50"
-FRAMED :: "51"
-ENCIRCLED :: "52"
-OVERLINED :: "53"
-NO_FRAME_ENCIRCLE :: "54"
-NO_OVERLINE :: "55"
-
-// SGR: non-standard bright colors
-
-FG_BRIGHT_BLACK :: "90" // Also known as grey.
-FG_BRIGHT_RED :: "91"
-FG_BRIGHT_GREEN :: "92"
-FG_BRIGHT_YELLOW :: "93"
-FG_BRIGHT_BLUE :: "94"
-FG_BRIGHT_MAGENTA :: "95"
-FG_BRIGHT_CYAN :: "96"
-FG_BRIGHT_WHITE :: "97"
-
-BG_BRIGHT_BLACK :: "100" // Also known as grey.
-BG_BRIGHT_RED :: "101"
-BG_BRIGHT_GREEN :: "102"
-BG_BRIGHT_YELLOW :: "103"
-BG_BRIGHT_BLUE :: "104"
-BG_BRIGHT_MAGENTA :: "105"
-BG_BRIGHT_CYAN :: "106"
-BG_BRIGHT_WHITE :: "107"
-
-// Fp Escape sequences
-
-DECSC :: ESC + "7" // DEC Save Cursor
-DECRC :: ESC + "8" // DEC Restore Cursor
-
-// OSC sequences
-
-WINDOW_TITLE :: "2" // Followed by ";<text>" ST.
-HYPERLINK :: "8" // Followed by ";[params];<URI>" ST. Closed by OSC HYPERLINK ";;" ST.
-CLIPBOARD :: "52" // Followed by ";c;<Base64-encoded string>" ST.
diff --git a/core/encoding/ansi/doc.odin b/core/encoding/ansi/doc.odin
deleted file mode 100644
index 966e6be00..000000000
--- a/core/encoding/ansi/doc.odin
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
-package ansi implements constant references to many widely-supported ANSI
-escape codes, primarily used in terminal emulators for enhanced graphics, such
-as colors, text styling, and animated displays.
-
-For example, you can print out a line of cyan text like this:
- fmt.println(ansi.CSI + ansi.FG_CYAN + ansi.SGR + "Hellope!" + ansi.CSI + ansi.RESET + ansi.SGR)
-
-Multiple SGR (Select Graphic Rendition) codes can be joined by semicolons:
- fmt.println(ansi.CSI + ansi.BOLD + ";" + ansi.FG_BLUE + ansi.SGR + "Hellope!" + ansi.CSI + ansi.RESET + ansi.SGR)
-
-If your terminal supports 24-bit true color mode, you can also do this:
- fmt.println(ansi.CSI + ansi.FG_COLOR_24_BIT + ";0;255;255" + ansi.SGR + "Hellope!" + ansi.CSI + ansi.RESET + ansi.SGR)
-
-For more information, see:
-- [[ https://en.wikipedia.org/wiki/ANSI_escape_code ]]
-- [[ https://www.vt100.net/docs/vt102-ug/chapter5.html ]]
-- [[ https://invisible-island.net/xterm/ctlseqs/ctlseqs.html ]]
-*/
-package ansi
diff --git a/core/encoding/base32/base32_test.odin b/core/encoding/base32/base32_test.odin
index ea41ae36f..07d5c8080 100644
--- a/core/encoding/base32/base32_test.odin
+++ b/core/encoding/base32/base32_test.odin
@@ -1,3 +1,4 @@
+#+test
package encoding_base32
import "core:testing"
diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin
index 8eb829ed3..1fb7c34ab 100644
--- a/core/encoding/cbor/cbor.odin
+++ b/core/encoding/cbor/cbor.odin
@@ -385,17 +385,17 @@ to_diagnostic_format_writer :: proc(w: io.Writer, val: Value, padding := 0) -> i
// which we want for the diagnostic format.
case f16:
buf: [64]byte
- str := strconv.append_float(buf[:], f64(v), 'f', 2*size_of(f16), 8*size_of(f16))
+ str := strconv.write_float(buf[:], f64(v), 'f', 2*size_of(f16), 8*size_of(f16))
if str[0] == '+' && str != "+Inf" { str = str[1:] }
io.write_string(w, str) or_return
case f32:
buf: [128]byte
- str := strconv.append_float(buf[:], f64(v), 'f', 2*size_of(f32), 8*size_of(f32))
+ str := strconv.write_float(buf[:], f64(v), 'f', 2*size_of(f32), 8*size_of(f32))
if str[0] == '+' && str != "+Inf" { str = str[1:] }
io.write_string(w, str) or_return
case f64:
buf: [256]byte
- str := strconv.append_float(buf[:], f64(v), 'f', 2*size_of(f64), 8*size_of(f64))
+ str := strconv.write_float(buf[:], f64(v), 'f', 2*size_of(f64), 8*size_of(f64))
if str[0] == '+' && str != "+Inf" { str = str[1:] }
io.write_string(w, str) or_return
diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin
index aca71deb2..b23087c90 100644
--- a/core/encoding/cbor/marshal.odin
+++ b/core/encoding/cbor/marshal.odin
@@ -612,6 +612,42 @@ _marshal_into_encoder :: proc(e: Encoder, v: any, ti: ^runtime.Type_Info) -> (er
case:
panic("unknown bit_size size")
}
+ case runtime.Type_Info_Matrix:
+ count := info.column_count * info.elem_stride
+ err_conv(_encode_u64(e, u64(count), .Array)) or_return
+
+ if impl, ok := _tag_implementations_type[info.elem.id]; ok {
+ for i in 0..<count {
+ data := uintptr(v.data) + uintptr(i*info.elem_size)
+ impl->marshal(e, any{rawptr(data), info.elem.id}) or_return
+ }
+ return
+ }
+
+ elem_ti := runtime.type_info_core(type_info_of(info.elem.id))
+ for i in 0..<count {
+ data := uintptr(v.data) + uintptr(i*info.elem_size)
+ _marshal_into_encoder(e, any{rawptr(data), info.elem.id}, elem_ti) or_return
+ }
+ return
+
+ case runtime.Type_Info_Simd_Vector:
+ err_conv(_encode_u64(e, u64(info.count), .Array)) or_return
+
+ if impl, ok := _tag_implementations_type[info.elem.id]; ok {
+ for i in 0..<info.count {
+ data := uintptr(v.data) + uintptr(i*info.elem_size)
+ impl->marshal(e, any{rawptr(data), info.elem.id}) or_return
+ }
+ return
+ }
+
+ elem_ti := runtime.type_info_core(type_info_of(info.elem.id))
+ for i in 0..<info.count {
+ data := uintptr(v.data) + uintptr(i*info.elem_size)
+ _marshal_into_encoder(e, any{rawptr(data), info.elem.id}, elem_ti) or_return
+ }
+ return
}
return _unsupported(v.id, nil)
diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin
index 17420af46..be07b926a 100644
--- a/core/encoding/cbor/tags.odin
+++ b/core/encoding/cbor/tags.odin
@@ -82,14 +82,16 @@ _tag_implementations_id: map[string]Tag_Implementation
_tag_implementations_type: map[typeid]Tag_Implementation
// Register a custom tag implementation to be used when marshalling that type and unmarshalling that tag number.
-tag_register_type :: proc(impl: Tag_Implementation, nr: Tag_Number, type: typeid) {
+tag_register_type :: proc "contextless" (impl: Tag_Implementation, nr: Tag_Number, type: typeid) {
+ context = runtime.default_context()
_tag_implementations_nr[nr] = impl
_tag_implementations_type[type] = impl
}
// Register a custom tag implementation to be used when marshalling that tag number or marshalling
// a field with the struct tag `cbor_tag:"nr"`.
-tag_register_number :: proc(impl: Tag_Implementation, nr: Tag_Number, id: string) {
+tag_register_number :: proc "contextless" (impl: Tag_Implementation, nr: Tag_Number, id: string) {
+ context = runtime.default_context()
_tag_implementations_nr[nr] = impl
_tag_implementations_id[id] = impl
}
@@ -98,13 +100,13 @@ tag_register_number :: proc(impl: Tag_Implementation, nr: Tag_Number, id: string
INITIALIZE_DEFAULT_TAGS :: #config(CBOR_INITIALIZE_DEFAULT_TAGS, !ODIN_DEFAULT_TO_PANIC_ALLOCATOR && !ODIN_DEFAULT_TO_NIL_ALLOCATOR)
@(private, init, disabled=!INITIALIZE_DEFAULT_TAGS)
-tags_initialize_defaults :: proc() {
+tags_initialize_defaults :: proc "contextless" () {
tags_register_defaults()
}
// Registers tags that have implementations provided by this package.
// This is done by default and can be controlled with the `CBOR_INITIALIZE_DEFAULT_TAGS` define.
-tags_register_defaults :: proc() {
+tags_register_defaults :: proc "contextless" () {
tag_register_number({nil, tag_time_unmarshal, tag_time_marshal}, TAG_EPOCH_TIME_NR, TAG_EPOCH_TIME_ID)
tag_register_number({nil, tag_base64_unmarshal, tag_base64_marshal}, TAG_BASE64_NR, TAG_BASE64_ID)
tag_register_number({nil, tag_cbor_unmarshal, tag_cbor_marshal}, TAG_CBOR_NR, TAG_CBOR_ID)
@@ -298,7 +300,7 @@ tag_base64_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number,
#partial switch t in ti.variant {
case reflect.Type_Info_String:
-
+ assert(t.encoding == .UTF_8)
if t.is_cstring {
length := base64.decoded_len(bytes)
builder := strings.builder_make(0, length+1)
diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin
index c39255d9d..043b2ec60 100644
--- a/core/encoding/cbor/unmarshal.odin
+++ b/core/encoding/cbor/unmarshal.odin
@@ -29,6 +29,7 @@ an input.
unmarshal :: proc {
unmarshal_from_reader,
unmarshal_from_string,
+ unmarshal_from_bytes,
}
unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator, temp_allocator := context.temp_allocator, loc := #caller_location) -> (err: Unmarshal_Error) {
@@ -51,6 +52,11 @@ unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, all
return
}
+// Unmarshals CBOR from a slice of bytes into `ptr`; see the docs on the proc group `unmarshal` for more info.
+// Thin wrapper: converts `bytes` to a `string` and forwards every argument unchanged to `unmarshal_from_string`.
+unmarshal_from_bytes :: proc(bytes: []byte, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator, temp_allocator := context.temp_allocator, loc := #caller_location) -> (err: Unmarshal_Error) {
+ return unmarshal_from_string(string(bytes), ptr, flags, allocator, temp_allocator, loc)
+}
+
unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator, temp_allocator := context.temp_allocator, loc := #caller_location) -> (err: Unmarshal_Error) {
d := d
@@ -329,6 +335,8 @@ _unmarshal_value :: proc(d: Decoder, v: any, hdr: Header, allocator := context.a
_unmarshal_bytes :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add, allocator := context.allocator, loc := #caller_location) -> (err: Unmarshal_Error) {
#partial switch t in ti.variant {
case reflect.Type_Info_String:
+ assert(t.encoding == .UTF_8)
+
bytes := err_conv(_decode_bytes(d, add, allocator=allocator, loc=loc)) or_return
if t.is_cstring {
@@ -487,7 +495,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header
data := mem.alloc_bytes_non_zeroed(t.elem.size * scap, t.elem.align, allocator=allocator, loc=loc) or_return
defer if err != nil { mem.free_bytes(data, allocator=allocator, loc=loc) }
- da := mem.Raw_Dynamic_Array{raw_data(data), 0, length, context.allocator }
+ da := mem.Raw_Dynamic_Array{raw_data(data), 0, scap, context.allocator }
assign_array(d, &da, t.elem, length) or_return
@@ -585,6 +593,31 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header
if out_of_space { return _unsupported(v, hdr) }
return
+ case reflect.Type_Info_Matrix:
+ count := t.column_count * t.elem_stride
+ length, _ := err_conv(_decode_len_container(d, add)) or_return
+ if length > count {
+ return _unsupported(v, hdr)
+ }
+
+ da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, allocator }
+
+ out_of_space := assign_array(d, &da, t.elem, length, growable=false) or_return
+ if out_of_space { return _unsupported(v, hdr) }
+ return
+
+ case reflect.Type_Info_Simd_Vector:
+ length, _ := err_conv(_decode_len_container(d, add)) or_return
+ if length > t.count {
+ return _unsupported(v, hdr)
+ }
+
+ da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, allocator }
+
+ out_of_space := assign_array(d, &da, t.elem, length, growable=false) or_return
+ if out_of_space { return _unsupported(v, hdr) }
+ return
+
case: return _unsupported(v, hdr)
}
}
diff --git a/core/encoding/csv/doc.odin b/core/encoding/csv/doc.odin
index bfeadafd6..7abe2be49 100644
--- a/core/encoding/csv/doc.odin
+++ b/core/encoding/csv/doc.odin
@@ -63,8 +63,6 @@ Example:
read_csv_from_string :: proc(filename: string) {
r: csv.Reader
r.trim_leading_space = true
- r.reuse_record = true // Without it you have to delete(record)
- r.reuse_record_buffer = true // Without it you have to each of the fields within it
defer csv.reader_destroy(&r)
csv_data, ok := os.read_entire_file(filename)
diff --git a/core/encoding/csv/reader.odin b/core/encoding/csv/reader.odin
index 5348624d5..577ef219d 100644
--- a/core/encoding/csv/reader.odin
+++ b/core/encoding/csv/reader.odin
@@ -130,7 +130,7 @@ reader_destroy :: proc(r: ^Reader) {
for record, row_idx in csv.iterator_next(&r) { ... }
TIP: If you process the results within the loop and don't need to own the results,
- you can set the Reader's `reuse_record` and `reuse_record_reuse_record_buffer` to true;
+ you can set the Reader's `reuse_record` and `reuse_record_buffer` to true;
you won't need to delete the record or its fields.
*/
iterator_next :: proc(r: ^Reader) -> (record: []string, idx: int, err: Error, more: bool) {
diff --git a/core/encoding/entity/entity.odin b/core/encoding/entity/entity.odin
index d2f1d46b2..cb8fa8611 100644
--- a/core/encoding/entity/entity.odin
+++ b/core/encoding/entity/entity.odin
@@ -108,7 +108,7 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
it couldn't have been part of an XML tag body to be decoded here.
Keep in mind that we could already *be* inside a CDATA tag.
- If so, write `>` as a literal and continue.
+ If so, write `<` as a literal and continue.
*/
if in_data {
write_rune(&builder, '<')
@@ -119,11 +119,9 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
case ']':
// If we're unboxing _and_ decoding CDATA, we'll have to check for the end tag.
if in_data {
- if t.read_offset + len(CDATA_END) < len(t.src) {
- if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END {
- in_data = false
- t.read_offset += len(CDATA_END) - 1
- }
+ if strings.has_prefix(t.src[t.offset:], CDATA_END) {
+ in_data = false
+ t.read_offset += len(CDATA_END) - 1
}
continue
} else {
@@ -297,40 +295,40 @@ _handle_xml_special :: proc(t: ^Tokenizer, builder: ^strings.Builder, options: X
assert(t != nil && t.r == '<')
if t.read_offset + len(CDATA_START) >= len(t.src) { return false, .None }
- if string(t.src[t.offset:][:len(CDATA_START)]) == CDATA_START {
- t.read_offset += len(CDATA_START) - 1
-
+ s := string(t.src[t.offset:])
+ if strings.has_prefix(s, CDATA_START) {
if .Unbox_CDATA in options && .Decode_CDATA in options {
// We're unboxing _and_ decoding CDATA
+ t.read_offset += len(CDATA_START) - 1
return true, .None
}
- // CDATA is passed through.
- offset := t.offset
-
- // Scan until end of CDATA.
+ // CDATA is passed through. Scan until end of CDATA.
+ start_offset := t.offset
+ t.read_offset += len(CDATA_START)
for {
- advance(t) or_return
- if t.r < 0 { return true, .CDATA_Not_Terminated }
-
- if t.read_offset + len(CDATA_END) < len(t.src) {
- if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END {
- t.read_offset += len(CDATA_END) - 1
+ advance(t)
+ if t.r < 0 {
+ // error(t, offset, "[scan_string] CDATA was not terminated\n")
+ return true, .CDATA_Not_Terminated
+ }
- cdata := string(t.src[offset : t.read_offset])
-
- if .Unbox_CDATA in options {
- cdata = cdata[len(CDATA_START):]
- cdata = cdata[:len(cdata) - len(CDATA_END)]
- }
+ // Scan until the end of a CDATA tag.
+ if s = string(t.src[t.read_offset:]); strings.has_prefix(s, CDATA_END) {
+ t.read_offset += len(CDATA_END)
+ cdata := string(t.src[start_offset:t.read_offset])
- write_string(builder, cdata)
- return false, .None
+ if .Unbox_CDATA in options {
+ cdata = cdata[len(CDATA_START):]
+ cdata = cdata[:len(cdata) - len(CDATA_END)]
}
+ write_string(builder, cdata)
+ return false, .None
}
}
- } else if string(t.src[t.offset:][:len(COMMENT_START)]) == COMMENT_START {
+
+ } else if strings.has_prefix(s, COMMENT_START) {
t.read_offset += len(COMMENT_START)
// Comment is passed through by default.
offset := t.offset
diff --git a/core/encoding/hxa/read.odin b/core/encoding/hxa/read.odin
index a679946f8..6dde16848 100644
--- a/core/encoding/hxa/read.odin
+++ b/core/encoding/hxa/read.odin
@@ -79,7 +79,6 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato
read_meta :: proc(r: ^Reader, capacity: u32le, allocator := context.allocator, loc := #caller_location) -> (meta_data: []Meta, err: Read_Error) {
meta_data = make([]Meta, int(capacity), allocator=allocator)
count := 0
- defer meta_data = meta_data[:count]
for &m in meta_data {
m.name = read_name(r) or_return
@@ -105,6 +104,7 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato
count += 1
}
+ meta_data = meta_data[:count]
return
}
@@ -112,7 +112,6 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato
stack_count := read_value(r, u32le) or_return
layer_count := 0
layers = make(Layer_Stack, stack_count, allocator=allocator, loc=loc)
- defer layers = layers[:layer_count]
for &layer in layers {
layer.name = read_name(r) or_return
layer.components = read_value(r, u8) or_return
@@ -136,6 +135,7 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato
layer_count += 1
}
+ layers = layers[:layer_count]
return
}
diff --git a/core/encoding/json/marshal.odin b/core/encoding/json/marshal.odin
index ed6de2f52..2fb507edf 100644
--- a/core/encoding/json/marshal.odin
+++ b/core/encoding/json/marshal.odin
@@ -108,13 +108,13 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
if opt.write_uint_as_hex && (opt.spec == .JSON5 || opt.spec == .MJSON) {
switch i in a {
case u8, u16, u32, u64, u128:
- s = strconv.append_bits_128(buf[:], u, 16, info.signed, 8*ti.size, "0123456789abcdef", { .Prefix })
+ s = strconv.write_bits_128(buf[:], u, 16, info.signed, 8*ti.size, "0123456789abcdef", { .Prefix })
case:
- s = strconv.append_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil)
+ s = strconv.write_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil)
}
} else {
- s = strconv.append_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil)
+ s = strconv.write_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil)
}
io.write_string(w, s) or_return
@@ -292,7 +292,7 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
case runtime.Type_Info_Integer:
buf: [40]byte
u := cast_any_int_to_u128(ka)
- name = strconv.append_bits_128(buf[:], u, 10, info.signed, 8*kti.size, "0123456789", nil)
+ name = strconv.write_bits_128(buf[:], u, 10, info.signed, 8*kti.size, "0123456789", nil)
opt_write_key(w, opt, name) or_return
case: return .Unsupported_Type
@@ -359,10 +359,10 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
#partial switch info in ti.variant {
case runtime.Type_Info_String:
switch x in v {
- case string:
- return x == ""
- case cstring:
- return x == nil || x == ""
+ case string: return x == ""
+ case cstring: return x == nil || x == ""
+ case string16: return x == ""
+ case cstring16: return x == nil || x == ""
}
case runtime.Type_Info_Any:
return v.(any) == nil
diff --git a/core/encoding/json/tokenizer.odin b/core/encoding/json/tokenizer.odin
index e46d879a7..ad928b7d9 100644
--- a/core/encoding/json/tokenizer.odin
+++ b/core/encoding/json/tokenizer.odin
@@ -101,7 +101,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
}
}
- scan_espace :: proc(t: ^Tokenizer) -> bool {
+ scan_escape :: proc(t: ^Tokenizer) -> bool {
switch t.r {
case '"', '\'', '\\', '/', 'b', 'n', 'r', 't', 'f':
next_rune(t)
@@ -310,7 +310,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
break
}
if r == '\\' {
- scan_espace(t)
+ scan_escape(t)
}
}
diff --git a/core/encoding/json/unmarshal.odin b/core/encoding/json/unmarshal.odin
index 57371e360..0b65adaac 100644
--- a/core/encoding/json/unmarshal.odin
+++ b/core/encoding/json/unmarshal.odin
@@ -117,9 +117,25 @@ assign_int :: proc(val: any, i: $T) -> bool {
case uint: dst = uint (i)
case uintptr: dst = uintptr(i)
case:
+ // Reports whether the base type of `ti` is an integer whose declared byte order
+ // differs from the target's (`ODIN_ENDIAN`). A nil `ti`, a non-integer base type
+ // and a platform-endian integer all yield `false`.
+ is_bit_set_different_endian_to_platform :: proc(ti: ^runtime.Type_Info) -> bool {
+ 	if ti == nil {
+ 		return false
+ 	}
+
+ 	base := runtime.type_info_base(ti)
+ 	integer, is_integer := base.variant.(runtime.Type_Info_Integer)
+ 	if !is_integer {
+ 		return false
+ 	}
+
+ 	#partial switch integer.endianness {
+ 	case .Little: return ODIN_ENDIAN != .Little
+ 	case .Big:    return ODIN_ENDIAN != .Big
+ 	}
+ 	// Remaining case is `.Platform`, which never needs a swap.
+ 	return false
+ }
+
ti := type_info_of(v.id)
- if _, ok := ti.variant.(runtime.Type_Info_Bit_Set); ok {
- do_byte_swap := !reflect.bit_set_is_big_endian(v)
+ if info, ok := ti.variant.(runtime.Type_Info_Bit_Set); ok {
+ do_byte_swap := is_bit_set_different_endian_to_platform(info.underlying)
switch ti.size * 8 {
case 0: // no-op.
case 8:
@@ -390,6 +406,9 @@ unmarshal_expect_token :: proc(p: ^Parser, kind: Token_Kind, loc := #caller_loca
return prev
}
+// Struct tags can include not only the name of the JSON key, but also a tag such as `omitempty`.
+// Example: `json:"key_name,omitempty"`
+// This returns the first field as `json_name`, and the rest are returned as `extra`.
@(private)
json_name_from_tag_value :: proc(value: string) -> (json_name, extra: string) {
json_name = value
@@ -425,12 +444,6 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm
defer delete(key, p.allocator)
unmarshal_expect_token(p, .Colon)
-
- field_test :: #force_inline proc "contextless" (field_used: [^]byte, offset: uintptr) -> bool {
- prev_set := field_used[offset/8] & byte(offset&7) != 0
- field_used[offset/8] |= byte(offset&7)
- return prev_set
- }
field_used_bytes := (reflect.size_of_typeid(ti.id)+7)/8
field_used := intrinsics.alloca(field_used_bytes + 1, 1) // + 1 to not overflow on size_of 0 types.
@@ -449,7 +462,9 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm
if use_field_idx < 0 {
for field, field_idx in fields {
- if key == field.name {
+ tag_value := reflect.struct_tag_get(field.tag, "json")
+ json_name, _ := json_name_from_tag_value(tag_value)
+ if json_name == "" && key == field.name {
use_field_idx = field_idx
break
}
@@ -470,7 +485,9 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm
}
}
- if field.name == key || (field.tag != "" && reflect.struct_tag_get(field.tag, "json") == key) {
+ tag_value := reflect.struct_tag_get(field.tag, "json")
+ json_name, _ := json_name_from_tag_value(tag_value)
+ if (json_name == "" && field.name == key) || json_name == key {
offset = field.offset
type = field.type
found = true
@@ -492,6 +509,11 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm
}
if field_found {
+ // Marks the struct field at byte `offset` as used in the `field_used` bitmap and
+ // returns whether it was already marked, so the caller can reject a field that
+ // is assigned more than once.
+ //
+ // The flag lives in byte `offset/8`, bit `offset&7`. The mask has to be
+ // `1 << (offset&7)`: using `offset&7` itself as the mask never records offsets
+ // that are multiples of 8 and makes offsets such as 1, 2 and 3 share bits.
+ field_test :: #force_inline proc "contextless" (field_used: [^]byte, offset: uintptr) -> bool {
+ 	mask := byte(1) << (offset & 7)
+ 	prev_set := (field_used[offset/8] & mask) != 0
+ 	field_used[offset/8] |= mask
+ 	return prev_set
+ }
if field_test(field_used, offset) {
return .Multiple_Use_Field
}
@@ -548,7 +570,9 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm
key_ptr: rawptr
#partial switch tk in t.key.variant {
- case runtime.Type_Info_String:
+ case runtime.Type_Info_String:
+ assert(tk.encoding == .UTF_8)
+
key_ptr = rawptr(&key)
key_cstr: cstring
if reflect.is_cstring(t.key) {
diff --git a/core/encoding/uuid/generation.odin b/core/encoding/uuid/generation.odin
index 7c9d4b80c..b210f6a52 100644
--- a/core/encoding/uuid/generation.odin
+++ b/core/encoding/uuid/generation.odin
@@ -240,7 +240,7 @@ Example:
import "core:encoding/uuid"
import "core:fmt"
- main :: proc() {
+ generate_v8_hash_bytes_example :: proc() {
my_uuid := uuid.generate_v8_hash(uuid.Namespace_DNS, "www.odin-lang.org", .SHA256)
my_uuid_string := uuid.to_string(my_uuid, context.temp_allocator)
fmt.println(my_uuid_string)
@@ -306,7 +306,7 @@ Example:
import "core:encoding/uuid"
import "core:fmt"
- main :: proc() {
+ generate_v8_hash_string_example :: proc() {
my_uuid := uuid.generate_v8_hash(uuid.Namespace_DNS, "www.odin-lang.org", .SHA256)
my_uuid_string := uuid.to_string(my_uuid, context.temp_allocator)
fmt.println(my_uuid_string)
diff --git a/core/encoding/xml/tokenizer.odin b/core/encoding/xml/tokenizer.odin
index a2bbaf28e..3ef9a6388 100644
--- a/core/encoding/xml/tokenizer.odin
+++ b/core/encoding/xml/tokenizer.odin
@@ -16,6 +16,7 @@ package encoding_xml
import "core:fmt"
import "core:unicode"
import "core:unicode/utf8"
+import "core:strings"
Error_Handler :: #type proc(pos: Pos, fmt: string, args: ..any)
@@ -121,7 +122,7 @@ default_error_handler :: proc(pos: Pos, msg: string, args: ..any) {
error :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) {
pos := offset_to_pos(t, offset)
if t.err != nil {
- t.err(pos, msg, ..args)
+ t.err(pos=pos, fmt=msg, args=args)
}
t.error_count += 1
}
@@ -268,32 +269,27 @@ scan_comment :: proc(t: ^Tokenizer) -> (comment: string, err: Error) {
// Skip CDATA
skip_cdata :: proc(t: ^Tokenizer) -> (err: Error) {
- if t.read_offset + len(CDATA_START) >= len(t.src) {
- // Can't be the start of a CDATA tag.
+ if s := string(t.src[t.offset:]); !strings.has_prefix(s, CDATA_START) {
return .None
}
- if string(t.src[t.offset:][:len(CDATA_START)]) == CDATA_START {
- t.read_offset += len(CDATA_START)
- offset := t.offset
+ t.read_offset += len(CDATA_START)
+ offset := t.offset
- cdata_scan: for {
- advance_rune(t)
- if t.ch < 0 {
- error(t, offset, "[scan_string] CDATA was not terminated\n")
- return .Premature_EOF
- }
+ cdata_scan: for {
+ advance_rune(t)
+ if t.ch < 0 {
+ error(t, offset, "[scan_string] CDATA was not terminated\n")
+ return .Premature_EOF
+ }
- // Scan until the end of a CDATA tag.
- if t.read_offset + len(CDATA_END) < len(t.src) {
- if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END {
- t.read_offset += len(CDATA_END)
- break cdata_scan
- }
- }
+ // Scan until the end of a CDATA tag.
+ if s := string(t.src[t.read_offset:]); strings.has_prefix(s, CDATA_END) {
+ t.read_offset += len(CDATA_END)
+ break cdata_scan
}
}
- return
+ return .None
}
@(optimization_mode="favor_size")
@@ -393,6 +389,8 @@ scan :: proc(t: ^Tokenizer, multiline_string := false) -> Token {
case '/': kind = .Slash
case '-': kind = .Dash
case ':': kind = .Colon
+ case '[': kind = .Open_Bracket
+ case ']': kind = .Close_Bracket
case '"', '\'':
kind = .Invalid
diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin
index b8c8b13a4..707d2b3f3 100644
--- a/core/encoding/xml/xml_reader.odin
+++ b/core/encoding/xml/xml_reader.odin
@@ -56,7 +56,7 @@ Option_Flag :: enum {
Option_Flags :: bit_set[Option_Flag; u16]
Document :: struct {
- elements: [dynamic]Element,
+ elements: [dynamic]Element `fmt:"v,element_count"`,
element_count: Element_ID,
prologue: Attributes,
@@ -70,15 +70,15 @@ Document :: struct {
// If we encounter comments before the root node, and the option to intern comments is given, this is where they'll live.
// Otherwise they'll be in the element tree.
- comments: [dynamic]string,
+ comments: [dynamic]string `fmt:"-"`,
// Internal
- tokenizer: ^Tokenizer,
- allocator: mem.Allocator,
+ tokenizer: ^Tokenizer `fmt:"-"`,
+ allocator: mem.Allocator `fmt:"-"`,
// Input. Either the original buffer, or a copy if `.Input_May_Be_Modified` isn't specified.
- input: []u8,
- strings_to_free: [dynamic]string,
+ input: []u8 `fmt:"-"`,
+ strings_to_free: [dynamic]string `fmt:"-"`,
}
Element :: struct {
@@ -175,7 +175,7 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
data = bytes.clone(data)
}
- t := &Tokenizer{}
+ t := new(Tokenizer)
init(t, string(data), path, error_handler)
doc = new(Document)
@@ -195,7 +195,6 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
loop: for {
skip_whitespace(t)
- // NOTE(Jeroen): This is faster as a switch.
switch t.ch {
case '<':
// Consume peeked `<`
@@ -306,9 +305,17 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
}
}
+ case .Open_Bracket:
+ // This could be a CDATA tag part of a tag's body. Unread the `<![`
+ t.offset -= 3
+
+ // Instead of calling `parse_body` here, we could also `continue loop`
+ // and fall through to the `case:` at the bottom of the outer loop.
+ // This makes the intent clearer.
+ parse_body(doc, element, opts) or_return
+
case:
- error(t, t.offset, "Invalid Token after <!. Expected .Ident, got %#v\n", next)
- return
+ error(t, t.offset, "Unexpected Token after <!: %#v", next)
}
} else if open.kind == .Question {
@@ -341,38 +348,7 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
case:
// This should be a tag's body text.
- body_text := scan_string(t, t.offset) or_return
- needs_processing := .Unbox_CDATA in opts.flags
- needs_processing |= .Decode_SGML_Entities in opts.flags
-
- if !needs_processing {
- append(&doc.elements[element].value, body_text)
- continue
- }
-
- decode_opts := entity.XML_Decode_Options{}
- if .Keep_Tag_Body_Comments not_in opts.flags {
- decode_opts += { .Comment_Strip }
- }
-
- if .Decode_SGML_Entities not_in opts.flags {
- decode_opts += { .No_Entity_Decode }
- }
-
- if .Unbox_CDATA in opts.flags {
- decode_opts += { .Unbox_CDATA }
- if .Decode_SGML_Entities in opts.flags {
- decode_opts += { .Decode_CDATA }
- }
- }
-
- decoded, decode_err := entity.decode_xml(body_text, decode_opts)
- if decode_err == .None {
- append(&doc.elements[element].value, decoded)
- append(&doc.strings_to_free, decoded)
- } else {
- append(&doc.elements[element].value, body_text)
- }
+ parse_body(doc, element, opts) or_return
}
}
@@ -427,6 +403,7 @@ destroy :: proc(doc: ^Document) {
}
delete(doc.strings_to_free)
+ free(doc.tokenizer)
free(doc)
}
@@ -457,8 +434,6 @@ parse_attribute :: proc(doc: ^Document) -> (attr: Attribute, offset: int, err: E
t := doc.tokenizer
key := expect(t, .Ident) or_return
- offset = t.offset - len(key.text)
-
_ = expect(t, .Eq) or_return
value := expect(t, .String, multiline_string=true) or_return
@@ -591,6 +566,47 @@ parse_doctype :: proc(doc: ^Document) -> (err: Error) {
return .None
}
+// Scans the text body of `element` from the document's tokenizer and appends it to
+// that element's `value` list.
+//
+// When neither `.Unbox_CDATA` nor `.Decode_SGML_Entities` is set, the scanned text is
+// appended as-is. Otherwise it is run through `entity.decode_xml`; on success the
+// decoded string is appended and also recorded in `doc.strings_to_free`, and on a
+// decode error the raw scanned text is appended instead.
+//
+// Runs with `context.allocator` set to `doc.allocator`. A `scan_string` error is
+// returned to the caller.
+parse_body :: proc(doc: ^Document, element: Element_ID, opts: Options) -> (err: Error) {
+	assert(doc != nil)
+	context.allocator = doc.allocator
+
+	tokenizer := doc.tokenizer
+	raw_text := scan_string(tokenizer, tokenizer.offset) or_return
+
+	unbox_cdata     := .Unbox_CDATA in opts.flags
+	decode_entities := .Decode_SGML_Entities in opts.flags
+
+	// Fast path: nothing to decode, keep the scanned text verbatim.
+	if !(unbox_cdata || decode_entities) {
+		append(&doc.elements[element].value, raw_text)
+		return
+	}
+
+	// Translate the reader's option flags into the entity decoder's flags.
+	decode_opts: entity.XML_Decode_Options
+	if .Keep_Tag_Body_Comments not_in opts.flags {
+		decode_opts += { .Comment_Strip }
+	}
+	if !decode_entities {
+		decode_opts += { .No_Entity_Decode }
+	}
+	if unbox_cdata {
+		decode_opts += { .Unbox_CDATA }
+		if decode_entities {
+			decode_opts += { .Decode_CDATA }
+		}
+	}
+
+	if decoded, decode_err := entity.decode_xml(raw_text, decode_opts); decode_err == .None {
+		append(&doc.elements[element].value, decoded)
+		append(&doc.strings_to_free, decoded)
+	} else {
+		// Decoding failed: fall back to the undecoded body text.
+		append(&doc.elements[element].value, raw_text)
+	}
+
+	return
+}
+
+
Element_ID :: u32
new_element :: proc(doc: ^Document) -> (id: Element_ID) {
@@ -609,4 +625,4 @@ new_element :: proc(doc: ^Document) -> (id: Element_ID) {
cur := doc.element_count
doc.element_count += 1
return cur
-}
+} \ No newline at end of file