Merge branch 'odin-lang:master' into master

author: Courtney Strachan <courtney.strachan@gmail.com> 2025-10-06 02:41:44 +0100
committer: GitHub <noreply@github.com> 2025-10-06 02:41:44 +0100
commit: 6de2d6e8ca687c989bbb7806e5cbe8d791e425bf (patch)
tree: 03a2e0a84c7c1530215f8e3f59a7f643b39b3677 /core/encoding
parent: dbbe96ae5c343f0e803de6ee508207a62571534f (diff)
parent: 0f97382fa3e46da80705c00dfe02f3deb9562e4f (diff)
17 files changed, 236 insertions, 287 deletions
diff --git a/core/encoding/ansi/ansi.odin b/core/encoding/ansi/ansi.odin
deleted file mode 100644
index 5550a1671..000000000
--- a/core/encoding/ansi/ansi.odin
+++ /dev/null
@@ -1,137 +0,0 @@
-package ansi
-
-BEL     :: "\a" // Bell
-BS      :: "\b" // Backspace
-ESC     :: "\e" // Escape
-
-// Fe Escape sequences
-
-CSI     :: ESC + "["  // Control Sequence Introducer
-OSC     :: ESC + "]"  // Operating System Command
-ST      :: ESC + "\\" // String Terminator
-
-// CSI sequences
-
-CUU     :: "A"  // Cursor Up
-CUD     :: "B"  // Cursor Down
-CUF     :: "C"  // Cursor Forward
-CUB     :: "D"  // Cursor Back
-CNL     :: "E"  // Cursor Next Line
-CPL     :: "F"  // Cursor Previous Line
-CHA     :: "G"  // Cursor Horizontal Absolute
-CUP     :: "H"  // Cursor Position
-ED      :: "J"  // Erase in Display
-EL      :: "K"  // Erase in Line
-SU      :: "S"  // Scroll Up
-SD      :: "T"  // Scroll Down
-HVP     :: "f"  // Horizontal Vertical Position
-SGR     :: "m"  // Select Graphic Rendition
-AUX_ON  :: "5i" // AUX Port On
-AUX_OFF :: "4i" // AUX Port Off
-DSR     :: "6n" // Device Status Report
-
-// CSI: private sequences
-
-SCP          :: "s"    // Save Current Cursor Position
-RCP          :: "u"    // Restore Saved Cursor Position
-DECAWM_ON    :: "?7h"  // Auto Wrap Mode (Enabled)
-DECAWM_OFF   :: "?7l"  // Auto Wrap Mode (Disabled)
-DECTCEM_SHOW :: "?25h" // Text Cursor Enable Mode (Visible)
-DECTCEM_HIDE :: "?25l" // Text Cursor Enable Mode (Invisible)
-
-// SGR sequences
-
-RESET                   :: "0"
-BOLD                    :: "1"
-FAINT                   :: "2"
-ITALIC                  :: "3" // Not widely supported.
-UNDERLINE               :: "4"
-BLINK_SLOW              :: "5"
-BLINK_RAPID             :: "6" // Not widely supported.
-INVERT                  :: "7" // Also known as reverse video.
-HIDE                    :: "8" // Not widely supported.
-STRIKE                  :: "9"
-FONT_PRIMARY            :: "10"
-FONT_ALT1               :: "11"
-FONT_ALT2               :: "12"
-FONT_ALT3               :: "13"
-FONT_ALT4               :: "14"
-FONT_ALT5               :: "15"
-FONT_ALT6               :: "16"
-FONT_ALT7               :: "17"
-FONT_ALT8               :: "18"
-FONT_ALT9               :: "19"
-FONT_FRAKTUR            :: "20" // Rarely supported.
-UNDERLINE_DOUBLE        :: "21" // May be interpreted as "disable bold."
-NO_BOLD_FAINT           :: "22"
-NO_ITALIC_BLACKLETTER   :: "23"
-NO_UNDERLINE            :: "24"
-NO_BLINK                :: "25"
-PROPORTIONAL_SPACING    :: "26"
-NO_REVERSE              :: "27"
-NO_HIDE                 :: "28"
-NO_STRIKE               :: "29"
-
-FG_BLACK                :: "30"
-FG_RED                  :: "31"
-FG_GREEN                :: "32"
-FG_YELLOW               :: "33"
-FG_BLUE                 :: "34"
-FG_MAGENTA              :: "35"
-FG_CYAN                 :: "36"
-FG_WHITE                :: "37"
-FG_COLOR                :: "38"
-FG_COLOR_8_BIT          :: "38;5" // Followed by ";n" where n is in 0..=255
-FG_COLOR_24_BIT         :: "38;2" // Followed by ";r;g;b" where r,g,b are in 0..=255
-FG_DEFAULT              :: "39"
-
-BG_BLACK                :: "40"
-BG_RED                  :: "41"
-BG_GREEN                :: "42"
-BG_YELLOW               :: "43"
-BG_BLUE                 :: "44"
-BG_MAGENTA              :: "45"
-BG_CYAN                 :: "46"
-BG_WHITE                :: "47"
-BG_COLOR                :: "48"
-BG_COLOR_8_BIT          :: "48;5" // Followed by ";n" where n is in 0..=255
-BG_COLOR_24_BIT         :: "48;2" // Followed by ";r;g;b" where r,g,b are in 0..=255
-BG_DEFAULT              :: "49"
-
-NO_PROPORTIONAL_SPACING :: "50"
-FRAMED                  :: "51"
-ENCIRCLED               :: "52"
-OVERLINED               :: "53"
-NO_FRAME_ENCIRCLE       :: "54"
-NO_OVERLINE             :: "55"
-
-// SGR: non-standard bright colors
-
-FG_BRIGHT_BLACK         :: "90" // Also known as grey.
-FG_BRIGHT_RED           :: "91"
-FG_BRIGHT_GREEN         :: "92"
-FG_BRIGHT_YELLOW        :: "93"
-FG_BRIGHT_BLUE          :: "94"
-FG_BRIGHT_MAGENTA       :: "95"
-FG_BRIGHT_CYAN          :: "96"
-FG_BRIGHT_WHITE         :: "97"
-
-BG_BRIGHT_BLACK         :: "100" // Also known as grey.
-BG_BRIGHT_RED           :: "101"
-BG_BRIGHT_GREEN         :: "102"
-BG_BRIGHT_YELLOW        :: "103"
-BG_BRIGHT_BLUE          :: "104"
-BG_BRIGHT_MAGENTA       :: "105"
-BG_BRIGHT_CYAN          :: "106"
-BG_BRIGHT_WHITE         :: "107"
-
-// Fp Escape sequences
-
-DECSC :: ESC + "7" // DEC Save Cursor
-DECRC :: ESC + "8" // DEC Restore Cursor
-
-// OSC sequences
-
-WINDOW_TITLE :: "2"  // Followed by ";<text>" ST.
-HYPERLINK    :: "8"  // Followed by ";[params];<URI>" ST. Closed by OSC HYPERLINK ";;" ST.
-CLIPBOARD    :: "52" // Followed by ";c;<Base64-encoded string>" ST.
diff --git a/core/encoding/ansi/doc.odin b/core/encoding/ansi/doc.odin
deleted file mode 100644
index 966e6be00..000000000
--- a/core/encoding/ansi/doc.odin
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
-package ansi implements constant references to many widely-supported ANSI
-escape codes, primarily used in terminal emulators for enhanced graphics, such
-as colors, text styling, and animated displays.
-
-For example, you can print out a line of cyan text like this:
-	fmt.println(ansi.CSI + ansi.FG_CYAN + ansi.SGR + "Hellope!" + ansi.CSI + ansi.RESET + ansi.SGR)
-
-Multiple SGR (Select Graphic Rendition) codes can be joined by semicolons:
-	fmt.println(ansi.CSI + ansi.BOLD + ";" + ansi.FG_BLUE + ansi.SGR + "Hellope!" + ansi.CSI + ansi.RESET + ansi.SGR)
-
-If your terminal supports 24-bit true color mode, you can also do this:
-	fmt.println(ansi.CSI + ansi.FG_COLOR_24_BIT + ";0;255;255" + ansi.SGR + "Hellope!" + ansi.CSI + ansi.RESET + ansi.SGR)
-
-For more information, see:
-- [[ https://en.wikipedia.org/wiki/ANSI_escape_code ]]
-- [[ https://www.vt100.net/docs/vt102-ug/chapter5.html ]]
-- [[ https://invisible-island.net/xterm/ctlseqs/ctlseqs.html ]]
-*/
-package ansi
diff --git a/core/encoding/base32/base32_test.odin b/core/encoding/base32/base32_test.odin
index ea41ae36f..07d5c8080 100644
--- a/core/encoding/base32/base32_test.odin
+++ b/core/encoding/base32/base32_test.odin
@@ -1,3 +1,4 @@
+#+test
 package encoding_base32
 
 import "core:testing"
diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin
index 8eb829ed3..1fb7c34ab 100644
--- a/core/encoding/cbor/cbor.odin
+++ b/core/encoding/cbor/cbor.odin
@@ -385,17 +385,17 @@ to_diagnostic_format_writer :: proc(w: io.Writer, val: Value, padding := 0) -> i
 	// which we want for the diagnostic format.
 	case f16:
 		buf: [64]byte
-		str := strconv.append_float(buf[:], f64(v), 'f', 2*size_of(f16), 8*size_of(f16))
+		str := strconv.write_float(buf[:], f64(v), 'f', 2*size_of(f16), 8*size_of(f16))
 		if str[0] == '+' && str != "+Inf" { str = str[1:] }
 		io.write_string(w, str) or_return
 	case f32:
 		buf: [128]byte
-		str := strconv.append_float(buf[:], f64(v), 'f', 2*size_of(f32), 8*size_of(f32))
+		str := strconv.write_float(buf[:], f64(v), 'f', 2*size_of(f32), 8*size_of(f32))
 		if str[0] == '+' && str != "+Inf" { str = str[1:] }
 		io.write_string(w, str) or_return
 	case f64:
 		buf: [256]byte
-		str := strconv.append_float(buf[:], f64(v), 'f', 2*size_of(f64), 8*size_of(f64))
+		str := strconv.write_float(buf[:], f64(v), 'f', 2*size_of(f64), 8*size_of(f64))
 		if str[0] == '+' && str != "+Inf" { str = str[1:] }
 		io.write_string(w, str) or_return
 
diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin
index aca71deb2..b23087c90 100644
--- a/core/encoding/cbor/marshal.odin
+++ b/core/encoding/cbor/marshal.odin
@@ -612,6 +612,42 @@ _marshal_into_encoder :: proc(e: Encoder, v: any, ti: ^runtime.Type_Info) -> (er
 		case:
 			panic("unknown bit_size size")
 		}
+	case runtime.Type_Info_Matrix:
+		count := info.column_count * info.elem_stride
+		err_conv(_encode_u64(e, u64(count), .Array)) or_return
+
+		if impl, ok := _tag_implementations_type[info.elem.id]; ok {
+			for i in 0..<count {
+				data := uintptr(v.data) + uintptr(i*info.elem_size)
+				impl->marshal(e, any{rawptr(data), info.elem.id}) or_return
+			}
+			return
+		}
+
+		elem_ti := runtime.type_info_core(type_info_of(info.elem.id))
+		for i in 0..<count {
+			data := uintptr(v.data) + uintptr(i*info.elem_size)
+			_marshal_into_encoder(e, any{rawptr(data), info.elem.id}, elem_ti) or_return
+		}
+		return
+
+	case runtime.Type_Info_Simd_Vector:
+		err_conv(_encode_u64(e, u64(info.count), .Array)) or_return
+
+		if impl, ok := _tag_implementations_type[info.elem.id]; ok {
+			for i in 0..<info.count {
+				data := uintptr(v.data) + uintptr(i*info.elem_size)
+				impl->marshal(e, any{rawptr(data), info.elem.id}) or_return
+			}
+			return
+		}
+
+		elem_ti := runtime.type_info_core(type_info_of(info.elem.id))
+		for i in 0..<info.count {
+			data := uintptr(v.data) + uintptr(i*info.elem_size)
+			_marshal_into_encoder(e, any{rawptr(data), info.elem.id}, elem_ti) or_return
+		}
+		return
 	}
 
 	return _unsupported(v.id, nil)
diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin
index 17420af46..be07b926a 100644
--- a/core/encoding/cbor/tags.odin
+++ b/core/encoding/cbor/tags.odin
@@ -82,14 +82,16 @@ _tag_implementations_id: map[string]Tag_Implementation
 _tag_implementations_type: map[typeid]Tag_Implementation
 
 // Register a custom tag implementation to be used when marshalling that type and unmarshalling that tag number.
-tag_register_type :: proc(impl: Tag_Implementation, nr: Tag_Number, type: typeid) {
+tag_register_type :: proc "contextless" (impl: Tag_Implementation, nr: Tag_Number, type: typeid) {
+	context = runtime.default_context()
 	_tag_implementations_nr[nr] = impl
 	_tag_implementations_type[type] = impl
 }
 
 // Register a custom tag implementation to be used when marshalling that tag number or marshalling
 // a field with the struct tag `cbor_tag:"nr"`.
-tag_register_number :: proc(impl: Tag_Implementation, nr: Tag_Number, id: string) {
+tag_register_number :: proc "contextless" (impl: Tag_Implementation, nr: Tag_Number, id: string) {
+	context = runtime.default_context()
 	_tag_implementations_nr[nr] = impl
 	_tag_implementations_id[id] = impl
 }
@@ -98,13 +100,13 @@ tag_register_number :: proc(impl: Tag_Implementation, nr: Tag_Number, id: string
 INITIALIZE_DEFAULT_TAGS :: #config(CBOR_INITIALIZE_DEFAULT_TAGS, !ODIN_DEFAULT_TO_PANIC_ALLOCATOR && !ODIN_DEFAULT_TO_NIL_ALLOCATOR)
 
 @(private, init, disabled=!INITIALIZE_DEFAULT_TAGS)
-tags_initialize_defaults :: proc() {
+tags_initialize_defaults :: proc "contextless" () {
 	tags_register_defaults()
 }
 
 // Registers tags that have implementations provided by this package.
 // This is done by default and can be controlled with the `CBOR_INITIALIZE_DEFAULT_TAGS` define.
-tags_register_defaults :: proc() {
+tags_register_defaults :: proc "contextless" () {
 	tag_register_number({nil, tag_time_unmarshal,   tag_time_marshal},   TAG_EPOCH_TIME_NR, TAG_EPOCH_TIME_ID)
 	tag_register_number({nil, tag_base64_unmarshal, tag_base64_marshal}, TAG_BASE64_NR,     TAG_BASE64_ID)
 	tag_register_number({nil, tag_cbor_unmarshal,   tag_cbor_marshal},   TAG_CBOR_NR,       TAG_CBOR_ID)
@@ -298,7 +300,7 @@ tag_base64_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number,
 
 	#partial switch t in ti.variant {
 	case reflect.Type_Info_String:
-
+		assert(t.encoding == .UTF_8)
 		if t.is_cstring {
 			length  := base64.decoded_len(bytes)
 			builder := strings.builder_make(0, length+1)
diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin
index c39255d9d..043b2ec60 100644
--- a/core/encoding/cbor/unmarshal.odin
+++ b/core/encoding/cbor/unmarshal.odin
@@ -29,6 +29,7 @@ an input.
 unmarshal :: proc {
 	unmarshal_from_reader,
 	unmarshal_from_string,
+	unmarshal_from_bytes,
 }
 
 unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator, temp_allocator := context.temp_allocator, loc := #caller_location) -> (err: Unmarshal_Error) {
@@ -51,6 +52,11 @@ unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, all
 	return
 }
 
+// Unmarshals from a slice of bytes by converting it to a string and forwarding to `unmarshal_from_string`; see docs on the proc group `unmarshal` for more info.
+unmarshal_from_bytes :: proc(bytes: []byte, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator, temp_allocator := context.temp_allocator, loc := #caller_location) -> (err: Unmarshal_Error) {
+	return unmarshal_from_string(string(bytes), ptr, flags, allocator, temp_allocator, loc)
+}
+
 unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator, temp_allocator := context.temp_allocator, loc := #caller_location) -> (err: Unmarshal_Error) {
 	d := d
 
@@ -329,6 +335,8 @@ _unmarshal_value :: proc(d: Decoder, v: any, hdr: Header, allocator := context.a
 _unmarshal_bytes :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add, allocator := context.allocator, loc := #caller_location) -> (err: Unmarshal_Error) {
 	#partial switch t in ti.variant {
 	case reflect.Type_Info_String:
+		assert(t.encoding == .UTF_8)
+
 		bytes := err_conv(_decode_bytes(d, add, allocator=allocator, loc=loc)) or_return
 
 		if t.is_cstring {
@@ -487,7 +495,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header
 		data := mem.alloc_bytes_non_zeroed(t.elem.size * scap, t.elem.align, allocator=allocator, loc=loc) or_return
 		defer if err != nil { mem.free_bytes(data, allocator=allocator, loc=loc) }
 
-		da := mem.Raw_Dynamic_Array{raw_data(data), 0, length, context.allocator }
+		da := mem.Raw_Dynamic_Array{raw_data(data), 0, scap, context.allocator }
 
 		assign_array(d, &da, t.elem, length) or_return
 
@@ -585,6 +593,31 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header
 		if out_of_space { return _unsupported(v, hdr) }
 		return
 
+	case reflect.Type_Info_Matrix:
+		count := t.column_count * t.elem_stride
+		length, _ := err_conv(_decode_len_container(d, add)) or_return
+		if length > count {
+			return _unsupported(v, hdr)
+		}
+
+		da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, allocator }
+
+		out_of_space := assign_array(d, &da, t.elem, length, growable=false) or_return
+		if out_of_space { return _unsupported(v, hdr) }
+		return
+
+	case reflect.Type_Info_Simd_Vector:
+		length, _ := err_conv(_decode_len_container(d, add)) or_return
+		if length > t.count {
+			return _unsupported(v, hdr)
+		}
+
+		da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, allocator }
+
+		out_of_space := assign_array(d, &da, t.elem, length, growable=false) or_return
+		if out_of_space { return _unsupported(v, hdr) }
+		return
+
 	case: return _unsupported(v, hdr)
 	}
 }
diff --git a/core/encoding/csv/doc.odin b/core/encoding/csv/doc.odin
index bfeadafd6..7abe2be49 100644
--- a/core/encoding/csv/doc.odin
+++ b/core/encoding/csv/doc.odin
@@ -63,8 +63,6 @@ Example:
 	read_csv_from_string :: proc(filename: string) {
 		r: csv.Reader
 		r.trim_leading_space  = true
-		r.reuse_record        = true // Without it you have to delete(record)
-		r.reuse_record_buffer = true // Without it you have to each of the fields within it
 		defer csv.reader_destroy(&r)
 
 		csv_data, ok := os.read_entire_file(filename)
diff --git a/core/encoding/csv/reader.odin b/core/encoding/csv/reader.odin
index 5348624d5..577ef219d 100644
--- a/core/encoding/csv/reader.odin
+++ b/core/encoding/csv/reader.odin
@@ -130,7 +130,7 @@ reader_destroy :: proc(r: ^Reader) {
 	for record, row_idx in csv.iterator_next(&r) { ... }
 
 	TIP: If you process the results within the loop and don't need to own the results,
-	you can set the Reader's `reuse_record` and `reuse_record_reuse_record_buffer` to true;
+	you can set the Reader's `reuse_record` and `reuse_record_buffer` to true;
 	you won't need to delete the record or its fields.
 */
 iterator_next :: proc(r: ^Reader) -> (record: []string, idx: int, err: Error, more: bool) {
diff --git a/core/encoding/entity/entity.odin b/core/encoding/entity/entity.odin
index d2f1d46b2..cb8fa8611 100644
--- a/core/encoding/entity/entity.odin
+++ b/core/encoding/entity/entity.odin
@@ -108,7 +108,7 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
 				it couldn't have been part of an XML tag body to be decoded here.
 
 				Keep in mind that we could already *be* inside a CDATA tag.
-				If so, write `>` as a literal and continue.
+				If so, write `<` as a literal and continue.
 			*/
 			if in_data {
 				write_rune(&builder, '<')
@@ -119,11 +119,9 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
 		case ']':
 			// If we're unboxing _and_ decoding CDATA, we'll have to check for the end tag.
 			if in_data {
-				if t.read_offset + len(CDATA_END) < len(t.src) {
-					if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END {
-						in_data = false
-						t.read_offset += len(CDATA_END) - 1
-					}
+				if strings.has_prefix(t.src[t.offset:], CDATA_END) {
+					in_data = false
+					t.read_offset += len(CDATA_END) - 1
 				}
 				continue
 			} else {
@@ -297,40 +295,40 @@ _handle_xml_special :: proc(t: ^Tokenizer, builder: ^strings.Builder, options: X
 	assert(t != nil && t.r == '<')
 	if t.read_offset + len(CDATA_START) >= len(t.src) { return false, .None }
 
-	if string(t.src[t.offset:][:len(CDATA_START)]) == CDATA_START {
-		t.read_offset += len(CDATA_START) - 1
-
+	s := string(t.src[t.offset:])
+	if strings.has_prefix(s, CDATA_START) {
 		if .Unbox_CDATA in options && .Decode_CDATA in options {
 			// We're unboxing _and_ decoding CDATA
+			t.read_offset += len(CDATA_START) - 1
 			return true, .None
 		}
 
-		// CDATA is passed through.
-		offset := t.offset
-
-		// Scan until end of CDATA.
+		// CDATA is passed through. Scan until end of CDATA.
+		start_offset  := t.offset
+		t.read_offset += len(CDATA_START)
 		for {
-			advance(t) or_return
-			if t.r < 0 { return true, .CDATA_Not_Terminated }
-
-			if t.read_offset + len(CDATA_END) < len(t.src) {
-				if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END {
-					t.read_offset += len(CDATA_END) - 1
+			advance(t)
+			if t.r < 0 {
+				// error(t, offset, "[scan_string] CDATA was not terminated\n")
+				return true, .CDATA_Not_Terminated
+			}
 
-					cdata := string(t.src[offset : t.read_offset])
-	
-					if .Unbox_CDATA in options {
-						cdata = cdata[len(CDATA_START):]
-						cdata = cdata[:len(cdata) - len(CDATA_END)]
-					}
+			// Scan until the end of a CDATA tag.
+			if s = string(t.src[t.read_offset:]); strings.has_prefix(s, CDATA_END) {
+				t.read_offset += len(CDATA_END)
+				cdata := string(t.src[start_offset:t.read_offset])
 
-					write_string(builder, cdata)
-					return false, .None
+				if .Unbox_CDATA in options {
+					cdata = cdata[len(CDATA_START):]
+					cdata = cdata[:len(cdata) - len(CDATA_END)]
 				}
+				write_string(builder, cdata)
+				return false, .None
 			}
 		}
 
-	} else if string(t.src[t.offset:][:len(COMMENT_START)]) == COMMENT_START {
+
+	} else if strings.has_prefix(s, COMMENT_START) {
 		t.read_offset += len(COMMENT_START)
 		// Comment is passed through by default.
 		offset := t.offset
diff --git a/core/encoding/hxa/read.odin b/core/encoding/hxa/read.odin
index a679946f8..6dde16848 100644
--- a/core/encoding/hxa/read.odin
+++ b/core/encoding/hxa/read.odin
@@ -79,7 +79,6 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato
 	read_meta :: proc(r: ^Reader, capacity: u32le, allocator := context.allocator, loc := #caller_location) -> (meta_data: []Meta, err: Read_Error) {
 		meta_data = make([]Meta, int(capacity), allocator=allocator)
 		count := 0
-		defer meta_data = meta_data[:count]
 		for &m in meta_data {
 			m.name = read_name(r) or_return
 
@@ -105,6 +104,7 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato
 
 			count += 1
 		}
+		meta_data = meta_data[:count]
 		return
 	}
 
@@ -112,7 +112,6 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato
 		stack_count := read_value(r, u32le) or_return
 		layer_count := 0
 		layers = make(Layer_Stack, stack_count, allocator=allocator, loc=loc)
-		defer layers = layers[:layer_count]
 		for &layer in layers {
 			layer.name = read_name(r) or_return
 			layer.components = read_value(r, u8) or_return
@@ -136,6 +135,7 @@ read :: proc(data: []byte, filename := "<input>", print_error := false, allocato
 			layer_count += 1
 		}
 
+		layers = layers[:layer_count]
 		return
 	}
 
diff --git a/core/encoding/json/marshal.odin b/core/encoding/json/marshal.odin
index ed6de2f52..2fb507edf 100644
--- a/core/encoding/json/marshal.odin
+++ b/core/encoding/json/marshal.odin
@@ -108,13 +108,13 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
 		if opt.write_uint_as_hex && (opt.spec == .JSON5 || opt.spec == .MJSON) {
 			switch i in a {
 			case u8, u16, u32, u64, u128:
-				s = strconv.append_bits_128(buf[:], u, 16, info.signed, 8*ti.size, "0123456789abcdef", { .Prefix })
+				s = strconv.write_bits_128(buf[:], u, 16, info.signed, 8*ti.size, "0123456789abcdef", { .Prefix })
 
 			case:
-				s = strconv.append_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil)
+				s = strconv.write_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil)
 			}
 		} else {
-			s = strconv.append_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil)
+			s = strconv.write_bits_128(buf[:], u, 10, info.signed, 8*ti.size, "0123456789", nil)
 		}
 
 		io.write_string(w, s) or_return
@@ -292,7 +292,7 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
 						case runtime.Type_Info_Integer:
 							buf: [40]byte
 							u := cast_any_int_to_u128(ka)
-							name = strconv.append_bits_128(buf[:], u, 10, info.signed, 8*kti.size, "0123456789", nil)
+							name = strconv.write_bits_128(buf[:], u, 10, info.signed, 8*kti.size, "0123456789", nil)
 							
 							opt_write_key(w, opt, name) or_return
 						case: return .Unsupported_Type
@@ -359,10 +359,10 @@ marshal_to_writer :: proc(w: io.Writer, v: any, opt: ^Marshal_Options) -> (err:
 			#partial switch info in ti.variant {
 			case runtime.Type_Info_String:
 				switch x in v {
-				case string:
-					return x == ""
-				case cstring:
-					return x == nil || x == ""
+				case string:    return x == ""
+				case cstring:   return x == nil || x == ""
+				case string16:  return x == ""
+				case cstring16: return x == nil || x == ""
 				}
 			case runtime.Type_Info_Any:
 				return v.(any) == nil
diff --git a/core/encoding/json/tokenizer.odin b/core/encoding/json/tokenizer.odin
index e46d879a7..ad928b7d9 100644
--- a/core/encoding/json/tokenizer.odin
+++ b/core/encoding/json/tokenizer.odin
@@ -101,7 +101,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
 		}
 	}
 
-	scan_espace :: proc(t: ^Tokenizer) -> bool {
+	scan_escape :: proc(t: ^Tokenizer) -> bool {
 		switch t.r {
 		case '"', '\'', '\\', '/', 'b', 'n', 'r', 't', 'f':
 			next_rune(t)
@@ -310,7 +310,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
 				break
 			}
 			if r == '\\' {
-				scan_espace(t)
+				scan_escape(t)
 			}
 		}
 
diff --git a/core/encoding/json/unmarshal.odin b/core/encoding/json/unmarshal.odin
index 57371e360..0b65adaac 100644
--- a/core/encoding/json/unmarshal.odin
+++ b/core/encoding/json/unmarshal.odin
@@ -117,9 +117,25 @@ assign_int :: proc(val: any, i: $T) -> bool {
 	case uint:    dst = uint   (i)
 	case uintptr: dst = uintptr(i)
 	case:
+		is_bit_set_different_endian_to_platform :: proc(ti: ^runtime.Type_Info) -> bool {
+			// No type info to inspect: report that no byte swap is needed.
+			if ti == nil { return false }
+
+			// Only an integer base type carries an explicit endianness to compare against the target.
+			base := runtime.type_info_base(ti)
+			if int_info, is_int := base.variant.(runtime.Type_Info_Integer); is_int {
+				switch int_info.endianness {
+				case .Platform: return false
+				case .Little:   return ODIN_ENDIAN != .Little
+				case .Big:      return ODIN_ENDIAN != .Big
+				}
+			}
+			return false
+		}
+
 		ti := type_info_of(v.id)
-		if _, ok := ti.variant.(runtime.Type_Info_Bit_Set); ok {
-			do_byte_swap := !reflect.bit_set_is_big_endian(v)
+		if info, ok := ti.variant.(runtime.Type_Info_Bit_Set); ok {
+			do_byte_swap := is_bit_set_different_endian_to_platform(info.underlying)
 			switch ti.size * 8 {
 			case 0: // no-op.
 			case 8:
@@ -390,6 +406,9 @@ unmarshal_expect_token :: proc(p: ^Parser, kind: Token_Kind, loc := #caller_loca
 	return prev
 }
 
+// Struct tags can include not only the name of the JSON key, but also a tag such as `omitempty`.
+// Example: `json:"key_name,omitempty"`
+// This returns the first field as `json_name`, and the rest are returned as `extra`.
 @(private)
 json_name_from_tag_value :: proc(value: string) -> (json_name, extra: string) {
 	json_name = value
@@ -425,12 +444,6 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm
 			defer delete(key, p.allocator)
 			
 			unmarshal_expect_token(p, .Colon)						
-			
-			field_test :: #force_inline proc "contextless" (field_used: [^]byte, offset: uintptr) -> bool {
-				prev_set := field_used[offset/8] & byte(offset&7) != 0
-				field_used[offset/8] |= byte(offset&7)
-				return prev_set
-			}
 
 			field_used_bytes := (reflect.size_of_typeid(ti.id)+7)/8
 			field_used := intrinsics.alloca(field_used_bytes + 1, 1) // + 1 to not overflow on size_of 0 types.
@@ -449,7 +462,9 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm
 			
 			if use_field_idx < 0 {
 				for field, field_idx in fields {
-					if key == field.name {
+					tag_value := reflect.struct_tag_get(field.tag, "json")
+					json_name, _ := json_name_from_tag_value(tag_value)
+					if json_name == "" && key == field.name {
 						use_field_idx = field_idx
 						break
 					}
@@ -470,7 +485,9 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm
 						}
 					}
 
-					if field.name == key || (field.tag != "" && reflect.struct_tag_get(field.tag, "json") == key) {
+					tag_value := reflect.struct_tag_get(field.tag, "json")
+					json_name, _ := json_name_from_tag_value(tag_value)
+					if (json_name == "" && field.name == key) || json_name == key {
 						offset = field.offset
 						type = field.type
 						found = true
@@ -492,6 +509,11 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm
 			}
 
 			if field_found {
+				field_test :: #force_inline proc "contextless" (field_used: [^]byte, offset: uintptr) -> bool { // test-and-set: returns whether the bit for `offset` was already set, then sets it
+					prev_set := field_used[offset/8] & (byte(1) << (offset&7)) != 0 // the mask must be a single bit; the raw value `offset&7` is 0 for multiples of 8 and overlaps for other offsets
+					field_used[offset/8] |= byte(1) << (offset&7)
+					return prev_set
+				}
 				if field_test(field_used, offset) {
 					return .Multiple_Use_Field
 				}
@@ -548,7 +570,9 @@ unmarshal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unm
 			key_ptr: rawptr
 
 			#partial switch tk in t.key.variant {
-				case runtime.Type_Info_String:			
+				case runtime.Type_Info_String:
+					assert(tk.encoding == .UTF_8)
+
 					key_ptr = rawptr(&key)
 					key_cstr: cstring
 					if reflect.is_cstring(t.key) {
diff --git a/core/encoding/uuid/generation.odin b/core/encoding/uuid/generation.odin
index 7c9d4b80c..b210f6a52 100644
--- a/core/encoding/uuid/generation.odin
+++ b/core/encoding/uuid/generation.odin
@@ -240,7 +240,7 @@ Example:
 	import "core:encoding/uuid"
 	import "core:fmt"
 
-	main :: proc() {
+	generate_v8_hash_bytes_example :: proc() {
 		my_uuid := uuid.generate_v8_hash(uuid.Namespace_DNS, "www.odin-lang.org", .SHA256)
 		my_uuid_string := uuid.to_string(my_uuid, context.temp_allocator)
 		fmt.println(my_uuid_string)
@@ -306,7 +306,7 @@ Example:
 	import "core:encoding/uuid"
 	import "core:fmt"
 
-	main :: proc() {
+	generate_v8_hash_string_example :: proc() {
 		my_uuid := uuid.generate_v8_hash(uuid.Namespace_DNS, "www.odin-lang.org", .SHA256)
 		my_uuid_string := uuid.to_string(my_uuid, context.temp_allocator)
 		fmt.println(my_uuid_string)
diff --git a/core/encoding/xml/tokenizer.odin b/core/encoding/xml/tokenizer.odin
index a2bbaf28e..3ef9a6388 100644
--- a/core/encoding/xml/tokenizer.odin
+++ b/core/encoding/xml/tokenizer.odin
@@ -16,6 +16,7 @@ package encoding_xml
 import "core:fmt"
 import "core:unicode"
 import "core:unicode/utf8"
+import "core:strings"
 
 Error_Handler :: #type proc(pos: Pos, fmt: string, args: ..any)
 
@@ -121,7 +122,7 @@ default_error_handler :: proc(pos: Pos, msg: string, args: ..any) {
 error :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) {
 	pos := offset_to_pos(t, offset)
 	if t.err != nil {
-		t.err(pos, msg, ..args)
+		t.err(pos=pos, fmt=msg, args=args)
 	}
 	t.error_count += 1
 }
@@ -268,32 +269,27 @@ scan_comment :: proc(t: ^Tokenizer) -> (comment: string, err: Error) {
 
 // Skip CDATA
 skip_cdata :: proc(t: ^Tokenizer) -> (err: Error) {
-	if t.read_offset + len(CDATA_START) >= len(t.src) {
-		// Can't be the start of a CDATA tag.
+	if s := string(t.src[t.offset:]); !strings.has_prefix(s, CDATA_START) {
 		return .None
 	}
 
-	if string(t.src[t.offset:][:len(CDATA_START)]) == CDATA_START {
-		t.read_offset += len(CDATA_START)
-		offset := t.offset
+	t.read_offset += len(CDATA_START)
+	offset := t.offset
 
-		cdata_scan: for {
-			advance_rune(t)
-			if t.ch < 0 {
-				error(t, offset, "[scan_string] CDATA was not terminated\n")
-				return .Premature_EOF
-			}
+	cdata_scan: for {
+		advance_rune(t)
+		if t.ch < 0 {
+			error(t, offset, "[scan_string] CDATA was not terminated\n")
+			return .Premature_EOF
+		}
 
-			// Scan until the end of a CDATA tag.
-			if t.read_offset + len(CDATA_END) < len(t.src) {
-				if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END {
-					t.read_offset += len(CDATA_END)
-					break cdata_scan
-				}
-			}
+		// Scan until the end of a CDATA tag.
+		if s := string(t.src[t.read_offset:]); strings.has_prefix(s, CDATA_END) {
+			t.read_offset += len(CDATA_END)
+			break cdata_scan
 		}
 	}
-	return
+	return .None
 }
 
 @(optimization_mode="favor_size")
@@ -393,6 +389,8 @@ scan :: proc(t: ^Tokenizer, multiline_string := false) -> Token {
 		case '/': kind = .Slash
 		case '-': kind = .Dash
 		case ':': kind = .Colon
+		case '[': kind = .Open_Bracket
+		case ']': kind = .Close_Bracket
 
 		case '"', '\'':
 			kind = .Invalid
diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin
index b8c8b13a4..707d2b3f3 100644
--- a/core/encoding/xml/xml_reader.odin
+++ b/core/encoding/xml/xml_reader.odin
@@ -56,7 +56,7 @@ Option_Flag :: enum {
 Option_Flags :: bit_set[Option_Flag; u16]
 
 Document :: struct {
-	elements:      [dynamic]Element,
+	elements:      [dynamic]Element `fmt:"v,element_count"`,
 	element_count: Element_ID,
 
 	prologue: Attributes,
@@ -70,15 +70,15 @@ Document :: struct {
 
 	// If we encounter comments before the root node, and the option to intern comments is given, this is where they'll live.
 	// Otherwise they'll be in the element tree.
-	comments: [dynamic]string,
+	comments: [dynamic]string        `fmt:"-"`,
 
 	// Internal
-	tokenizer: ^Tokenizer,
-	allocator: mem.Allocator,
+	tokenizer: ^Tokenizer            `fmt:"-"`,
+	allocator: mem.Allocator         `fmt:"-"`,
 
 	// Input. Either the original buffer, or a copy if `.Input_May_Be_Modified` isn't specified.
-	input:           []u8,
-	strings_to_free: [dynamic]string,
+	input:           []u8            `fmt:"-"`,
+	strings_to_free: [dynamic]string `fmt:"-"`,
 }
 
 Element :: struct {
@@ -175,7 +175,7 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 		data = bytes.clone(data)
 	}
 
-	t := &Tokenizer{}
+	t := new(Tokenizer)
 	init(t, string(data), path, error_handler)
 
 	doc = new(Document)
@@ -195,7 +195,6 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 
 	loop: for {
 		skip_whitespace(t)
-		// NOTE(Jeroen): This is faster as a switch.
 		switch t.ch {
 		case '<':
 			// Consume peeked `<`
@@ -306,9 +305,17 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 						}
 					}
 
+				case .Open_Bracket:
+					// This could be a CDATA tag part of a tag's body. Unread the `<![`
+					t.offset -= 3
+
+					// Instead of calling `parse_body` here, we could also `continue loop`
+					// and fall through to the `case:` at the bottom of the outer loop.
+					// This makes the intent clearer.
+					parse_body(doc, element, opts) or_return
+
 				case:
-					error(t, t.offset, "Invalid Token after <!. Expected .Ident, got %#v\n", next)
-					return
+					error(t, t.offset, "Unexpected Token after <!: %#v", next)
 				}
 
 			} else if open.kind == .Question {
@@ -341,38 +348,7 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 
 		case:
 			// This should be a tag's body text.
-			body_text        := scan_string(t, t.offset) or_return
-			needs_processing := .Unbox_CDATA          in opts.flags
-			needs_processing |= .Decode_SGML_Entities in opts.flags
-
-			if !needs_processing {
-				append(&doc.elements[element].value, body_text)
-				continue
-			}
-
-			decode_opts := entity.XML_Decode_Options{}
-			if .Keep_Tag_Body_Comments not_in opts.flags {
-				decode_opts += { .Comment_Strip }
-			}
-
-			if .Decode_SGML_Entities not_in opts.flags {
-				decode_opts += { .No_Entity_Decode }
-			}
-
-			if .Unbox_CDATA in opts.flags {
-				decode_opts += { .Unbox_CDATA }
-				if .Decode_SGML_Entities in opts.flags {
-					decode_opts += { .Decode_CDATA }
-				}
-			}
-
-			decoded, decode_err := entity.decode_xml(body_text, decode_opts)
-			if decode_err == .None {
-				append(&doc.elements[element].value, decoded)
-				append(&doc.strings_to_free, decoded)
-			} else {
-				append(&doc.elements[element].value, body_text)
-			}
+			parse_body(doc, element, opts) or_return
 		}
 	}
 
@@ -427,6 +403,7 @@ destroy :: proc(doc: ^Document) {
 	}
 	delete(doc.strings_to_free)
 
+	free(doc.tokenizer)
 	free(doc)
 }
 
@@ -457,8 +434,6 @@ parse_attribute :: proc(doc: ^Document) -> (attr: Attribute, offset: int, err: E
 	t := doc.tokenizer
 
 	key    := expect(t, .Ident)  or_return
-	offset  = t.offset - len(key.text)
-
 	_       = expect(t, .Eq)     or_return
 	value  := expect(t, .String, multiline_string=true) or_return
 
@@ -591,6 +566,47 @@ parse_doctype :: proc(doc: ^Document) -> (err: Error) {
 	return .None
 }
 
+parse_body :: proc(doc: ^Document, element: Element_ID, opts: Options) -> (err: Error) { // Scans body text at the tokenizer's offset and appends it (decoded if `opts` asks for it) to `element`'s values.
+	assert(doc != nil)
+	context.allocator = doc.allocator // Callees that default to `context.allocator` now use the document's allocator.
+	t := doc.tokenizer
+
+	body_text        := scan_string(t, t.offset) or_return // A scan error is handed back to the caller unchanged.
+	needs_processing := .Unbox_CDATA          in opts.flags
+	needs_processing |= .Decode_SGML_Entities in opts.flags // True when either of the two flags is set.
+
+	if !needs_processing {
+		append(&doc.elements[element].value, body_text) // Neither flag set: store the scanned text untouched.
+		return
+	}
+
+	decode_opts := entity.XML_Decode_Options{} // Translate the reader's option flags into decoder options.
+	if .Keep_Tag_Body_Comments not_in opts.flags {
+		decode_opts += { .Comment_Strip }
+	}
+
+	if .Decode_SGML_Entities not_in opts.flags {
+		decode_opts += { .No_Entity_Decode }
+	}
+
+	if .Unbox_CDATA in opts.flags {
+		decode_opts += { .Unbox_CDATA }
+		if .Decode_SGML_Entities in opts.flags {
+			decode_opts += { .Decode_CDATA } // Only requested when both unboxing and entity decoding are enabled.
+		}
+	}
+
+	decoded, decode_err := entity.decode_xml(body_text, decode_opts)
+	if decode_err == .None {
+		append(&doc.elements[element].value, decoded)
+		append(&doc.strings_to_free, decoded) // Track the decoded string; presumably `destroy` releases it later (confirm).
+	} else {
+		append(&doc.elements[element].value, body_text) // Decode failed: keep the raw text; the decode error is not propagated.
+	}
+
+	return
+}
+
 Element_ID :: u32
 
 new_element :: proc(doc: ^Document) -> (id: Element_ID) {
@@ -609,4 +625,4 @@ new_element :: proc(doc: ^Document) -> (id: Element_ID) {
 	cur := doc.element_count
 	doc.element_count += 1
 	return cur
-}
+}
+\ No newline at end of file
author	Courtney Strachan <courtney.strachan@gmail.com>	2025-10-06 02:41:44 +0100
committer	GitHub <noreply@github.com>	2025-10-06 02:41:44 +0100
commit	6de2d6e8ca687c989bbb7806e5cbe8d791e425bf (patch)
tree	03a2e0a84c7c1530215f8e3f59a7f643b39b3677 /core/encoding
parent	dbbe96ae5c343f0e803de6ee508207a62571534f (diff)
parent	0f97382fa3e46da80705c00dfe02f3deb9562e4f (diff)