Fix #2684

author: Jeroen van Rijn <Kelimion@users.noreply.github.com> 2023-07-28 15:53:39 +0200
committer: Jeroen van Rijn <Kelimion@users.noreply.github.com> 2023-07-28 15:53:39 +0200
commit: 683ee75703f9bde9ecf34ae3ec6ab2c3b68b52b2 (patch)
tree: 4ba4cf55ff7d99b0e5910df2f4e0c6ef4fb4088d
parent: 5ac7fe453f5fbf0995c24f0c1c12ed439ae3aee9 (diff)
7 files changed, 98 insertions, 73 deletions
diff --git a/core/encoding/xml/debug_print.odin b/core/encoding/xml/debug_print.odin
index e9a1cb160..0736e8893 100644
--- a/core/encoding/xml/debug_print.odin
+++ b/core/encoding/xml/debug_print.odin
@@ -65,19 +65,21 @@ print_element :: proc(writer: io.Writer, doc: ^Document, element_id: Element_ID,
 
 	if element.kind == .Element {
 		wprintf(writer, "<%v>\n", element.ident)
-		if len(element.value) > 0 {
-			tab(writer, indent + 1)
-			wprintf(writer, "[Value] %v\n", element.value)
+
+		for value in element.value {
+			switch v in value {
+			case string:
+				tab(writer, indent + 1)
+				wprintf(writer, "[Value] %v\n", v)
+			case Element_ID:
+				print_element(writer, doc, v, indent + 1)
+			}
 		}
 
 		for attr in element.attribs {
 			tab(writer, indent + 1)
 			wprintf(writer, "[Attr] %v: %v\n", attr.key, attr.val)
 		}
-
-		for child in element.children {
-			print_element(writer, doc, child, indent + 1)
-		}
 	} else if element.kind == .Comment {
 		wprintf(writer, "[COMMENT] %v\n", element.value)
 	}
diff --git a/core/encoding/xml/example/xml_example.odin b/core/encoding/xml/example/xml_example.odin
index 887b40764..aebb8d0ea 100644
--- a/core/encoding/xml/example/xml_example.odin
+++ b/core/encoding/xml/example/xml_example.odin
@@ -72,10 +72,10 @@ example :: proc() {
 	 	return
 	}
 
-	printf("Found `<charlist>` with %v children, %v elements total\n", len(docs[0].elements[charlist].children), docs[0].element_count)
+	printf("Found `<charlist>` with %v children, %v elements total\n", len(docs[0].elements[charlist].value), docs[0].element_count)
 
-	crc32 := doc_hash(docs[0])
-	printf("[%v] CRC32: 0x%08x\n", "🎉" if crc32 == 0xcaa042b9 else "🤬", crc32)
+	crc32 := doc_hash(docs[0], false)
+	printf("[%v] CRC32: 0x%08x\n", "🎉" if crc32 == 0x420dbac5 else "🤬", crc32)
 
 	for round in 0..<N {
 		defer xml.destroy(docs[round])
diff --git a/core/encoding/xml/helpers.odin b/core/encoding/xml/helpers.odin
index 48f058334..200c5c1de 100644
--- a/core/encoding/xml/helpers.odin
+++ b/core/encoding/xml/helpers.odin
@@ -13,20 +13,25 @@ find_child_by_ident :: proc(doc: ^Document, parent_id: Element_ID, ident: string
 	tag := doc.elements[parent_id]
 
 	count := 0
-	for child_id in tag.children {
-		child := doc.elements[child_id]
-		/*
-			Skip commments. They have no name.
-		*/
-		if child.kind  != .Element                { continue }
+	for v in tag.value {
+		switch child_id in v {
+		case string: continue
+		case Element_ID:
+			child := doc.elements[child_id]
+			/*
+				Skip comments. They have no name.
+			*/
+			if child.kind  != .Element                { continue }
 
-		/*
-			If the ident matches and it's the nth such child, return it.
-		*/
-		if child.ident == ident {
-			if count == nth                       { return child_id, true }
-			count += 1
+			/*
+				If the ident matches and it's the nth such child, return it.
+			*/
+			if child.ident == ident {
+				if count == nth                       { return child_id, true }
+				count += 1
+			}
 		}
+
 	}
 	return 0, false
 }
diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin
index f5523c299..f4f8a4b05 100644
--- a/core/encoding/xml/xml_reader.odin
+++ b/core/encoding/xml/xml_reader.odin
@@ -125,16 +125,19 @@ Document :: struct {
 
 Element :: struct {
 	ident:   string,
-	value:   string,
+	value:   [dynamic]Value,
 	attribs: Attributes,
 
 	kind: enum {
 		Element = 0,
 		Comment,
 	},
-
 	parent:   Element_ID,
-	children: [dynamic]Element_ID,
+}
+
+Value :: union {
+	string,
+	Element_ID,
 }
 
 Attribute :: struct {
@@ -247,9 +250,6 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 
 	err =            .Unexpected_Token
 	element, parent: Element_ID
-
-	tag_is_open   := false
-	first_element := true
 	open: Token
 
 	/*
@@ -275,16 +275,10 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 					e.g. <odin - Start of new element.
 				*/
 				element = new_element(doc)
-				tag_is_open = true
-
-				if first_element {
-					/*
-						First element.
-					*/
-					parent   = element
-					first_element = false
+				if element == 0 { // First Element
+					parent = element
 				} else {
-					append(&doc.elements[parent].children, element)
+					append(&doc.elements[parent].value, element)
 				}
 
 				doc.elements[element].parent = parent
@@ -324,7 +318,6 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 					expect(t, .Gt) or_return
 					parent      = doc.elements[element].parent
 					element     = parent
-					tag_is_open = false
 
 				case:
 					error(t, t.offset, "Expected close tag, got: %#v\n", end_token)
@@ -344,7 +337,6 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 				}
 				parent      = doc.elements[element].parent
 				element     = parent
-				tag_is_open = false
 
 			} else if open.kind == .Exclaim {
 				/*
@@ -392,8 +384,8 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 							el := new_element(doc)
 							doc.elements[el].parent = element
 							doc.elements[el].kind   = .Comment
-							doc.elements[el].value  = comment
-							append(&doc.elements[element].children, el)
+							append(&doc.elements[el].value, comment)
+							append(&doc.elements[element].value, el)
 						}
 					}
 
@@ -436,9 +428,6 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 			/*
 				End of file.
 			*/
-			if tag_is_open {
-				return doc, .Premature_EOF
-			}
 			break loop
 
 		case:
@@ -450,7 +439,7 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 			needs_processing |= .Decode_SGML_Entities in opts.flags
 
 			if !needs_processing {
-				doc.elements[element].value = body_text
+				append(&doc.elements[element].value, body_text)
 				continue
 			}
 
@@ -472,10 +461,10 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 
 			decoded, decode_err := entity.decode_xml(body_text, decode_opts)
 			if decode_err == .None {
-				doc.elements[element].value = decoded
+				append(&doc.elements[element].value, decoded)
 				append(&doc.strings_to_free, decoded)
 			} else {
-				doc.elements[element].value = body_text
+				append(&doc.elements[element].value, body_text)
 			}
 		}
 	}
@@ -518,7 +507,7 @@ destroy :: proc(doc: ^Document) {
 
 	for el in doc.elements {
 		delete(el.attribs)
-		delete(el.children)
+		delete(el.value)
 	}
 	delete(doc.elements)
 
@@ -710,6 +699,5 @@ new_element :: proc(doc: ^Document) -> (id: Element_ID) {
 
 	cur := doc.element_count
 	doc.element_count += 1
-
 	return cur
 }
 \ No newline at end of file
diff --git a/core/text/i18n/i18n.odin b/core/text/i18n/i18n.odin
index 8513f30c8..151f9e129 100644
--- a/core/text/i18n/i18n.odin
+++ b/core/text/i18n/i18n.odin
@@ -71,6 +71,8 @@ Error :: enum {
 	TS_File_Expected_Source,
 	TS_File_Expected_Translation,
 	TS_File_Expected_NumerusForm,
+	Bad_Str,
+	Bad_Id,
 
 }
 
diff --git a/core/text/i18n/qt_linguist.odin b/core/text/i18n/qt_linguist.odin
index e7c1f9974..f4d2d78d6 100644
--- a/core/text/i18n/qt_linguist.odin
+++ b/core/text/i18n/qt_linguist.odin
@@ -30,10 +30,26 @@ TS_XML_Options := xml.Options{
 parse_qt_linguist_from_bytes :: proc(data: []byte, options := DEFAULT_PARSE_OPTIONS, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
 	context.allocator = allocator
 
+	get_str :: proc(val: xml.Value) -> (str: string, err: Error) {
+		v, ok := val.(string)
+		if ok {
+			return v, .None
+		}
+		return "", .Bad_Str
+	}
+
+	get_id :: proc(val: xml.Value) -> (str: xml.Element_ID, err: Error) {
+		v, ok := val.(xml.Element_ID)
+		if ok {
+			return v, .None
+		}
+		return 0, .Bad_Id
+	}
+
 	ts, xml_err := xml.parse(data, TS_XML_Options)
 	defer xml.destroy(ts)
 
-	if xml_err != .None || ts.element_count < 1 || ts.elements[0].ident != "TS" || len(ts.elements[0].children) == 0 {
+	if xml_err != .None || ts.element_count < 1 || ts.elements[0].ident != "TS" || len(ts.elements[0].value) == 0 {
 		return nil, .TS_File_Parse_Error
 	}
 
@@ -46,10 +62,12 @@ parse_qt_linguist_from_bytes :: proc(data: []byte, options := DEFAULT_PARSE_OPTI
 
 	section: ^Section
 
-	for child_id in ts.elements[0].children {
+	for value in ts.elements[0].value {
+		child_id := get_id(value) or_return
+
 		// These should be <context>s.
-		child := ts.elements[child_id]
-		if child.ident != "context" {
+
+		if ts.elements[child_id].ident != "context" {
 			return translation, .TS_File_Expected_Context
 		}
 
@@ -61,7 +79,8 @@ parse_qt_linguist_from_bytes :: proc(data: []byte, options := DEFAULT_PARSE_OPTI
 
 		section_name, _ := strings.intern_get(&translation.intern, "")
 		if !options.merge_sections {
-			section_name, _ = strings.intern_get(&translation.intern, ts.elements[section_name_id].value)
+			value_text := get_str(ts.elements[section_name_id].value[0]) or_return
+			section_name, _ = strings.intern_get(&translation.intern, value_text)
 		}
 
 		if section_name not_in translation.k_v {
@@ -92,8 +111,14 @@ parse_qt_linguist_from_bytes :: proc(data: []byte, options := DEFAULT_PARSE_OPTI
 				return translation, .TS_File_Expected_Translation
 			}
 
-			source, _ := strings.intern_get(&translation.intern, ts.elements[source_id].value)
-			xlat,   _ := strings.intern_get(&translation.intern, ts.elements[translation_id].value)
+			source    := get_str(ts.elements[source_id].value[0]) or_return
+			source, _  = strings.intern_get(&translation.intern, source)
+
+			xlat := ""
+			if !has_plurals {
+				xlat    = get_str(ts.elements[translation_id].value[0]) or_return
+				xlat, _ = strings.intern_get(&translation.intern, xlat)
+			}
 
 			if source in section {
 				return translation, .Duplicate_Key
@@ -124,7 +149,8 @@ parse_qt_linguist_from_bytes :: proc(data: []byte, options := DEFAULT_PARSE_OPTI
 					if !numerus_found {
 						break
 					}
-					numerus, _ := strings.intern_get(&translation.intern, ts.elements[numerus_id].value)
+					numerus := get_str(ts.elements[numerus_id].value[0]) or_return
+					numerus, _ = strings.intern_get(&translation.intern, numerus)
 					section[source][num_plurals] = numerus
 
 					num_plurals += 1
diff --git a/tests/core/encoding/xml/test_core_xml.odin b/tests/core/encoding/xml/test_core_xml.odin
index 3cfc75a65..a05db377a 100644
--- a/tests/core/encoding/xml/test_core_xml.odin
+++ b/tests/core/encoding/xml/test_core_xml.odin
@@ -47,7 +47,7 @@ TESTS :: []TEST{
 			},
 			expected_doctype = "恥ずべきフクロウ",
 		},
-		crc32     = 0x30d82264,
+		crc32     = 0xe9b62f03,
 	},
 
 	{
@@ -62,7 +62,7 @@ TESTS :: []TEST{
 			},
 			expected_doctype = "恥ずべきフクロウ",
 		},
-		crc32     = 0xad31d8e8,
+		crc32     = 0x9c2643ed,
 	},
 
 	{
@@ -77,7 +77,7 @@ TESTS :: []TEST{
 			},
 			expected_doctype = "TS",
 		},
-		crc32     = 0x7bce2630,
+		crc32     = 0x859b7443,
 	},
 
 	{
@@ -92,7 +92,7 @@ TESTS :: []TEST{
 			},
 			expected_doctype = "xliff",
 		},
-		crc32     = 0x43f19d61,
+		crc32     = 0x3deaf329,
 	},
 
 	{
@@ -107,7 +107,7 @@ TESTS :: []TEST{
 			},
 			expected_doctype = "xliff",
 		},
-		crc32     = 0x961e7635,
+		crc32     = 0x0c55e287,
 	},
 
 	{
@@ -118,7 +118,7 @@ TESTS :: []TEST{
 			},
 			expected_doctype = "html",
 		},
-		crc32     = 0x573c1033,
+		crc32     = 0x05373317,
 	},
 
 	{
@@ -129,7 +129,7 @@ TESTS :: []TEST{
 			},
 			expected_doctype = "html",
 		},
-		crc32     = 0x82588917,
+		crc32     = 0x3b6d4a90,
 	},
 
 	{
@@ -140,7 +140,7 @@ TESTS :: []TEST{
 			},
 			expected_doctype = "html",
 		},
-		crc32     = 0x5e74d8a6,
+		crc32     = 0x5be2ffdc,
 	},
 
 	/*
@@ -170,7 +170,7 @@ TESTS :: []TEST{
 			expected_doctype = "",
 		},
 		err       = .None,
-		crc32     = 0xcaa042b9,
+		crc32     = 0x420dbac5,
 	},
 }
 
@@ -260,19 +260,21 @@ doc_to_string :: proc(doc: ^xml.Document) -> (result: string) {
 
 		if element.kind == .Element {
 			wprintf(writer, "<%v>\n", element.ident)
-			if len(element.value) > 0 {
-				tab(writer, indent + 1)
-				wprintf(writer, "[Value] %v\n", element.value)
+
+			for value in element.value {
+				switch v in value {
+				case string:
+					tab(writer, indent + 1)
+					wprintf(writer, "[Value] %v\n", v)
+				case xml.Element_ID:
+					print_element(writer, doc, v, indent + 1)
+				}
 			}
 
 			for attr in element.attribs {
 				tab(writer, indent + 1)
 				wprintf(writer, "[Attr] %v: %v\n", attr.key, attr.val)
 			}
-
-			for child in element.children {
-				print_element(writer, doc, child, indent + 1)
-			}
 		} else if element.kind == .Comment {
 			wprintf(writer, "[COMMENT] %v\n", element.value)
 		}
author	Jeroen van Rijn <Kelimion@users.noreply.github.com>	2023-07-28 15:53:39 +0200
committer	Jeroen van Rijn <Kelimion@users.noreply.github.com>	2023-07-28 15:53:39 +0200
commit	683ee75703f9bde9ecf34ae3ec6ab2c3b68b52b2 (patch)
tree	4ba4cf55ff7d99b0e5910df2f4e0c6ef4fb4088d
parent	5ac7fe453f5fbf0995c24f0c1c12ed439ae3aee9 (diff)