aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeroen van Rijn <Kelimion@users.noreply.github.com>2021-12-05 02:17:48 +0100
committerJeroen van Rijn <Kelimion@users.noreply.github.com>2021-12-05 02:52:23 +0100
commitd65d6edb0e1887871c4de6a4e8a1630927153eae (patch)
tree962864b2793913332adfe390521eff747f4c4863
parent3d72e80ccf0f382f03a1c9407c4728862c5bca91 (diff)
[xml] Improve XML tests, test `core:encoding/entity`.
-rw-r--r--core/encoding/entity/entity.odin7
-rw-r--r--core/encoding/entity/example/entity_example.odin1
-rw-r--r--core/encoding/entity/example/test.html2
-rw-r--r--tests/core/assets/XML/entities.html29
-rw-r--r--tests/core/assets/XML/nl_NL-xliff-1.2.xliff (renamed from tests/core/assets/XML/nl_NL-xliff-1.0.xliff)0
-rw-r--r--tests/core/assets/XML/utf8.xml2
-rw-r--r--tests/core/encoding/xml/test_core_xml.odin382
7 files changed, 269 insertions, 154 deletions
diff --git a/core/encoding/entity/entity.odin b/core/encoding/entity/entity.odin
index 8742446e6..db1a5ad0b 100644
--- a/core/encoding/entity/entity.odin
+++ b/core/encoding/entity/entity.odin
@@ -115,7 +115,14 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
We don't need to check if we need to write a `<`, because if it isn't CDATA or a comment,
it couldn't have been part of an XML tag body to be decoded here.
+
+ Keep in mind that we could already *be* inside a CDATA tag.
+ If so, write `<` as a literal and continue.
*/
+ if in_data {
+ write_rune(&builder, '<')
+ continue
+ }
in_data = _handle_xml_special(&t, &builder, options) or_return
case ']':
diff --git a/core/encoding/entity/example/entity_example.odin b/core/encoding/entity/example/entity_example.odin
index 161a44827..882203f48 100644
--- a/core/encoding/entity/example/entity_example.odin
+++ b/core/encoding/entity/example/entity_example.odin
@@ -50,6 +50,7 @@ _main :: proc() {
using fmt
options := xml.Options{ flags = { .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities }}
+
doc, _ := xml.parse(#load("test.html"), options)
defer xml.destroy(doc)
diff --git a/core/encoding/entity/example/test.html b/core/encoding/entity/example/test.html
index 62a0bb35a..ebbc6470c 100644
--- a/core/encoding/entity/example/test.html
+++ b/core/encoding/entity/example/test.html
@@ -22,7 +22,7 @@
</div>
<!-- EXPECTED: Foozle]! © BOX ® /BOX42&;1234& -->
<div>
- &verbar; &vert; &VerticalLine; &fjlig; &grave; &bsol; &reg; &rhov; &CounterClockwiseContourIntegral;
+ &verbar; &vert; &VerticalLine; &fjlig; &grave; &bsol; &reg; &rhov; &CounterClockwiseContourIntegral; &bsemi;
</div>
</body>
</html> \ No newline at end of file
diff --git a/tests/core/assets/XML/entities.html b/tests/core/assets/XML/entities.html
new file mode 100644
index 000000000..05a6b107e
--- /dev/null
+++ b/tests/core/assets/XML/entities.html
@@ -0,0 +1,29 @@
+<html>
+ <head>
+ <title>Entity Reference Test</title>
+ <style>
+ body {
+ background: #000; color: #eee;
+ width: 40%;
+ margin-left: auto;
+ margin-right: auto;
+ font-size: 14pt;
+ }
+ </style>
+ </head>
+ <body>
+ <h1>Entity Reference Test</h1>
+ <div id="test_cdata_in_comment" foo="">
+ Foozle]!&#32;&copy;&#x20;<!-- <![CDATA[&#32;&reg;&#x20;]]> -->42&;1234&
+ </div>
+ <!-- foo attribute should be empty but present -->
+ <!-- EXPECTED: Foozle]! © 42&;1234& -->
+ <div id="test_cdata_unwrap_and_passthrough">
+ Foozle]!&#32;&copy;&#x20;<![CDATA[BOX&#32;&reg;&#x20;/BOX]]>42&;1234&
+ </div>
+ <!-- EXPECTED: Foozle]! © BOX ® /BOX42&;1234& -->
+ <div>
+ &verbar; &vert; &VerticalLine; &fjlig; &grave; &bsol; &reg; &rhov; &CounterClockwiseContourIntegral; &bsemi;
+ </div>
+ </body>
+</html> \ No newline at end of file
diff --git a/tests/core/assets/XML/nl_NL-xliff-1.0.xliff b/tests/core/assets/XML/nl_NL-xliff-1.2.xliff
index 7a1abcd66..7a1abcd66 100644
--- a/tests/core/assets/XML/nl_NL-xliff-1.0.xliff
+++ b/tests/core/assets/XML/nl_NL-xliff-1.2.xliff
diff --git a/tests/core/assets/XML/utf8.xml b/tests/core/assets/XML/utf8.xml
index c9ed3bf69..6e1a897ea 100644
--- a/tests/core/assets/XML/utf8.xml
+++ b/tests/core/assets/XML/utf8.xml
@@ -4,5 +4,5 @@
<부끄러운:barzle>
<name foo:bar="birmese">ရှက်စရာ ဇီးကွက်</name>
<nickname>Owl of Shame</nickname>
- <data>More CDATA <![CDATA[<greeting>Hello, world!</greeting><![CDATA] < ]]> Nonsense.</data>
+ <data>More CDATA <![CDATA[<greeting>Hello, world!</greeting><![CDATA] <$]]> Nonsense.</data>
</부끄러운:barzle> \ No newline at end of file
diff --git a/tests/core/encoding/xml/test_core_xml.odin b/tests/core/encoding/xml/test_core_xml.odin
index c2e0aa172..5cb59e001 100644
--- a/tests/core/encoding/xml/test_core_xml.odin
+++ b/tests/core/encoding/xml/test_core_xml.odin
@@ -3,16 +3,16 @@ package test_core_xml
import "core:encoding/xml"
import "core:testing"
import "core:mem"
+import "core:strings"
+import "core:io"
import "core:fmt"
+import "core:hash"
Silent :: proc(pos: xml.Pos, fmt: string, args: ..any) {
// Custom (silent) error handler.
}
-OPTIONS :: xml.Options{
- flags = {
- .Ignore_Unsupported, .Intern_Comments,
- },
+OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, .Intern_Comments, },
expected_doctype = "",
}
@@ -22,76 +22,153 @@ TEST_fail := 0
TEST :: struct {
filename: string,
options: xml.Options,
- expected: struct {
- error: xml.Error,
- xml_version: string,
- xml_encoding: string,
- doctype: string,
- },
+ err: xml.Error,
+ crc32: u32,
}
+/*
+ Relative to ODIN_ROOT
+*/
+TEST_FILE_PATH_PREFIX :: "tests/core/assets/XML"
+
TESTS :: []TEST{
/*
First we test that certain files parse without error.
*/
+
+ {
+ /*
+ <?xml version="1.0" encoding="utf-8"?>
+ <!DOCTYPE 恥ずべきフクロウ>
+ <恥ずべきフクロウ 올빼미_id="Foozle&#32;<![CDATA[<greeting>Hello, world!"</greeting>]]>Barzle">
+ <부끄러운:barzle>
+ <name foo:bar="birmese">ရှက်စရာ ဇီးကွက်</name>
+ <nickname>Owl of Shame</nickname>
+ <data>More CDATA <![CDATA[<greeting>Hello, world!</greeting><![CDATA] <$]]> Nonsense.</data>
+ </부끄러운:barzle>
+ */
+
+ /*
+ Tests UTF-8 idents and values.
+ Tests namespaced ident.
+ Tests that nested partial CDATA start doesn't trip up parser.
+ */
+ filename = "utf8.xml",
+ options = {
+ flags = {
+ .Ignore_Unsupported, .Intern_Comments,
+ },
+ expected_doctype = "恥ずべきフクロウ",
+ },
+ crc32 = 0x30d82264,
+ },
+
+ {
+ /*
+ Same as above.
+ Unbox CDATA in data tag.
+ */
+ filename = "utf8.xml",
+ options = {
+ flags = {
+ .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA,
+ },
+ expected_doctype = "恥ずべきフクロウ",
+ },
+ crc32 = 0x6d38ac58,
+ },
+
+ {
+ /*
+ Simple Qt TS translation file.
+ `core:i18n` requires it to be parsed properly.
+ */
+ filename = "nl_NL-qt-ts.ts",
+ options = {
+ flags = {
+ .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities,
+ },
+ expected_doctype = "TS",
+ },
+ crc32 = 0x7bce2630,
+ },
+
{
- filename = "assets/XML/utf8.xml",
- options = OPTIONS,
- expected = {
- error = .None,
- xml_version = "1.0",
- xml_encoding = "utf-8",
- doctype = "恥ずべきフクロウ",
+ /*
+ Simple XLiff 1.2 file.
+ `core:i18n` requires it to be parsed properly.
+ */
+ filename = "nl_NL-xliff-1.2.xliff",
+ options = {
+ flags = {
+ .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities,
+ },
+ expected_doctype = "xliff",
},
+ crc32 = 0x43f19d61,
},
+
+ {
+ /*
+ Simple XLiff 2.0 file.
+ `core:i18n` requires it to be parsed properly.
+ */
+ filename = "nl_NL-xliff-2.0.xliff",
+ options = {
+ flags = {
+ .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities,
+ },
+ expected_doctype = "xliff",
+ },
+ crc32 = 0x961e7635,
+ },
+
{
- filename = "assets/XML/nl_NL-qt-ts.ts",
- options = OPTIONS,
- expected = {
- error = .None,
- xml_version = "1.0",
- xml_encoding = "utf-8",
- doctype = "TS",
+ filename = "entities.html",
+ options = {
+ flags = {
+ .Ignore_Unsupported, .Intern_Comments,
+ },
+ expected_doctype = "html",
},
+ crc32 = 0xdb4a1e79,
},
+
{
- filename = "assets/XML/nl_NL-xliff-1.0.xliff",
- options = OPTIONS,
- expected = {
- error = .None,
- xml_version = "1.0",
- xml_encoding = "UTF-8",
- doctype = "",
+ filename = "entities.html",
+ options = {
+ flags = {
+ .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA,
+ },
+ expected_doctype = "html",
},
+ crc32 = 0x82588917,
},
+
{
- filename = "assets/XML/nl_NL-xliff-2.0.xliff",
- options = OPTIONS,
- expected = {
- error = .None,
- xml_version = "1.0",
- xml_encoding = "utf-8",
- doctype = "",
+ filename = "entities.html",
+ options = {
+ flags = {
+ .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities,
+ },
+ expected_doctype = "html",
},
+ crc32 = 0x5e74d8a6,
},
/*
Then we test that certain errors are returned as expected.
*/
{
- filename = "assets/XML/utf8.xml",
+ filename = "utf8.xml",
options = {
flags = {
.Ignore_Unsupported, .Intern_Comments,
},
expected_doctype = "Odin",
},
- expected = {
- error = .Invalid_DocType,
- xml_version = "1.0",
- xml_encoding = "utf-8",
- doctype = "恥ずべきフクロウ",
- },
+ err = .Invalid_DocType,
+ crc32 = 0x49b83d0a,
},
}
@@ -115,150 +192,151 @@ when ODIN_TEST {
}
}
-main :: proc() {
- t := testing.T{}
-
- track: mem.Tracking_Allocator
- mem.tracking_allocator_init(&track, context.allocator)
- context.allocator = mem.tracking_allocator(&track)
+test_file_path :: proc(filename: string) -> (path: string) {
- run_tests(&t)
+ path = fmt.tprintf("%v%v/%v", ODIN_ROOT, TEST_FILE_PATH_PREFIX, filename)
+ temp := transmute([]u8)path
- if len(track.allocation_map) > 0 {
- for _, v in track.allocation_map {
- err_msg := fmt.tprintf("%v Leaked %v bytes.", v.location, v.size)
- expect(&t, false, err_msg)
+ for r, i in path {
+ if r == '\\' {
+ temp[i] = '/'
}
- }
-
- fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
+ }
+ return path
}
-@test
-run_tests :: proc(t: ^testing.T) {
- using fmt
-
- count := 0
+doc_to_string :: proc(doc: ^xml.Document) -> (result: string) {
+ /*
+ Effectively a clone of the debug printer in the xml package.
+ We duplicate it here so that the way it prints an XML document to a string is stable.
- for test in TESTS {
- printf("Trying to parse %v\n\n", test.filename)
+ This way we can hash the output. If it changes, it means that the document or how it was parsed changed,
+ not how it was printed. One less source of variability.
+ */
+ print :: proc(writer: io.Writer, doc: ^xml.Document) -> (written: int, err: io.Error) {
+ if doc == nil { return }
+ using fmt
- doc, err := xml.parse(test.filename, test.options, Silent)
- defer xml.destroy(doc)
+ written += wprintf(writer, "[XML Prolog]\n")
- err_msg := tprintf("Expected return value %v, got %v", test.expected.error, err)
- expect(t, err == test.expected.error, err_msg)
+ for attr in doc.prolog {
+ written += wprintf(writer, "\t%v: %v\n", attr.key, attr.val)
+ }
- if len(test.expected.xml_version) > 0 {
- xml_version := ""
- for attr in doc.prolog {
- if attr.key == "version" {
- xml_version = attr.val
- }
- }
+ written += wprintf(writer, "[Encoding] %v\n", doc.encoding)
- err_msg = tprintf("Expected XML version %v, got %v", test.expected.xml_version, xml_version)
- expect(t, xml_version == test.expected.xml_version, err_msg)
- }
+ if len(doc.doctype.ident) > 0 {
+ written += wprintf(writer, "[DOCTYPE] %v\n", doc.doctype.ident)
- if len(test.expected.xml_encoding) > 0 {
- xml_encoding := ""
- for attr in doc.prolog {
- if attr.key == "encoding" {
- xml_encoding = attr.val
- }
+ if len(doc.doctype.rest) > 0 {
+ wprintf(writer, "\t%v\n", doc.doctype.rest)
}
-
- err_msg = tprintf("Expected XML encoding %v, got %v", test.expected.xml_encoding, xml_encoding)
- expect(t, xml_encoding == test.expected.xml_encoding, err_msg)
}
- err_msg = tprintf("Expected DOCTYPE %v, got %v", test.expected.doctype, doc.doctype.ident)
- expect(t, doc.doctype.ident == test.expected.doctype, err_msg)
-
- /*
- File-specific tests.
- */
- switch count {
- case 0:
- expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.")
- attr := doc.root.attribs[0]
+ for comment in doc.comments {
+ written += wprintf(writer, "[Pre-root comment] %v\n", comment)
+ }
- attr_key_expected := "올빼미_id"
- attr_val_expected := "Foozle&#32;<![CDATA[<greeting>Hello, world!\"</greeting>]]>Barzle"
+ if doc.root != nil {
+ wprintln(writer, " --- ")
+ print_element(writer, doc.root)
+ wprintln(writer, " --- ")
+ }
- attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
- expect(t, attr.key == attr_key_expected, attr_err)
+ return written, .None
+ }
- attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
- expect(t, attr.val == attr_val_expected, attr_err)
+ print_element :: proc(writer: io.Writer, element: ^xml.Element, indent := 0) -> (written: int, err: io.Error) {
+ if element == nil { return }
+ using fmt
- expect(t, len(doc.root.children) > 0, "Expected the root tag to have children.")
- child := doc.root.children[0]
+ tab :: proc(writer: io.Writer, indent: int) {
+ for _ in 0..=indent {
+ wprintf(writer, "\t")
+ }
+ }
- first_child_ident := "부끄러운:barzle"
- attr_err = tprintf("Expected first child tag's ident to be %v, got %v", first_child_ident, child.ident)
- expect(t, child.ident == first_child_ident, attr_err)
+ tab(writer, indent)
- case 2:
- expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.")
+ if element.kind == .Element {
+ wprintf(writer, "<%v>\n", element.ident)
+ if len(element.value) > 0 {
+ tab(writer, indent + 1)
+ wprintf(writer, "[Value] %v\n", element.value)
+ }
- {
- attr := doc.root.attribs[0]
+ for attr in element.attribs {
+ tab(writer, indent + 1)
+ wprintf(writer, "[Attr] %v: %v\n", attr.key, attr.val)
+ }
- attr_key_expected := "version"
- attr_val_expected := "1.2"
+ for child in element.children {
+ print_element(writer, child, indent + 1)
+ }
+ } else if element.kind == .Comment {
+ wprintf(writer, "[COMMENT] %v\n", element.value)
+ }
- attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
- expect(t, attr.key == attr_key_expected, attr_err)
+ return written, .None
+ }
- attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
- expect(t, attr.val == attr_val_expected, attr_err)
- }
+ buf: strings.Builder
+ defer strings.destroy_builder(&buf)
- {
- attr := doc.root.attribs[1]
+ print(strings.to_writer(&buf), doc)
+ return strings.clone(strings.to_string(buf))
+}
- attr_key_expected := "xmlns"
- attr_val_expected := "urn:oasis:names:tc:xliff:document:1.2"
+@test
+run_tests :: proc(t: ^testing.T) {
+ using fmt
- attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
- expect(t, attr.key == attr_key_expected, attr_err)
+ for test in TESTS {
+ path := test_file_path(test.filename)
+ printf("\nTrying to parse %v\n\n", path)
- attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
- expect(t, attr.val == attr_val_expected, attr_err)
- }
+ doc, err := xml.parse(path, test.options, Silent)
+ defer xml.destroy(doc)
- case 3:
- expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.")
+ tree_string := doc_to_string(doc)
+ tree_bytes := transmute([]u8)tree_string
+ defer delete(tree_bytes)
- {
- attr := doc.root.attribs[0]
+ crc32 := hash.crc32(tree_bytes)
- attr_key_expected := "xmlns"
- attr_val_expected := "urn:oasis:names:tc:xliff:document:2.0"
+ failed := err != test.err
+ err_msg := tprintf("Expected return value %v, got %v", test.err, err)
+ expect(t, err == test.err, err_msg)
- attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
- expect(t, attr.key == attr_key_expected, attr_err)
+ failed |= crc32 != test.crc32
+ err_msg = tprintf("Expected CRC 0x%08x, got 0x%08x", test.crc32, crc32)
+ expect(t, crc32 == test.crc32, err_msg)
- attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
- expect(t, attr.val == attr_val_expected, attr_err)
- }
+ if failed {
+ /*
+ Don't fully print big trees.
+ */
+ tree_string = tree_string[:min(2_048, len(tree_string))]
+ println(tree_string)
+ }
+ }
+}
- {
- attr := doc.root.attribs[1]
+main :: proc() {
+ t := testing.T{}
- attr_key_expected := "version"
- attr_val_expected := "2.0"
+ track: mem.Tracking_Allocator
+ mem.tracking_allocator_init(&track, context.allocator)
+ context.allocator = mem.tracking_allocator(&track)
- attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
- expect(t, attr.key == attr_key_expected, attr_err)
+ run_tests(&t)
- attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
- expect(t, attr.val == attr_val_expected, attr_err)
- }
+ if len(track.allocation_map) > 0 {
+ for _, v in track.allocation_map {
+ err_msg := fmt.tprintf("%v Leaked %v bytes.", v.location, v.size)
+ expect(&t, false, err_msg)
}
+ }
- count += 1
- }
+ fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
} \ No newline at end of file