diff options
| author | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-12-05 02:17:48 +0100 |
|---|---|---|
| committer | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-12-05 02:52:23 +0100 |
| commit | d65d6edb0e1887871c4de6a4e8a1630927153eae (patch) | |
| tree | 962864b2793913332adfe390521eff747f4c4863 | |
| parent | 3d72e80ccf0f382f03a1c9407c4728862c5bca91 (diff) | |
[xml] Improve XML tests, test `core:encoding/entity`.
| -rw-r--r-- | core/encoding/entity/entity.odin | 7 | ||||
| -rw-r--r-- | core/encoding/entity/example/entity_example.odin | 1 | ||||
| -rw-r--r-- | core/encoding/entity/example/test.html | 2 | ||||
| -rw-r--r-- | tests/core/assets/XML/entities.html | 29 | ||||
| -rw-r--r-- | tests/core/assets/XML/nl_NL-xliff-1.2.xliff (renamed from tests/core/assets/XML/nl_NL-xliff-1.0.xliff) | 0 | ||||
| -rw-r--r-- | tests/core/assets/XML/utf8.xml | 2 | ||||
| -rw-r--r-- | tests/core/encoding/xml/test_core_xml.odin | 382 |
7 files changed, 269 insertions, 154 deletions
diff --git a/core/encoding/entity/entity.odin b/core/encoding/entity/entity.odin index 8742446e6..db1a5ad0b 100644 --- a/core/encoding/entity/entity.odin +++ b/core/encoding/entity/entity.odin @@ -115,7 +115,14 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator := We don't need to check if we need to write a `<`, because if it isn't CDATA or a comment, it couldn't have been part of an XML tag body to be decoded here. + + Keep in mind that we could already *be* inside a CDATA tag. + If so, write `<` as a literal and continue. */ + if in_data { + write_rune(&builder, '<') + continue + } in_data = _handle_xml_special(&t, &builder, options) or_return case ']': diff --git a/core/encoding/entity/example/entity_example.odin b/core/encoding/entity/example/entity_example.odin index 161a44827..882203f48 100644 --- a/core/encoding/entity/example/entity_example.odin +++ b/core/encoding/entity/example/entity_example.odin @@ -50,6 +50,7 @@ _main :: proc() { using fmt options := xml.Options{ flags = { .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities }} + doc, _ := xml.parse(#load("test.html"), options) defer xml.destroy(doc) diff --git a/core/encoding/entity/example/test.html b/core/encoding/entity/example/test.html index 62a0bb35a..ebbc6470c 100644 --- a/core/encoding/entity/example/test.html +++ b/core/encoding/entity/example/test.html @@ -22,7 +22,7 @@ </div>
<!-- EXPECTED: Foozle]! © BOX ® /BOX42&;1234& -->
<div>
- | | | fj ` \ ® ϱ ∳
+ | | | fj ` \ ® ϱ ∳ ⁏
</div>
</body>
</html>
\ No newline at end of file diff --git a/tests/core/assets/XML/entities.html b/tests/core/assets/XML/entities.html new file mode 100644 index 000000000..05a6b107e --- /dev/null +++ b/tests/core/assets/XML/entities.html @@ -0,0 +1,29 @@ +<html>
+ <head>
+ <title>Entity Reference Test</title>
+ <style>
+ body {
+ background: #000; color: #eee;
+ width: 40%;
+ margin-left: auto;
+ margin-right: auto;
+ font-size: 14pt;
+ }
+ </style>
+ </head>
+ <body>
+ <h1>Entity Reference Test</h1>
+ <div id="test_cdata_in_comment" foo="">
+ Foozle]! © <!-- <![CDATA[ ® ]]> -->42&;1234&
+ </div>
+ <!-- foo attribute should be empty but present -->
+ <!-- EXPECTED: Foozle]! © 42&;1234& -->
+ <div id="test_cdata_unwrap_and_passthrough">
+ Foozle]! © <![CDATA[BOX ® /BOX]]>42&;1234&
+ </div>
+ <!-- EXPECTED: Foozle]! © BOX ® /BOX42&;1234& -->
+ <div>
+ | | | fj ` \ ® ϱ ∳ ⁏
+ </div>
+ </body>
+</html>
\ No newline at end of file diff --git a/tests/core/assets/XML/nl_NL-xliff-1.0.xliff b/tests/core/assets/XML/nl_NL-xliff-1.2.xliff index 7a1abcd66..7a1abcd66 100644 --- a/tests/core/assets/XML/nl_NL-xliff-1.0.xliff +++ b/tests/core/assets/XML/nl_NL-xliff-1.2.xliff diff --git a/tests/core/assets/XML/utf8.xml b/tests/core/assets/XML/utf8.xml index c9ed3bf69..6e1a897ea 100644 --- a/tests/core/assets/XML/utf8.xml +++ b/tests/core/assets/XML/utf8.xml @@ -4,5 +4,5 @@ <부끄러운:barzle>
<name foo:bar="birmese">ရှက်စရာ ဇီးကွက်</name>
<nickname>Owl of Shame</nickname>
- <data>More CDATA <![CDATA[<greeting>Hello, world!</greeting><![CDATA] < ]]> Nonsense.</data>
+ <data>More CDATA <![CDATA[<greeting>Hello, world!</greeting><![CDATA] <$]]> Nonsense.</data>
</부끄러운:barzle>
\ No newline at end of file diff --git a/tests/core/encoding/xml/test_core_xml.odin b/tests/core/encoding/xml/test_core_xml.odin index c2e0aa172..5cb59e001 100644 --- a/tests/core/encoding/xml/test_core_xml.odin +++ b/tests/core/encoding/xml/test_core_xml.odin @@ -3,16 +3,16 @@ package test_core_xml import "core:encoding/xml" import "core:testing" import "core:mem" +import "core:strings" +import "core:io" import "core:fmt" +import "core:hash" Silent :: proc(pos: xml.Pos, fmt: string, args: ..any) { // Custom (silent) error handler. } -OPTIONS :: xml.Options{ - flags = { - .Ignore_Unsupported, .Intern_Comments, - }, +OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, .Intern_Comments, }, expected_doctype = "", } @@ -22,76 +22,153 @@ TEST_fail := 0 TEST :: struct { filename: string, options: xml.Options, - expected: struct { - error: xml.Error, - xml_version: string, - xml_encoding: string, - doctype: string, - }, + err: xml.Error, + crc32: u32, } +/* + Relative to ODIN_ROOT +*/ +TEST_FILE_PATH_PREFIX :: "tests/core/assets/XML" + TESTS :: []TEST{ /* First we test that certain files parse without error. */ + + { + /* + <?xml version="1.0" encoding="utf-8"?> + <!DOCTYPE 恥ずべきフクロウ> + <恥ずべきフクロウ 올빼미_id="Foozle <![CDATA[<greeting>Hello, world!"</greeting>]]>Barzle"> + <부끄러운:barzle> + <name foo:bar="birmese">ရှက်စရာ ဇီးကွက်</name> + <nickname>Owl of Shame</nickname> + <data>More CDATA <![CDATA[<greeting>Hello, world!</greeting><![CDATA] <$]]> Nonsense.</data> + </부끄러운:barzle> + */ + + /* + Tests UTF-8 idents and values. + Test namespaced ident. + Tests that nested partial CDATA start doesn't trip up parser. + */ + filename = "utf8.xml", + options = { + flags = { + .Ignore_Unsupported, .Intern_Comments, + }, + expected_doctype = "恥ずべきフクロウ", + }, + crc32 = 0x30d82264, + }, + + { + /* + Same as above. + Unbox CDATA in data tag. 
+ */ + filename = "utf8.xml", + options = { + flags = { + .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, + }, + expected_doctype = "恥ずべきフクロウ", + }, + crc32 = 0x6d38ac58, + }, + + { + /* + Simple Qt TS translation file. + `core:i18n` requires it to be parsed properly. + */ + filename = "nl_NL-qt-ts.ts", + options = { + flags = { + .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities, + }, + expected_doctype = "TS", + }, + crc32 = 0x7bce2630, + }, + { - filename = "assets/XML/utf8.xml", - options = OPTIONS, - expected = { - error = .None, - xml_version = "1.0", - xml_encoding = "utf-8", - doctype = "恥ずべきフクロウ", + /* + Simple XLiff 1.2 file. + `core:i18n` requires it to be parsed properly. + */ + filename = "nl_NL-xliff-1.2.xliff", + options = { + flags = { + .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities, + }, + expected_doctype = "xliff", }, + crc32 = 0x43f19d61, }, + + { + /* + Simple XLiff 2.0 file. + `core:i18n` requires it to be parsed properly. 
+ */ + filename = "nl_NL-xliff-2.0.xliff", + options = { + flags = { + .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities, + }, + expected_doctype = "xliff", + }, + crc32 = 0x961e7635, + }, + { - filename = "assets/XML/nl_NL-qt-ts.ts", - options = OPTIONS, - expected = { - error = .None, - xml_version = "1.0", - xml_encoding = "utf-8", - doctype = "TS", + filename = "entities.html", + options = { + flags = { + .Ignore_Unsupported, .Intern_Comments, + }, + expected_doctype = "html", }, + crc32 = 0xdb4a1e79, }, + { - filename = "assets/XML/nl_NL-xliff-1.0.xliff", - options = OPTIONS, - expected = { - error = .None, - xml_version = "1.0", - xml_encoding = "UTF-8", - doctype = "", + filename = "entities.html", + options = { + flags = { + .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, + }, + expected_doctype = "html", }, + crc32 = 0x82588917, }, + { - filename = "assets/XML/nl_NL-xliff-2.0.xliff", - options = OPTIONS, - expected = { - error = .None, - xml_version = "1.0", - xml_encoding = "utf-8", - doctype = "", + filename = "entities.html", + options = { + flags = { + .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities, + }, + expected_doctype = "html", }, + crc32 = 0x5e74d8a6, }, /* Then we test that certain errors are returned as expected. 
*/ { - filename = "assets/XML/utf8.xml", + filename = "utf8.xml", options = { flags = { .Ignore_Unsupported, .Intern_Comments, }, expected_doctype = "Odin", }, - expected = { - error = .Invalid_DocType, - xml_version = "1.0", - xml_encoding = "utf-8", - doctype = "恥ずべきフクロウ", - }, + err = .Invalid_DocType, + crc32 = 0x49b83d0a, }, } @@ -115,150 +192,151 @@ when ODIN_TEST { } } -main :: proc() { - t := testing.T{} - - track: mem.Tracking_Allocator - mem.tracking_allocator_init(&track, context.allocator) - context.allocator = mem.tracking_allocator(&track) +test_file_path :: proc(filename: string) -> (path: string) { - run_tests(&t) + path = fmt.tprintf("%v%v/%v", ODIN_ROOT, TEST_FILE_PATH_PREFIX, filename) + temp := transmute([]u8)path - if len(track.allocation_map) > 0 { - for _, v in track.allocation_map { - err_msg := fmt.tprintf("%v Leaked %v bytes.", v.location, v.size) - expect(&t, false, err_msg) + for r, i in path { + if r == '\\' { + temp[i] = '/' } - } - - fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) + } + return path } -@test -run_tests :: proc(t: ^testing.T) { - using fmt - - count := 0 +doc_to_string :: proc(doc: ^xml.Document) -> (result: string) { + /* + Effectively a clone of the debug printer in the xml package. + We duplicate it here so that the way it prints an XML document to a string is stable. - for test in TESTS { - printf("Trying to parse %v\n\n", test.filename) + This way we can hash the output. If it changes, it means that the document or how it was parsed changed, + not how it was printed. One less source of variability. 
+ */ + print :: proc(writer: io.Writer, doc: ^xml.Document) -> (written: int, err: io.Error) { + if doc == nil { return } + using fmt - doc, err := xml.parse(test.filename, test.options, Silent) - defer xml.destroy(doc) + written += wprintf(writer, "[XML Prolog]\n") - err_msg := tprintf("Expected return value %v, got %v", test.expected.error, err) - expect(t, err == test.expected.error, err_msg) + for attr in doc.prolog { + written += wprintf(writer, "\t%v: %v\n", attr.key, attr.val) + } - if len(test.expected.xml_version) > 0 { - xml_version := "" - for attr in doc.prolog { - if attr.key == "version" { - xml_version = attr.val - } - } + written += wprintf(writer, "[Encoding] %v\n", doc.encoding) - err_msg = tprintf("Expected XML version %v, got %v", test.expected.xml_version, xml_version) - expect(t, xml_version == test.expected.xml_version, err_msg) - } + if len(doc.doctype.ident) > 0 { + written += wprintf(writer, "[DOCTYPE] %v\n", doc.doctype.ident) - if len(test.expected.xml_encoding) > 0 { - xml_encoding := "" - for attr in doc.prolog { - if attr.key == "encoding" { - xml_encoding = attr.val - } + if len(doc.doctype.rest) > 0 { + wprintf(writer, "\t%v\n", doc.doctype.rest) } - - err_msg = tprintf("Expected XML encoding %v, got %v", test.expected.xml_encoding, xml_encoding) - expect(t, xml_encoding == test.expected.xml_encoding, err_msg) } - err_msg = tprintf("Expected DOCTYPE %v, got %v", test.expected.doctype, doc.doctype.ident) - expect(t, doc.doctype.ident == test.expected.doctype, err_msg) - - /* - File-specific tests. 
- */ - switch count { - case 0: - expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.") - attr := doc.root.attribs[0] + for comment in doc.comments { + written += wprintf(writer, "[Pre-root comment] %v\n", comment) + } - attr_key_expected := "올빼미_id" - attr_val_expected := "Foozle <![CDATA[<greeting>Hello, world!\"</greeting>]]>Barzle" + if doc.root != nil { + wprintln(writer, " --- ") + print_element(writer, doc.root) + wprintln(writer, " --- ") + } - attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key) - expect(t, attr.key == attr_key_expected, attr_err) + return written, .None + } - attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val) - expect(t, attr.val == attr_val_expected, attr_err) + print_element :: proc(writer: io.Writer, element: ^xml.Element, indent := 0) -> (written: int, err: io.Error) { + if element == nil { return } + using fmt - expect(t, len(doc.root.children) > 0, "Expected the root tag to have children.") - child := doc.root.children[0] + tab :: proc(writer: io.Writer, indent: int) { + for _ in 0..=indent { + wprintf(writer, "\t") + } + } - first_child_ident := "부끄러운:barzle" - attr_err = tprintf("Expected first child tag's ident to be %v, got %v", first_child_ident, child.ident) - expect(t, child.ident == first_child_ident, attr_err) + tab(writer, indent) - case 2: - expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.") + if element.kind == .Element { + wprintf(writer, "<%v>\n", element.ident) + if len(element.value) > 0 { + tab(writer, indent + 1) + wprintf(writer, "[Value] %v\n", element.value) + } - { - attr := doc.root.attribs[0] + for attr in element.attribs { + tab(writer, indent + 1) + wprintf(writer, "[Attr] %v: %v\n", attr.key, attr.val) + } - attr_key_expected := "version" - attr_val_expected := "1.2" + for child in element.children { + print_element(writer, child, indent + 1) + } + } else if element.kind == .Comment { + wprintf(writer, 
"[COMMENT] %v\n", element.value) + } - attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key) - expect(t, attr.key == attr_key_expected, attr_err) + return written, .None + } - attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val) - expect(t, attr.val == attr_val_expected, attr_err) - } + buf: strings.Builder + defer strings.destroy_builder(&buf) - { - attr := doc.root.attribs[1] + print(strings.to_writer(&buf), doc) + return strings.clone(strings.to_string(buf)) +} - attr_key_expected := "xmlns" - attr_val_expected := "urn:oasis:names:tc:xliff:document:1.2" +@test +run_tests :: proc(t: ^testing.T) { + using fmt - attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key) - expect(t, attr.key == attr_key_expected, attr_err) + for test in TESTS { + path := test_file_path(test.filename) + printf("\nTrying to parse %v\n\n", path) - attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val) - expect(t, attr.val == attr_val_expected, attr_err) - } + doc, err := xml.parse(path, test.options, Silent) + defer xml.destroy(doc) - case 3: - expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.") + tree_string := doc_to_string(doc) + tree_bytes := transmute([]u8)tree_string + defer delete(tree_bytes) - { - attr := doc.root.attribs[0] + crc32 := hash.crc32(tree_bytes) - attr_key_expected := "xmlns" - attr_val_expected := "urn:oasis:names:tc:xliff:document:2.0" + failed := err != test.err + err_msg := tprintf("Expected return value %v, got %v", test.err, err) + expect(t, err == test.err, err_msg) - attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key) - expect(t, attr.key == attr_key_expected, attr_err) + failed |= crc32 != test.crc32 + err_msg = tprintf("Expected CRC 0x%08x, got 0x%08x", test.crc32, crc32) + expect(t, crc32 == test.crc32, err_msg) - attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val) - expect(t, attr.val == attr_val_expected, attr_err) - } 
+ if failed { + /* + Don't fully print big trees. + */ + tree_string = tree_string[:min(2_048, len(tree_string))] + println(tree_string) + } + } +} - { - attr := doc.root.attribs[1] +main :: proc() { + t := testing.T{} - attr_key_expected := "version" - attr_val_expected := "2.0" + track: mem.Tracking_Allocator + mem.tracking_allocator_init(&track, context.allocator) + context.allocator = mem.tracking_allocator(&track) - attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key) - expect(t, attr.key == attr_key_expected, attr_err) + run_tests(&t) - attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val) - expect(t, attr.val == attr_val_expected, attr_err) - } + if len(track.allocation_map) > 0 { + for _, v in track.allocation_map { + err_msg := fmt.tprintf("%v Leaked %v bytes.", v.location, v.size) + expect(&t, false, err_msg) } + } - count += 1 - } + fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) }
\ No newline at end of file |