diff options
| author | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-12-02 18:00:29 +0100 |
|---|---|---|
| committer | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-12-05 02:52:23 +0100 |
| commit | 580721440657a9fe5334b6bf095fb70b584fa4f6 (patch) | |
| tree | ba857c3ce7cdd42d8da515a39e27da0d1162d54d | |
| parent | 23baf56c8784901f67970760db5025c9c9f03b67 (diff) | |
[xml] Improvements.
| -rw-r--r-- | core/encoding/xml/example/xml_example.odin | 69 | ||||
| -rw-r--r-- | core/encoding/xml/helpers.odin | 49 | ||||
| -rw-r--r-- | core/encoding/xml/tokenizer.odin | 6 | ||||
| -rw-r--r-- | core/encoding/xml/xml_reader.odin | 2 | ||||
| -rw-r--r-- | tests/core/assets/XML/.gitignore | 2 | ||||
| -rw-r--r-- | tests/core/assets/XML/nl_NL-qt-ts.ts (renamed from tests/core/assets/xml/nl_NL-qt-ts.ts) | 0 | ||||
| -rw-r--r-- | tests/core/assets/XML/nl_NL-xliff-1.0.xliff (renamed from tests/core/assets/xml/nl_NL-xliff-1.0.xliff) | 0 | ||||
| -rw-r--r-- | tests/core/assets/XML/nl_NL-xliff-2.0.xliff (renamed from tests/core/assets/xml/nl_NL-xliff-2.0.xliff) | 0 | ||||
| -rw-r--r-- | tests/core/assets/XML/utf8.xml (renamed from tests/core/assets/xml/utf8.xml) | 0 | ||||
| -rw-r--r-- | tests/core/download_assets.py | 43 | ||||
| -rw-r--r-- | tests/core/encoding/xml/test_core_xml.odin | 10 |
11 files changed, 136 insertions, 45 deletions
diff --git a/core/encoding/xml/example/xml_example.odin b/core/encoding/xml/example/xml_example.odin index 82938c223..085252e92 100644 --- a/core/encoding/xml/example/xml_example.odin +++ b/core/encoding/xml/example/xml_example.odin @@ -1,45 +1,55 @@ package xml_example import "core:encoding/xml" +import "core:os" +import "core:path" import "core:mem" -import "core:strings" import "core:fmt" -Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) { +/* + Silent error handler for the parser. +*/ +Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {} -} - -FILENAME :: "../../../../tests/core/assets/xml/nl_NL-xliff-1.0.xliff" -DOC :: #load(FILENAME) - -OPTIONS :: xml.Options{ - flags = { - .Ignore_Unsupported, .Intern_Comments, - }, - expected_doctype = "", -} +OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, }, expected_doctype = "unicode", } -_main :: proc() { +example :: proc() { using fmt - println("--- DOCUMENT TO PARSE ---") - println(string(DOC)) - println("--- /DOCUMENT TO PARSE ---\n") + filename := path.join(ODIN_ROOT, "tests", "core", "assets", "XML", "unicode.xml") + defer delete(filename) - doc, err := xml.parse(DOC, OPTIONS, FILENAME, Error_Handler) + doc, err := xml.parse(filename, OPTIONS, Error_Handler) defer xml.destroy(doc) - buf: strings.Builder - defer strings.destroy_builder(&buf) - w := strings.to_writer(&buf) + if err != .None { + printf("Load/Parse error: %v\n", err) + if err == .File_Error { + printf("\"%v\" not found. 
Did you run \"tests\\download_assets.py\"?", filename) + } + os.exit(1) + } - xml.print(w, doc) - println(strings.to_string(buf)) + printf("\"%v\" loaded and parsed.\n", filename) - if err != .None { - printf("Parse error: %v\n", err) - } else { - println("DONE!") + charlist, charlist_ok := xml.find_child_by_ident(doc.root, "charlist") + if !charlist_ok { + eprintln("Could not locate top-level `<charlist>` tag.") + os.exit(1) + } + + printf("Found `<charlist>` with %v children.\n", len(charlist.children)) + + for char in charlist.children { + if char.ident != "character" { + eprintf("Expected `<character>`, got `<%v>`\n", char.ident) + os.exit(1) + } + + if _, ok := xml.find_attribute_val_by_key(char, "dec"); !ok { + eprintln("`<character dec=\"...\">` attribute not found.") + os.exit(1) + } } } @@ -50,12 +60,13 @@ main :: proc() { mem.tracking_allocator_init(&track, context.allocator) context.allocator = mem.tracking_allocator(&track) - _main() + example() if len(track.allocation_map) > 0 { println() for _, v in track.allocation_map { printf("%v Leaked %v bytes.\n", v.location, v.size) } - } + } + println("Done and cleaned up!") }
\ No newline at end of file diff --git a/core/encoding/xml/helpers.odin b/core/encoding/xml/helpers.odin new file mode 100644 index 000000000..14597ddbd --- /dev/null +++ b/core/encoding/xml/helpers.odin @@ -0,0 +1,49 @@ +package xml +/* + An XML 1.0 / 1.1 parser + + Copyright 2021 Jeroen van Rijn <nom@duclavier.com>. + Made available under Odin's BSD-3 license. + + This file contains helper functions. +*/ + + +/* + Find `tag`'s nth child with a given ident. +*/ +find_child_by_ident :: proc(tag: ^Element, ident: string, nth := 0) -> (res: ^Element, found: bool) { + if tag == nil { return nil, false } + + count := 0 + for child in tag.children { + /* + Skip comments. They have no name. + */ + if child.kind != .Element { continue } + + /* + If the ident matches and it's the nth such child, return it. + */ + if child.ident == ident { + if count == nth { return child, true } + count += 1 + } + } + return nil, false +} + +/* + Find an attribute by key. +*/ +find_attribute_val_by_key :: proc(tag: ^Element, key: string) -> (val: string, found: bool) { + if tag == nil { return "", false } + + for attr in tag.attribs { + /* + If the key matches, we're done. There can only ever be one attribute with the same name. + */ + if attr.key == key { return attr.val, true } + } + return "", false +}
\ No newline at end of file diff --git a/core/encoding/xml/tokenizer.odin b/core/encoding/xml/tokenizer.odin index 95024518d..2da3b7683 100644 --- a/core/encoding/xml/tokenizer.odin +++ b/core/encoding/xml/tokenizer.odin @@ -403,11 +403,11 @@ scan :: proc(t: ^Tokenizer) -> Token { case ':': kind = .Colon case '"', '\'': + kind = .Invalid + lit, err = scan_string(t, t.offset, ch, true, false) if err == .None { kind = .String - } else { - kind = .Invalid } case '\n': @@ -418,7 +418,7 @@ scan :: proc(t: ^Tokenizer) -> Token { } } - if lit == "" { + if kind != .String && lit == "" { lit = string(t.src[offset : t.offset]) } return Token{kind, lit, pos} diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin index 146c278cb..563294309 100644 --- a/core/encoding/xml/xml_reader.odin +++ b/core/encoding/xml/xml_reader.odin @@ -519,6 +519,8 @@ parse_attribute :: proc(doc: ^Document) -> (attr: Attr, offset: int, err: Error) _ = expect(t, .Eq) or_return value := expect(t, .String) or_return + error(t, t.offset, "String: %v\n", value) + attr.key = strings.intern_get(&doc.intern, key.text) attr.val = strings.intern_get(&doc.intern, value.text) diff --git a/tests/core/assets/XML/.gitignore b/tests/core/assets/XML/.gitignore new file mode 100644 index 000000000..32dc58b57 --- /dev/null +++ b/tests/core/assets/XML/.gitignore @@ -0,0 +1,2 @@ +# This file will be downloaded by download_assets.py
+unicode.xml
\ No newline at end of file diff --git a/tests/core/assets/xml/nl_NL-qt-ts.ts b/tests/core/assets/XML/nl_NL-qt-ts.ts index 6ec3f2f47..6ec3f2f47 100644 --- a/tests/core/assets/xml/nl_NL-qt-ts.ts +++ b/tests/core/assets/XML/nl_NL-qt-ts.ts diff --git a/tests/core/assets/xml/nl_NL-xliff-1.0.xliff b/tests/core/assets/XML/nl_NL-xliff-1.0.xliff index 7a1abcd66..7a1abcd66 100644 --- a/tests/core/assets/xml/nl_NL-xliff-1.0.xliff +++ b/tests/core/assets/XML/nl_NL-xliff-1.0.xliff diff --git a/tests/core/assets/xml/nl_NL-xliff-2.0.xliff b/tests/core/assets/XML/nl_NL-xliff-2.0.xliff index 611ac80c4..611ac80c4 100644 --- a/tests/core/assets/xml/nl_NL-xliff-2.0.xliff +++ b/tests/core/assets/XML/nl_NL-xliff-2.0.xliff diff --git a/tests/core/assets/xml/utf8.xml b/tests/core/assets/XML/utf8.xml index c9ed3bf69..c9ed3bf69 100644 --- a/tests/core/assets/xml/utf8.xml +++ b/tests/core/assets/XML/utf8.xml diff --git a/tests/core/download_assets.py b/tests/core/download_assets.py index d86f7f1e7..831b5b13a 100644 --- a/tests/core/download_assets.py +++ b/tests/core/download_assets.py @@ -50,10 +50,7 @@ def try_download_file(url, out_file): print("Could not download", url) return 1 -def try_download_and_unpack_zip(suite): - url = ASSETS_BASE_URL.format(suite, "{}.zip".format(suite)) - out_file = DOWNLOAD_BASE_PATH.format(suite) + "/{}.zip".format(suite) - +def try_download_and_unpack_zip(url, out_file, extract_path): print("\tDownloading {} to {}.".format(url, out_file)) if try_download_file(url, out_file) is not None: @@ -65,7 +62,6 @@ def try_download_and_unpack_zip(suite): with zipfile.ZipFile(out_file) as z: for file in z.filelist: filename = file.filename - extract_path = DOWNLOAD_BASE_PATH.format(suite) print("\t\tExtracting: {}".format(filename)) z.extract(file, extract_path) @@ -73,25 +69,56 @@ def try_download_and_unpack_zip(suite): print("Could not extract ZIP file") return 2 +def download_png_assets(): + suite = "PNG" + url = ASSETS_BASE_URL.format(suite, "{}.zip".format(suite)) 
+ out_file = DOWNLOAD_BASE_PATH.format(suite) + "/{}.zip".format(suite) + extract_path = DOWNLOAD_BASE_PATH.format(suite) -def main(): print("Downloading PNG assets") # Make PNG assets path try: - path = DOWNLOAD_BASE_PATH.format("PNG") + path = DOWNLOAD_BASE_PATH.format(suite) os.makedirs(path) except FileExistsError: pass # Try downloading and unpacking the PNG assets - r = try_download_and_unpack_zip("PNG") + r = try_download_and_unpack_zip(url, out_file, extract_path) if r is not None: return r # We could fall back on downloading the PNG files individually, but it's slow print("Done downloading PNG assets") + +def download_unicode_assets(): + suite = "XML" + url = "https://www.w3.org/2003/entities/2007xml/unicode.xml.zip" + out_file = DOWNLOAD_BASE_PATH.format(suite) + "/{}.zip".format(suite) + extract_path = DOWNLOAD_BASE_PATH.format(suite) + + print("Downloading {}.".format(url)) + + # Make XML assets path + try: + path = DOWNLOAD_BASE_PATH.format(suite) + os.makedirs(path) + except FileExistsError: + pass + + # Try downloading and unpacking the assets + r = try_download_and_unpack_zip(url, out_file, extract_path) + if r is not None: + return r + + print("Done downloading Unicode/XML assets") + +def main(): + download_png_assets() + download_unicode_assets() + return 0 if __name__ == '__main__': diff --git a/tests/core/encoding/xml/test_core_xml.odin b/tests/core/encoding/xml/test_core_xml.odin index 7eefac212..c2e0aa172 100644 --- a/tests/core/encoding/xml/test_core_xml.odin +++ b/tests/core/encoding/xml/test_core_xml.odin @@ -35,7 +35,7 @@ TESTS :: []TEST{ First we test that certain files parse without error. 
*/ { - filename = "assets/xml/utf8.xml", + filename = "assets/XML/utf8.xml", options = OPTIONS, expected = { error = .None, @@ -45,7 +45,7 @@ TESTS :: []TEST{ }, }, { - filename = "assets/xml/nl_NL-qt-ts.ts", + filename = "assets/XML/nl_NL-qt-ts.ts", options = OPTIONS, expected = { error = .None, @@ -55,7 +55,7 @@ TESTS :: []TEST{ }, }, { - filename = "assets/xml/nl_NL-xliff-1.0.xliff", + filename = "assets/XML/nl_NL-xliff-1.0.xliff", options = OPTIONS, expected = { error = .None, @@ -65,7 +65,7 @@ TESTS :: []TEST{ }, }, { - filename = "assets/xml/nl_NL-xliff-2.0.xliff", + filename = "assets/XML/nl_NL-xliff-2.0.xliff", options = OPTIONS, expected = { error = .None, @@ -79,7 +79,7 @@ TESTS :: []TEST{ Then we test that certain errors are returned as expected. */ { - filename = "assets/xml/utf8.xml", + filename = "assets/XML/utf8.xml", options = { flags = { .Ignore_Unsupported, .Intern_Comments, |