From b5c828fe4ee3f0942b2eda1dc5753e4ad6d38ea9 Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Tue, 30 Nov 2021 23:01:22 +0100 Subject: [xml] Initial implementation of `core:encoding/xml`. A from-scratch XML implementation, loosely modeled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816). Features: - Supports enough of the XML 1.0/1.1 spec to handle the 99.9% of XML documents in common current usage. - Simple to understand and use. Small. Caveats: - We do NOT support HTML in this package, as that may or may not be valid XML. If it works, great. If it doesn't, that's not considered a bug. - We do NOT support UTF-16. If you have a UTF-16 XML file, please convert it to UTF-8 first. Also, our condolences. - <!ELEMENT and <!ATTLIST are not supported, and will be either ignored or return an error depending on the parser options. TODO: - Optional CDATA unboxing. - Optional `&gt;`, `&#32;`, `&#x20;` and other escape substitution in tag bodies. - Test suite MAYBE: - XML writer? - Serialize/deserialize Odin types? 
--- core/encoding/xml/example/xml_example.odin | 55 ++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 core/encoding/xml/example/xml_example.odin (limited to 'core/encoding/xml/example') diff --git a/core/encoding/xml/example/xml_example.odin b/core/encoding/xml/example/xml_example.odin new file mode 100644 index 000000000..24a277de6 --- /dev/null +++ b/core/encoding/xml/example/xml_example.odin @@ -0,0 +1,55 @@ +package xml_example + +import "core:encoding/xml" +import "core:mem" +import "core:fmt" + +Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) { + +} + +FILENAME :: "../../../../tests/core/assets/xml/nl_NL-xliff-1.0.xliff" +DOC :: #load(FILENAME) + +OPTIONS :: xml.Options{ + flags = { + .Ignore_Unsupported, .Intern_Comments, + }, + expected_doctype = "", +} + +_main :: proc() { + using fmt + + println("--- DOCUMENT TO PARSE ---") + println(string(DOC)) + println("--- /DOCUMENT TO PARSE ---\n") + + doc, err := xml.parse(DOC, OPTIONS, FILENAME, Error_Handler) + defer xml.destroy(doc) + + xml.print(doc) + + if err != .None { + printf("Parse error: %v\n", err) + } else { + println("DONE!") + } +} + +main :: proc() { + using fmt + + track: mem.Tracking_Allocator + mem.tracking_allocator_init(&track, context.allocator) + context.allocator = mem.tracking_allocator(&track) + + _main() + + if len(track.allocation_map) > 0 { + println() + for _, v in track.allocation_map { + printf("%v Leaked %v bytes.\n", v.location, v.size) + } + } +} \ No newline at end of file -- cgit v1.2.3 From 46a4927acad674b3265969bd5bde591b480d0c73 Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Wed, 1 Dec 2021 00:32:35 +0100 Subject: [xml] Use `io.Writer` for `xml.print(doc)`. 
--- core/encoding/xml/debug_print.odin | 51 ++++++++++++++++-------------- core/encoding/xml/example/xml_example.odin | 8 ++++- core/encoding/xml/xml_reader.odin | 2 +- 3 files changed, 36 insertions(+), 25 deletions(-) (limited to 'core/encoding/xml/example') diff --git a/core/encoding/xml/debug_print.odin b/core/encoding/xml/debug_print.odin index 0b7ffa822..be1175cbc 100644 --- a/core/encoding/xml/debug_print.odin +++ b/core/encoding/xml/debug_print.odin @@ -10,64 +10,69 @@ package xml List of contributors: Jeroen van Rijn: Initial implementation. */ +import "core:io" import "core:fmt" /* Just for debug purposes. */ -print :: proc(doc: ^Document) { - assert(doc != nil) - +print :: proc(writer: io.Writer, doc: ^Document) -> (written: int, err: io.Error) { + if doc == nil { return } using fmt - println("[XML Prolog]") + + written += wprintf(writer, "[XML Prolog]\n") for attr in doc.prolog { - printf("\t%v: %v\n", attr.key, attr.val) + written += wprintf(writer, "\t%v: %v\n", attr.key, attr.val) } - printf("[Encoding] %v\n", doc.encoding) - printf("[DOCTYPE] %v\n", doc.doctype.ident) + written += wprintf(writer, "[Encoding] %v\n", doc.encoding) + written += wprintf(writer, "[DOCTYPE] %v\n", doc.doctype.ident) if len(doc.doctype.rest) > 0 { - printf("\t%v\n", doc.doctype.rest) + wprintf(writer, "\t%v\n", doc.doctype.rest) } if doc.root != nil { - println(" --- ") - print_element(0, doc.root) - println(" --- ") - } + wprintln(writer, " --- ") + print_element(writer, doc.root) + wprintln(writer, " --- ") + } + + return written, .None } -print_element :: proc(indent: int, element: ^Element) { +print_element :: proc(writer: io.Writer, element: ^Element, indent := 0) -> (written: int, err: io.Error) { if element == nil { return } using fmt - tab :: proc(indent: int) { + tab :: proc(writer: io.Writer, indent: int) { tabs := "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" i := max(0, min(indent, len(tabs))) - printf("%v", tabs[:i]) + 
wprintf(writer, "%v", tabs[:i]) } - tab(indent) + tab(writer, indent) if element.kind == .Element { - printf("<%v>\n", element.ident) + wprintf(writer, "<%v>\n", element.ident) if len(element.value) > 0 { - tab(indent + 1) - printf("[Value] %v\n", element.value) + tab(writer, indent + 1) + wprintf(writer, "[Value] %v\n", element.value) } for attr in element.attribs { - tab(indent + 1) - printf("[Attr] %v: %v\n", attr.key, attr.val) + tab(writer, indent + 1) + wprintf(writer, "[Attr] %v: %v\n", attr.key, attr.val) } for child in element.children { - print_element(indent + 1, child) + print_element(writer, child, indent + 1) } } else if element.kind == .Comment { - printf("[COMMENT] %v\n", element.value) + wprintf(writer, "[COMMENT] %v\n", element.value) } + + return written, .None } \ No newline at end of file diff --git a/core/encoding/xml/example/xml_example.odin b/core/encoding/xml/example/xml_example.odin index 24a277de6..82938c223 100644 --- a/core/encoding/xml/example/xml_example.odin +++ b/core/encoding/xml/example/xml_example.odin @@ -2,6 +2,7 @@ package xml_example import "core:encoding/xml" import "core:mem" +import "core:strings" import "core:fmt" Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) { @@ -28,7 +29,12 @@ _main :: proc() { doc, err := xml.parse(DOC, OPTIONS, FILENAME, Error_Handler) defer xml.destroy(doc) - xml.print(doc) + buf: strings.Builder + defer strings.destroy_builder(&buf) + w := strings.to_writer(&buf) + + xml.print(w, doc) + println(strings.to_string(buf)) if err != .None { printf("Parse error: %v\n", err) diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin index 526be5856..34f6e65d0 100644 --- a/core/encoding/xml/xml_reader.odin +++ b/core/encoding/xml/xml_reader.odin @@ -75,6 +75,7 @@ Option_Flag :: enum { */ Decode_SGML_Entities, } +Option_Flags :: bit_set[Option_Flag; u8] Document :: struct { root: ^Element, @@ -122,7 +123,6 @@ Options :: struct { flags: Option_Flags, expected_doctype: 
string, } -Option_Flags :: bit_set[Option_Flag] Encoding :: enum { Unknown, -- cgit v1.2.3 From 580721440657a9fe5334b6bf095fb70b584fa4f6 Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Thu, 2 Dec 2021 18:00:29 +0100 Subject: [xml] Improvements. --- core/encoding/xml/example/xml_example.odin | 69 +++++++++++++++++------------ core/encoding/xml/helpers.odin | 49 ++++++++++++++++++++ core/encoding/xml/tokenizer.odin | 6 +-- core/encoding/xml/xml_reader.odin | 2 + tests/core/assets/XML/.gitignore | 2 + tests/core/assets/XML/nl_NL-qt-ts.ts | 35 +++++++++++++++ tests/core/assets/XML/nl_NL-xliff-1.0.xliff | 38 ++++++++++++++++ tests/core/assets/XML/nl_NL-xliff-2.0.xliff | 52 ++++++++++++++++++++++ tests/core/assets/XML/utf8.xml | 8 ++++ tests/core/assets/xml/nl_NL-qt-ts.ts | 35 --------------- tests/core/assets/xml/nl_NL-xliff-1.0.xliff | 38 ---------------- tests/core/assets/xml/nl_NL-xliff-2.0.xliff | 52 ---------------------- tests/core/assets/xml/utf8.xml | 8 ---- tests/core/download_assets.py | 43 ++++++++++++++---- tests/core/encoding/xml/test_core_xml.odin | 10 ++--- 15 files changed, 269 insertions(+), 178 deletions(-) create mode 100644 core/encoding/xml/helpers.odin create mode 100644 tests/core/assets/XML/.gitignore create mode 100644 tests/core/assets/XML/nl_NL-qt-ts.ts create mode 100644 tests/core/assets/XML/nl_NL-xliff-1.0.xliff create mode 100644 tests/core/assets/XML/nl_NL-xliff-2.0.xliff create mode 100644 tests/core/assets/XML/utf8.xml delete mode 100644 tests/core/assets/xml/nl_NL-qt-ts.ts delete mode 100644 tests/core/assets/xml/nl_NL-xliff-1.0.xliff delete mode 100644 tests/core/assets/xml/nl_NL-xliff-2.0.xliff delete mode 100644 tests/core/assets/xml/utf8.xml (limited to 'core/encoding/xml/example') diff --git a/core/encoding/xml/example/xml_example.odin b/core/encoding/xml/example/xml_example.odin index 82938c223..085252e92 100644 --- a/core/encoding/xml/example/xml_example.odin +++ b/core/encoding/xml/example/xml_example.odin @@ -1,45 +1,55 
@@ package xml_example import "core:encoding/xml" +import "core:os" +import "core:path" import "core:mem" -import "core:strings" import "core:fmt" -Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) { +/* + Silent error handler for the parser. +*/ +Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {} -} - -FILENAME :: "../../../../tests/core/assets/xml/nl_NL-xliff-1.0.xliff" -DOC :: #load(FILENAME) - -OPTIONS :: xml.Options{ - flags = { - .Ignore_Unsupported, .Intern_Comments, - }, - expected_doctype = "", -} +OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, }, expected_doctype = "unicode", } -_main :: proc() { +example :: proc() { using fmt - println("--- DOCUMENT TO PARSE ---") - println(string(DOC)) - println("--- /DOCUMENT TO PARSE ---\n") + filename := path.join(ODIN_ROOT, "tests", "core", "assets", "XML", "unicode.xml") + defer delete(filename) - doc, err := xml.parse(DOC, OPTIONS, FILENAME, Error_Handler) + doc, err := xml.parse(filename, OPTIONS, Error_Handler) defer xml.destroy(doc) - buf: strings.Builder - defer strings.destroy_builder(&buf) - w := strings.to_writer(&buf) + if err != .None { + printf("Load/Parse error: %v\n", err) + if err == .File_Error { + printf("\"%v\" not found. 
Did you run \"tests\\download_assets.py\"?", filename) + } + os.exit(1) + } - xml.print(w, doc) - println(strings.to_string(buf)) + printf("\"%v\" loaded and parsed.\n", filename) - if err != .None { - printf("Parse error: %v\n", err) - } else { - println("DONE!") + charlist, charlist_ok := xml.find_child_by_ident(doc.root, "charlist") + if !charlist_ok { + eprintln("Could not locate top-level `` tag.") + os.exit(1) + } + + printf("Found `` with %v children.\n", len(charlist.children)) + + for char in charlist.children { + if char.ident != "character" { + eprintf("Expected ``, got `<%v>`\n", char.ident) + os.exit(1) + } + + if _, ok := xml.find_attribute_val_by_key(char, "dec"); !ok { + eprintln("`` attribute not found.") + os.exit(1) + } } } @@ -50,12 +60,13 @@ main :: proc() { mem.tracking_allocator_init(&track, context.allocator) context.allocator = mem.tracking_allocator(&track) - _main() + example() if len(track.allocation_map) > 0 { println() for _, v in track.allocation_map { printf("%v Leaked %v bytes.\n", v.location, v.size) } - } + } + println("Done and cleaned up!") } \ No newline at end of file diff --git a/core/encoding/xml/helpers.odin b/core/encoding/xml/helpers.odin new file mode 100644 index 000000000..14597ddbd --- /dev/null +++ b/core/encoding/xml/helpers.odin @@ -0,0 +1,49 @@ +package xml +/* + An XML 1.0 / 1.1 parser + + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-3 license. + + This file contains helper functions. +*/ + + +/* + Find `tag`'s nth child with a given ident. +*/ +find_child_by_ident :: proc(tag: ^Element, ident: string, nth := 0) -> (res: ^Element, found: bool) { + if tag == nil { return nil, false } + + count := 0 + for child in tag.children { + /* + Skip commments. They have no name. + */ + if child.kind != .Element { continue } + + /* + If the ident matches and it's the nth such child, return it. 
+ */ + if child.ident == ident { + if count == nth { return child, true } + count += 1 + } + } + return nil, false +} + +/* + Find an attribute by key. +*/ +find_attribute_val_by_key :: proc(tag: ^Element, key: string) -> (val: string, found: bool) { + if tag == nil { return "", false } + + for attr in tag.attribs { + /* + If the ident matches, we're done. There can only ever be one attribute with the same name. + */ + if attr.key == key { return attr.val, true } + } + return "", false +} \ No newline at end of file diff --git a/core/encoding/xml/tokenizer.odin b/core/encoding/xml/tokenizer.odin index 95024518d..2da3b7683 100644 --- a/core/encoding/xml/tokenizer.odin +++ b/core/encoding/xml/tokenizer.odin @@ -403,11 +403,11 @@ scan :: proc(t: ^Tokenizer) -> Token { case ':': kind = .Colon case '"', '\'': + kind = .Invalid + lit, err = scan_string(t, t.offset, ch, true, false) if err == .None { kind = .String - } else { - kind = .Invalid } case '\n': @@ -418,7 +418,7 @@ scan :: proc(t: ^Tokenizer) -> Token { } } - if lit == "" { + if kind != .String && lit == "" { lit = string(t.src[offset : t.offset]) } return Token{kind, lit, pos} diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin index 146c278cb..563294309 100644 --- a/core/encoding/xml/xml_reader.odin +++ b/core/encoding/xml/xml_reader.odin @@ -519,6 +519,8 @@ parse_attribute :: proc(doc: ^Document) -> (attr: Attr, offset: int, err: Error) _ = expect(t, .Eq) or_return value := expect(t, .String) or_return + error(t, t.offset, "String: %v\n", value) + attr.key = strings.intern_get(&doc.intern, key.text) attr.val = strings.intern_get(&doc.intern, value.text) diff --git a/tests/core/assets/XML/.gitignore b/tests/core/assets/XML/.gitignore new file mode 100644 index 000000000..32dc58b57 --- /dev/null +++ b/tests/core/assets/XML/.gitignore @@ -0,0 +1,2 @@ +# This file will be downloaded by download_assets.py +unicode.xml \ No newline at end of file diff --git 
a/tests/core/assets/XML/nl_NL-qt-ts.ts b/tests/core/assets/XML/nl_NL-qt-ts.ts new file mode 100644 index 000000000..6ec3f2f47 --- /dev/null +++ b/tests/core/assets/XML/nl_NL-qt-ts.ts @@ -0,0 +1,35 @@ + + + + + Page + + Text for translation + commenting + Tekst om te vertalen + + + Also text to translate + some text + Ook tekst om te vertalen + + + + installscript + + 99 bottles of beer on the wall + some new comments here + 99 flessen bier op de muur + + + + apple_count + + %d apple(s) + + %d appel + %d appels + + + + diff --git a/tests/core/assets/XML/nl_NL-xliff-1.0.xliff b/tests/core/assets/XML/nl_NL-xliff-1.0.xliff new file mode 100644 index 000000000..7a1abcd66 --- /dev/null +++ b/tests/core/assets/XML/nl_NL-xliff-1.0.xliff @@ -0,0 +1,38 @@ + + + + + + text + tekst + Context + + + text 1 + tekst 1 + Context 1 + + + text 2 + + Context of the segment 2 + + + text 3 + translation 3 + Context 3 + + + Plurals + + %d month + %d maand + + + %d months + %d maanden + + + + + diff --git a/tests/core/assets/XML/nl_NL-xliff-2.0.xliff b/tests/core/assets/XML/nl_NL-xliff-2.0.xliff new file mode 100644 index 000000000..611ac80c4 --- /dev/null +++ b/tests/core/assets/XML/nl_NL-xliff-2.0.xliff @@ -0,0 +1,52 @@ + + + + + Note for file + + + + Note for unit + + + text + + + + + + Note for unit 2 + + + text 2 + translation 2 + + + + + Note for unit 3 + + + text 3 + approved translation 3 + + + + + + Plurals + + + %d month + %d maand + + + + + %d months + %d maanden + + + + + \ No newline at end of file diff --git a/tests/core/assets/XML/utf8.xml b/tests/core/assets/XML/utf8.xml new file mode 100644 index 000000000..c9ed3bf69 --- /dev/null +++ b/tests/core/assets/XML/utf8.xml @@ -0,0 +1,8 @@ + + +<恥ずべきフクロウ 올빼미_id="Foozle Hello, world!"]]>Barzle"> +<부끄러운:barzle> + ရှက်စရာ ဇီးကွက် + Owl of Shame + More CDATA Hello, world! Nonsense. 
+ \ No newline at end of file diff --git a/tests/core/assets/xml/nl_NL-qt-ts.ts b/tests/core/assets/xml/nl_NL-qt-ts.ts deleted file mode 100644 index 6ec3f2f47..000000000 --- a/tests/core/assets/xml/nl_NL-qt-ts.ts +++ /dev/null @@ -1,35 +0,0 @@ - - - - - Page - - Text for translation - commenting - Tekst om te vertalen - - - Also text to translate - some text - Ook tekst om te vertalen - - - - installscript - - 99 bottles of beer on the wall - some new comments here - 99 flessen bier op de muur - - - - apple_count - - %d apple(s) - - %d appel - %d appels - - - - diff --git a/tests/core/assets/xml/nl_NL-xliff-1.0.xliff b/tests/core/assets/xml/nl_NL-xliff-1.0.xliff deleted file mode 100644 index 7a1abcd66..000000000 --- a/tests/core/assets/xml/nl_NL-xliff-1.0.xliff +++ /dev/null @@ -1,38 +0,0 @@ - - - - - - text - tekst - Context - - - text 1 - tekst 1 - Context 1 - - - text 2 - - Context of the segment 2 - - - text 3 - translation 3 - Context 3 - - - Plurals - - %d month - %d maand - - - %d months - %d maanden - - - - - diff --git a/tests/core/assets/xml/nl_NL-xliff-2.0.xliff b/tests/core/assets/xml/nl_NL-xliff-2.0.xliff deleted file mode 100644 index 611ac80c4..000000000 --- a/tests/core/assets/xml/nl_NL-xliff-2.0.xliff +++ /dev/null @@ -1,52 +0,0 @@ - - - - - Note for file - - - - Note for unit - - - text - - - - - - Note for unit 2 - - - text 2 - translation 2 - - - - - Note for unit 3 - - - text 3 - approved translation 3 - - - - - - Plurals - - - %d month - %d maand - - - - - %d months - %d maanden - - - - - \ No newline at end of file diff --git a/tests/core/assets/xml/utf8.xml b/tests/core/assets/xml/utf8.xml deleted file mode 100644 index c9ed3bf69..000000000 --- a/tests/core/assets/xml/utf8.xml +++ /dev/null @@ -1,8 +0,0 @@ - - -<恥ずべきフクロウ 올빼미_id="Foozle Hello, world!"]]>Barzle"> -<부끄러운:barzle> - ရှက်စရာ ဇီးကွက် - Owl of Shame - More CDATA Hello, world! Nonsense. 
- \ No newline at end of file diff --git a/tests/core/download_assets.py b/tests/core/download_assets.py index d86f7f1e7..831b5b13a 100644 --- a/tests/core/download_assets.py +++ b/tests/core/download_assets.py @@ -50,10 +50,7 @@ def try_download_file(url, out_file): print("Could not download", url) return 1 -def try_download_and_unpack_zip(suite): - url = ASSETS_BASE_URL.format(suite, "{}.zip".format(suite)) - out_file = DOWNLOAD_BASE_PATH.format(suite) + "/{}.zip".format(suite) - +def try_download_and_unpack_zip(url, out_file, extract_path): print("\tDownloading {} to {}.".format(url, out_file)) if try_download_file(url, out_file) is not None: @@ -65,7 +62,6 @@ def try_download_and_unpack_zip(suite): with zipfile.ZipFile(out_file) as z: for file in z.filelist: filename = file.filename - extract_path = DOWNLOAD_BASE_PATH.format(suite) print("\t\tExtracting: {}".format(filename)) z.extract(file, extract_path) @@ -73,25 +69,56 @@ def try_download_and_unpack_zip(suite): print("Could not extract ZIP file") return 2 +def download_png_assets(): + suite = "PNG" + url = ASSETS_BASE_URL.format(suite, "{}.zip".format(suite)) + out_file = DOWNLOAD_BASE_PATH.format(suite) + "/{}.zip".format(suite) + extract_path = DOWNLOAD_BASE_PATH.format(suite) -def main(): print("Downloading PNG assets") # Make PNG assets path try: - path = DOWNLOAD_BASE_PATH.format("PNG") + path = DOWNLOAD_BASE_PATH.format(suite) os.makedirs(path) except FileExistsError: pass # Try downloading and unpacking the PNG assets - r = try_download_and_unpack_zip("PNG") + r = try_download_and_unpack_zip(url, out_file, extract_path) if r is not None: return r # We could fall back on downloading the PNG files individually, but it's slow print("Done downloading PNG assets") + +def download_unicode_assets(): + suite = "XML" + url = "https://www.w3.org/2003/entities/2007xml/unicode.xml.zip" + out_file = DOWNLOAD_BASE_PATH.format(suite) + "/{}.zip".format(suite) + extract_path = DOWNLOAD_BASE_PATH.format(suite) + + 
print("Downloading {}.".format(url)) + + # Make XML assets path + try: + path = DOWNLOAD_BASE_PATH.format(suite) + os.makedirs(path) + except FileExistsError: + pass + + # Try downloading and unpacking the assets + r = try_download_and_unpack_zip(url, out_file, extract_path) + if r is not None: + return r + + print("Done downloading Unicode/XML assets") + +def main(): + download_png_assets() + download_unicode_assets() + return 0 if __name__ == '__main__': diff --git a/tests/core/encoding/xml/test_core_xml.odin b/tests/core/encoding/xml/test_core_xml.odin index 7eefac212..c2e0aa172 100644 --- a/tests/core/encoding/xml/test_core_xml.odin +++ b/tests/core/encoding/xml/test_core_xml.odin @@ -35,7 +35,7 @@ TESTS :: []TEST{ First we test that certain files parse without error. */ { - filename = "assets/xml/utf8.xml", + filename = "assets/XML/utf8.xml", options = OPTIONS, expected = { error = .None, @@ -45,7 +45,7 @@ TESTS :: []TEST{ }, }, { - filename = "assets/xml/nl_NL-qt-ts.ts", + filename = "assets/XML/nl_NL-qt-ts.ts", options = OPTIONS, expected = { error = .None, @@ -55,7 +55,7 @@ TESTS :: []TEST{ }, }, { - filename = "assets/xml/nl_NL-xliff-1.0.xliff", + filename = "assets/XML/nl_NL-xliff-1.0.xliff", options = OPTIONS, expected = { error = .None, @@ -65,7 +65,7 @@ TESTS :: []TEST{ }, }, { - filename = "assets/xml/nl_NL-xliff-2.0.xliff", + filename = "assets/XML/nl_NL-xliff-2.0.xliff", options = OPTIONS, expected = { error = .None, @@ -79,7 +79,7 @@ TESTS :: []TEST{ Then we test that certain errors are returned as expected. */ { - filename = "assets/xml/utf8.xml", + filename = "assets/XML/utf8.xml", options = { flags = { .Ignore_Unsupported, .Intern_Comments, -- cgit v1.2.3 From 7ec88d24302dcdea38ac09996a2279f4de4f6a25 Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Sun, 5 Dec 2021 21:06:33 +0100 Subject: [xml] Add option. 
--- core/encoding/entity/example/entity_example.odin | 4 +- core/encoding/xml/example/xml_example.odin | 53 ++++++++++++++---------- core/encoding/xml/xml_reader.odin | 26 ++++++++++-- tests/core/encoding/xml/test_core_xml.odin | 10 ++--- 4 files changed, 59 insertions(+), 34 deletions(-) (limited to 'core/encoding/xml/example') diff --git a/core/encoding/entity/example/entity_example.odin b/core/encoding/entity/example/entity_example.odin index 882203f48..6fc377f9d 100644 --- a/core/encoding/entity/example/entity_example.odin +++ b/core/encoding/entity/example/entity_example.odin @@ -64,8 +64,8 @@ main :: proc() { mem.tracking_allocator_init(&track, context.allocator) context.allocator = mem.tracking_allocator(&track) - _main() - //_entities() + // _main() + _entities() if len(track.allocation_map) > 0 { println() diff --git a/core/encoding/xml/example/xml_example.odin b/core/encoding/xml/example/xml_example.odin index 085252e92..daa3c5dab 100644 --- a/core/encoding/xml/example/xml_example.odin +++ b/core/encoding/xml/example/xml_example.odin @@ -2,35 +2,40 @@ package xml_example import "core:encoding/xml" import "core:os" -import "core:path" import "core:mem" import "core:fmt" - -/* - Silent error handler for the parser. 
-*/ -Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {} - -OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, }, expected_doctype = "unicode", } +import "core:time" +import "core:strings" +import "core:hash" example :: proc() { using fmt - filename := path.join(ODIN_ROOT, "tests", "core", "assets", "XML", "unicode.xml") - defer delete(filename) + doc: ^xml.Document + err: xml.Error + + DOC :: #load("../../../../tests/core/assets/XML/unicode.xml") - doc, err := xml.parse(filename, OPTIONS, Error_Handler) + parse_duration: time.Duration + { + time.SCOPED_TICK_DURATION(&parse_duration) + doc, err = xml.parse(DOC, xml.Options{flags={.Ignore_Unsupported}}) + } defer xml.destroy(doc) + ms := time.duration_milliseconds(parse_duration) + speed := (f64(1000.0) / ms) * f64(len(DOC)) / 1_024.0 / 1_024.0 + fmt.printf("Parse time: %v bytes in %.2f ms (%.2f MiB/s).\n", len(DOC), ms, speed) + if err != .None { printf("Load/Parse error: %v\n", err) if err == .File_Error { - printf("\"%v\" not found. Did you run \"tests\\download_assets.py\"?", filename) + println("\"unicode.xml\" not found. 
Did you run \"tests\\download_assets.py\"?") } os.exit(1) } - printf("\"%v\" loaded and parsed.\n", filename) + println("\"unicode.xml\" loaded and parsed.") charlist, charlist_ok := xml.find_child_by_ident(doc.root, "charlist") if !charlist_ok { @@ -40,17 +45,19 @@ example :: proc() { printf("Found `` with %v children.\n", len(charlist.children)) - for char in charlist.children { - if char.ident != "character" { - eprintf("Expected ``, got `<%v>`\n", char.ident) - os.exit(1) - } + crc32 := doc_hash(doc) + printf("[%v] CRC32: 0x%08x\n", "🎉" if crc32 == 0xcaa042b9 else "🤬", crc32) +} - if _, ok := xml.find_attribute_val_by_key(char, "dec"); !ok { - eprintln("`` attribute not found.") - os.exit(1) - } - } +doc_hash :: proc(doc: ^xml.Document, print := false) -> (crc32: u32) { + buf: strings.Builder + defer strings.destroy_builder(&buf) + w := strings.to_writer(&buf) + + xml.print(w, doc) + tree := strings.to_string(buf) + if print { fmt.println(tree) } + return hash.crc32(transmute([]u8)tree) } main :: proc() { diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin index 6f49b8e08..b169bd57a 100644 --- a/core/encoding/xml/xml_reader.odin +++ b/core/encoding/xml/xml_reader.odin @@ -71,6 +71,12 @@ Option_Flag :: enum { This option decodes them when encountered. */ Decode_SGML_Entities, + + /* + If a tag body has a comment, it will be stripped unless this option is given. + */ + Keep_Tag_Body_Comments, + } Option_Flags :: bit_set[Option_Flag; u8] @@ -413,15 +419,29 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err /* This should be a tag's body text. 
*/ - body_text := scan_string(t, t.offset) or_return + body_text := scan_string(t, t.offset) or_return + needs_processing := .Unbox_CDATA in opts.flags + needs_processing |= .Decode_SGML_Entities in opts.flags + + if !needs_processing { + element.value = strings.intern_get(&doc.intern, body_text) + continue + } - decode_opts := entity.XML_Decode_Options{ .Comment_Strip } + decode_opts := entity.XML_Decode_Options{} + if .Keep_Tag_Body_Comments not_in opts.flags { + decode_opts += { .Comment_Strip } + } if .Decode_SGML_Entities not_in opts.flags { decode_opts += { .No_Entity_Decode } } + if .Unbox_CDATA in opts.flags { - decode_opts += { .Unbox_CDATA, .Decode_CDATA } + decode_opts += { .Unbox_CDATA } + if .Decode_SGML_Entities in opts.flags { + decode_opts += { .Decode_CDATA } + } } decoded, decode_err := entity.decode_xml(body_text, decode_opts) diff --git a/tests/core/encoding/xml/test_core_xml.odin b/tests/core/encoding/xml/test_core_xml.odin index f9f7a2992..7669afe97 100644 --- a/tests/core/encoding/xml/test_core_xml.odin +++ b/tests/core/encoding/xml/test_core_xml.odin @@ -8,9 +8,7 @@ import "core:io" import "core:fmt" import "core:hash" -Silent :: proc(pos: xml.Pos, fmt: string, args: ..any) { - // Custom (silent) error handler. 
-} +Silent :: proc(pos: xml.Pos, format: string, args: ..any) {} OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, .Intern_Comments, }, expected_doctype = "", @@ -75,7 +73,7 @@ TESTS :: []TEST{ }, expected_doctype = "恥ずべきフクロウ", }, - crc32 = 0x6d38ac58, + crc32 = 0xad31d8e8, }, { @@ -131,7 +129,7 @@ TESTS :: []TEST{ }, expected_doctype = "html", }, - crc32 = 0xdb4a1e79, + crc32 = 0x573c1033, }, { @@ -306,7 +304,7 @@ run_tests :: proc(t: ^testing.T) { expect(t, err == test.err, err_msg) failed |= crc32 != test.crc32 - err_msg = tprintf("Expected CRC 0x%08x, got 0x%08x", test.crc32, crc32) + err_msg = tprintf("Expected CRC 0x%08x, got 0x%08x, with options %v", test.crc32, crc32, test.options) expect(t, crc32 == test.crc32, err_msg) if failed { -- cgit v1.2.3 From 80878264b63cd8476def629526b294b8e129791a Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Thu, 28 Apr 2022 15:29:00 +0200 Subject: [xml] Speedup. --- core/encoding/xml/debug_print.odin | 18 +- core/encoding/xml/example/xml_example.odin | 77 +++++--- core/encoding/xml/helpers.odin | 28 ++- core/encoding/xml/tokenizer.odin | 11 ++ core/encoding/xml/xml_reader.odin | 276 ++++++++++++++++------------- tests/core/encoding/xml/test_core_xml.odin | 17 +- 6 files changed, 245 insertions(+), 182 deletions(-) (limited to 'core/encoding/xml/example') diff --git a/core/encoding/xml/debug_print.odin b/core/encoding/xml/debug_print.odin index e6a8c9433..7c20ac123 100644 --- a/core/encoding/xml/debug_print.odin +++ b/core/encoding/xml/debug_print.odin @@ -1,8 +1,7 @@ -package xml /* An XML 1.0 / 1.1 parser - Copyright 2021 Jeroen van Rijn . + Copyright 2021-2022 Jeroen van Rijn . Made available under Odin's BSD-3 license. A from-scratch XML implementation, loosely modeled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816). @@ -10,6 +9,8 @@ package xml List of contributors: Jeroen van Rijn: Initial implementation. 
*/ +package xml + import "core:io" import "core:fmt" @@ -40,17 +41,16 @@ print :: proc(writer: io.Writer, doc: ^Document) -> (written: int, err: io.Error written += wprintf(writer, "[Pre-root comment] %v\n", comment) } - if doc.root != nil { + if len(doc.elements) > 0 { + wprintln(writer, " --- ") + print_element(writer, doc, 0) wprintln(writer, " --- ") - print_element(writer, doc.root) - wprintln(writer, " --- ") } return written, .None } -print_element :: proc(writer: io.Writer, element: ^Element, indent := 0) -> (written: int, err: io.Error) { - if element == nil { return } +print_element :: proc(writer: io.Writer, doc: ^Document, element_id: Element_ID, indent := 0) -> (written: int, err: io.Error) { using fmt tab :: proc(writer: io.Writer, indent: int) { @@ -61,6 +61,8 @@ print_element :: proc(writer: io.Writer, element: ^Element, indent := 0) -> (wri tab(writer, indent) + element := doc.elements[element_id] + if element.kind == .Element { wprintf(writer, "<%v>\n", element.ident) if len(element.value) > 0 { @@ -74,7 +76,7 @@ print_element :: proc(writer: io.Writer, element: ^Element, indent := 0) -> (wri } for child in element.children { - print_element(writer, child, indent + 1) + print_element(writer, doc, child, indent + 1) } } else if element.kind == .Comment { wprintf(writer, "[COMMENT] %v\n", element.value) diff --git a/core/encoding/xml/example/xml_example.odin b/core/encoding/xml/example/xml_example.odin index daa3c5dab..cadfcfb43 100644 --- a/core/encoding/xml/example/xml_example.odin +++ b/core/encoding/xml/example/xml_example.odin @@ -1,52 +1,85 @@ package xml_example import "core:encoding/xml" -import "core:os" import "core:mem" import "core:fmt" import "core:time" import "core:strings" import "core:hash" +N :: 1 + example :: proc() { using fmt - doc: ^xml.Document - err: xml.Error + docs: [N]^xml.Document + errs: [N]xml.Error + times: [N]time.Duration + + defer for round in 0..` tag.") - os.exit(1) + eprintln("Could not locate top-level `` tag.") 
+ return } - printf("Found `` with %v children.\n", len(charlist.children)) + printf("Found `` with %v children, %v elements total\n", len(docs[0].elements[charlist].children), docs[0].element_count) - crc32 := doc_hash(doc) + crc32 := doc_hash(docs[0]) printf("[%v] CRC32: 0x%08x\n", "🎉" if crc32 == 0xcaa042b9 else "🤬", crc32) + + for round in 0.. (crc32: u32) { diff --git a/core/encoding/xml/helpers.odin b/core/encoding/xml/helpers.odin index 14597ddbd..48f058334 100644 --- a/core/encoding/xml/helpers.odin +++ b/core/encoding/xml/helpers.odin @@ -1,22 +1,20 @@ -package xml /* An XML 1.0 / 1.1 parser - Copyright 2021 Jeroen van Rijn . + Copyright 2021-2022 Jeroen van Rijn . Made available under Odin's BSD-3 license. This file contains helper functions. */ +package xml - -/* - Find `tag`'s nth child with a given ident. -*/ -find_child_by_ident :: proc(tag: ^Element, ident: string, nth := 0) -> (res: ^Element, found: bool) { - if tag == nil { return nil, false } +// Find parent's nth child with a given ident. +find_child_by_ident :: proc(doc: ^Document, parent_id: Element_ID, ident: string, nth := 0) -> (res: Element_ID, found: bool) { + tag := doc.elements[parent_id] count := 0 - for child in tag.children { + for child_id in tag.children { + child := doc.elements[child_id] /* Skip commments. They have no name. */ @@ -26,18 +24,16 @@ find_child_by_ident :: proc(tag: ^Element, ident: string, nth := 0) -> (res: ^El If the ident matches and it's the nth such child, return it. */ if child.ident == ident { - if count == nth { return child, true } + if count == nth { return child_id, true } count += 1 } } - return nil, false + return 0, false } -/* - Find an attribute by key. -*/ -find_attribute_val_by_key :: proc(tag: ^Element, key: string) -> (val: string, found: bool) { - if tag == nil { return "", false } +// Find an attribute by key. 
+find_attribute_val_by_key :: proc(doc: ^Document, parent_id: Element_ID, key: string) -> (val: string, found: bool) { + tag := doc.elements[parent_id] for attr in tag.attribs { /* diff --git a/core/encoding/xml/tokenizer.odin b/core/encoding/xml/tokenizer.odin index 2da3b7683..c3fece76e 100644 --- a/core/encoding/xml/tokenizer.odin +++ b/core/encoding/xml/tokenizer.odin @@ -1,3 +1,14 @@ +/* + An XML 1.0 / 1.1 parser + + Copyright 2021-2022 Jeroen van Rijn . + Made available under Odin's BSD-3 license. + + A from-scratch XML implementation, loosely modeled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816). + + List of contributors: + Jeroen van Rijn: Initial implementation. +*/ package xml import "core:fmt" diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin index 0315b0e05..636dd0ae4 100644 --- a/core/encoding/xml/xml_reader.odin +++ b/core/encoding/xml/xml_reader.odin @@ -1,8 +1,7 @@ -package xml /* An XML 1.0 / 1.1 parser - Copyright 2021 Jeroen van Rijn . + Copyright 2021-2022 Jeroen van Rijn . Made available under Odin's BSD-3 license. A from-scratch XML implementation, loosely modelled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816). @@ -25,12 +24,17 @@ package xml List of contributors: Jeroen van Rijn: Initial implementation. 
*/ +package xml +// An XML 1.0 / 1.1 parser import "core:bytes" -import "core:strings" import "core:encoding/entity" +import "core:intrinsics" import "core:mem" import "core:os" +import "core:strings" + +likely :: intrinsics.expect DEFAULT_Options :: Options{ flags = { @@ -88,7 +92,9 @@ Option_Flag :: enum { Option_Flags :: bit_set[Option_Flag; u16] Document :: struct { - root: ^Element, + elements: [dynamic]Element, + element_count: Element_ID, + prolog: Attributes, encoding: Encoding, @@ -129,8 +135,8 @@ Element :: struct { Comment, }, - parent: ^Element, - children: [dynamic]^Element, + parent: Element_ID, + children: [dynamic]Element_ID, } Attr :: struct { @@ -185,7 +191,7 @@ Error :: enum { No_DocType, Too_Many_DocTypes, - DocType_Must_Proceed_Elements, + DocType_Must_Preceed_Elements, /* If a DOCTYPE is present _or_ the caller @@ -237,12 +243,16 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err doc.tokenizer = t doc.input = data + doc.elements = make([dynamic]Element, 1024, 1024, allocator) + // strings.intern_init(&doc.intern, allocator, allocator) err = .Unexpected_Token - element, parent: ^Element + element, parent: Element_ID - tag_is_open := false + tag_is_open := false + first_element := true + open: Token /* If a DOCTYPE is present, the root tag has to match. @@ -252,6 +262,7 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err loop: for { skip_whitespace(t) + // NOTE(Jeroen): This is faster as a switch. switch t.ch { case '<': /* @@ -259,118 +270,36 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err */ advance_rune(t) - open := scan(t) - #partial switch open.kind { - - case .Question: - /* - 0 { - /* - We've already seen a prolog. 
- */ - return doc, .Too_Many_Prologs - } else { - /* - Could be ` 0 { - return doc, .Too_Many_DocTypes - } - if doc.root != nil { - return doc, .DocType_Must_Proceed_Elements - } - parse_doctype(doc) or_return - - if len(expected_doctype) > 0 && expected_doctype != doc.doctype.ident { - error(t, t.offset, "Invalid DOCTYPE. Expected: %v, got: %v\n", expected_doctype, doc.doctype.ident) - return doc, .Invalid_DocType - } - expected_doctype = doc.doctype.ident - - case: - if .Error_on_Unsupported in opts.flags { - error(t, t.offset, "Unhandled: . - The grammar does not allow a comment to end in ---> - */ - expect(t, .Dash) - comment := scan_comment(t) or_return - - if .Intern_Comments in opts.flags { - if doc.root == nil { - append(&doc.comments, comment) - } else { - el := new(Element) - el.parent = element - el.kind = .Comment - el.value = comment - append(&element.children, el) - } - } - - case: - error(t, t.offset, "Invalid Token after 0 && expected_doctype != open.text { error(t, t.offset, "Root Tag doesn't match DOCTYPE. Expected: %v, got: %v\n", expected_doctype, open.text) return doc, .Invalid_DocType @@ -395,7 +324,7 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err Empty tag. Close it. */ expect(t, .Gt) or_return - parent = element.parent + parent = doc.elements[element].parent element = parent tag_is_open = false @@ -404,22 +333,103 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err return } - case .Slash: + } else if open.kind == .Slash { /* Close tag. */ ident := expect(t, .Ident) or_return _ = expect(t, .Gt) or_return - if element.ident != ident.text { - error(t, t.offset, "Mismatched Closing Tag. Expected %v, got %v\n", element.ident, ident.text) + if doc.elements[element].ident != ident.text { + error(t, t.offset, "Mismatched Closing Tag. 
Expected %v, got %v\n", doc.elements[element].ident, ident.text) return doc, .Mismatched_Closing_Tag } - parent = element.parent + parent = doc.elements[element].parent element = parent tag_is_open = false - case: + } else if open.kind == .Exclaim { + /* + 0 { + return doc, .Too_Many_DocTypes + } + if doc.element_count > 0 { + return doc, .DocType_Must_Preceed_Elements + } + parse_doctype(doc) or_return + + if len(expected_doctype) > 0 && expected_doctype != doc.doctype.ident { + error(t, t.offset, "Invalid DOCTYPE. Expected: %v, got: %v\n", expected_doctype, doc.doctype.ident) + return doc, .Invalid_DocType + } + expected_doctype = doc.doctype.ident + + case: + if .Error_on_Unsupported in opts.flags { + error(t, t.offset, "Unhandled: . + The grammar does not allow a comment to end in ---> + */ + expect(t, .Dash) + comment := scan_comment(t) or_return + + if .Intern_Comments in opts.flags { + if len(doc.elements) == 0 { + append(&doc.comments, comment) + } else { + el := new_element(doc) + doc.elements[el].parent = element + doc.elements[el].kind = .Comment + doc.elements[el].value = comment + append(&doc.elements[element].children, el) + } + } + + case: + error(t, t.offset, "Invalid Token after 0 { + /* + We've already seen a prolog. 
+ */ + return doc, .Too_Many_Prologs + } else { + /* + Could be ` (err: Error) { */ doc.doctype.rest = string(t.src[offset : t.offset - 1]) return .None +} + +Element_ID :: u32 + +new_element :: proc(doc: ^Document) -> (id: Element_ID) { + element_space := len(doc.elements) + + // Need to resize + if int(doc.element_count) + 1 > element_space { + if element_space < 65536 { + element_space *= 2 + } else { + element_space += 65536 + } + resize(&doc.elements, element_space) + } + + cur := doc.element_count + doc.element_count += 1 + + return cur } \ No newline at end of file diff --git a/tests/core/encoding/xml/test_core_xml.odin b/tests/core/encoding/xml/test_core_xml.odin index 7669afe97..82386b2bb 100644 --- a/tests/core/encoding/xml/test_core_xml.odin +++ b/tests/core/encoding/xml/test_core_xml.odin @@ -224,7 +224,7 @@ doc_to_string :: proc(doc: ^xml.Document) -> (result: string) { written += wprintf(writer, "[DOCTYPE] %v\n", doc.doctype.ident) if len(doc.doctype.rest) > 0 { - wprintf(writer, "\t%v\n", doc.doctype.rest) + wprintf(writer, "\t%v\n", doc.doctype.rest) } } @@ -232,17 +232,16 @@ doc_to_string :: proc(doc: ^xml.Document) -> (result: string) { written += wprintf(writer, "[Pre-root comment] %v\n", comment) } - if doc.root != nil { - wprintln(writer, " --- ") - print_element(writer, doc.root) - wprintln(writer, " --- ") + if doc.element_count > 0 { + wprintln(writer, " --- ") + print_element(writer, doc, 0) + wprintln(writer, " --- ") } return written, .None } - print_element :: proc(writer: io.Writer, element: ^xml.Element, indent := 0) -> (written: int, err: io.Error) { - if element == nil { return } + print_element :: proc(writer: io.Writer, doc: ^xml.Document, element_id: xml.Element_ID, indent := 0) -> (written: int, err: io.Error) { using fmt tab :: proc(writer: io.Writer, indent: int) { @@ -253,6 +252,8 @@ doc_to_string :: proc(doc: ^xml.Document) -> (result: string) { tab(writer, indent) + element := doc.elements[element_id] + if element.kind == 
.Element { wprintf(writer, "<%v>\n", element.ident) if len(element.value) > 0 { @@ -266,7 +267,7 @@ doc_to_string :: proc(doc: ^xml.Document) -> (result: string) { } for child in element.children { - print_element(writer, child, indent + 1) + print_element(writer, doc, child, indent + 1) } } else if element.kind == .Comment { wprintf(writer, "[COMMENT] %v\n", element.value) -- cgit v1.2.3 From d224679619e4b8b41c62d3cf1909ea05a39f569e Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 12 May 2022 15:57:03 +0100 Subject: Minor name changes within `core:encoding/xml` for consistency --- core/encoding/xml/debug_print.odin | 2 +- core/encoding/xml/example/xml_example.odin | 2 +- core/encoding/xml/xml_reader.odin | 50 ++++++++++++++---------------- 3 files changed, 26 insertions(+), 28 deletions(-) (limited to 'core/encoding/xml/example') diff --git a/core/encoding/xml/debug_print.odin b/core/encoding/xml/debug_print.odin index 7c20ac123..e9a1cb160 100644 --- a/core/encoding/xml/debug_print.odin +++ b/core/encoding/xml/debug_print.odin @@ -23,7 +23,7 @@ print :: proc(writer: io.Writer, doc: ^Document) -> (written: int, err: io.Error written += wprintf(writer, "[XML Prolog]\n") - for attr in doc.prolog { + for attr in doc.prologue { written += wprintf(writer, "\t%v: %v\n", attr.key, attr.val) } diff --git a/core/encoding/xml/example/xml_example.odin b/core/encoding/xml/example/xml_example.odin index cadfcfb43..f7e74840e 100644 --- a/core/encoding/xml/example/xml_example.odin +++ b/core/encoding/xml/example/xml_example.odin @@ -35,7 +35,7 @@ example :: proc() { times[round] = time.tick_diff(start, end) } - fastest := time.Duration(max(i64)) + fastest := max(time.Duration) slowest := time.Duration(0) total := time.Duration(0) diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin index 151d44e2a..b77ae97b3 100644 --- a/core/encoding/xml/xml_reader.odin +++ b/core/encoding/xml/xml_reader.odin @@ -36,10 +36,8 @@ import "core:strings" likely :: 
intrinsics.expect -DEFAULT_Options :: Options{ - flags = { - .Ignore_Unsupported, - }, +DEFAULT_OPTIONS :: Options{ + flags = {.Ignore_Unsupported}, expected_doctype = "", } @@ -51,7 +49,7 @@ Option_Flag :: enum { Input_May_Be_Modified, /* - Document MUST start with ` (doc: ^Document, err: Error) { +parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) { data := data context.allocator = allocator @@ -411,10 +409,10 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err #partial switch next.kind { case .Ident: if len(next.text) == 3 && strings.to_lower(next.text, context.temp_allocator) == "xml" { - parse_prolog(doc) or_return - } else if len(doc.prolog) > 0 { + parse_prologue(doc) or_return + } else if len(doc.prologue) > 0 { /* - We've already seen a prolog. + We've already seen a prologue. */ return doc, .Too_Many_Prologs } else { @@ -481,7 +479,7 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err } } - if .Must_Have_Prolog in opts.flags && len(doc.prolog) == 0 { + if .Must_Have_Prolog in opts.flags && len(doc.prologue) == 0 { return doc, .No_Prolog } @@ -493,16 +491,16 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err return doc, .None } -parse_from_string :: proc(data: string, options := DEFAULT_Options, path := "", error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) { +parse_string :: proc(data: string, options := DEFAULT_OPTIONS, path := "", error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) { _data := transmute([]u8)data - return parse_from_slice(_data, options, path, error_handler, allocator) + return parse_bytes(_data, options, path, error_handler, allocator) } -parse :: proc { parse_from_string, parse_from_slice } +parse :: proc { 
parse_string, parse_bytes } // Load an XML file -load_from_file :: proc(filename: string, options := DEFAULT_Options, error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) { +load_from_file :: proc(filename: string, options := DEFAULT_OPTIONS, error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) { context.allocator = allocator options := options @@ -511,7 +509,7 @@ load_from_file :: proc(filename: string, options := DEFAULT_Options, error_handl options.flags += { .Input_May_Be_Modified } - return parse_from_slice(data, options, filename, error_handler, allocator) + return parse_bytes(data, options, filename, error_handler, allocator) } destroy :: proc(doc: ^Document) { @@ -523,7 +521,7 @@ destroy :: proc(doc: ^Document) { } delete(doc.elements) - delete(doc.prolog) + delete(doc.prologue) delete(doc.comments) delete(doc.input) @@ -556,7 +554,7 @@ expect :: proc(t: ^Tokenizer, kind: Token_Kind) -> (tok: Token, err: Error) { return tok, .Unexpected_Token } -parse_attribute :: proc(doc: ^Document) -> (attr: Attr, offset: int, err: Error) { +parse_attribute :: proc(doc: ^Document) -> (attr: Attribute, offset: int, err: Error) { assert(doc != nil) context.allocator = doc.allocator t := doc.tokenizer @@ -574,7 +572,7 @@ parse_attribute :: proc(doc: ^Document) -> (attr: Attr, offset: int, err: Error) return } -check_duplicate_attributes :: proc(t: ^Tokenizer, attribs: Attributes, attr: Attr, offset: int) -> (err: Error) { +check_duplicate_attributes :: proc(t: ^Tokenizer, attribs: Attributes, attr: Attribute, offset: int) -> (err: Error) { for a in attribs { if attr.key == a.key { error(t, offset, "Duplicate attribute: %v\n", attr.key) @@ -598,21 +596,21 @@ parse_attributes :: proc(doc: ^Document, attribs: ^Attributes) -> (err: Error) { return .None } -parse_prolog :: proc(doc: ^Document) -> (err: Error) { +parse_prologue :: proc(doc: ^Document) -> (err: Error) { assert(doc 
!= nil) context.allocator = doc.allocator t := doc.tokenizer offset := t.offset - parse_attributes(doc, &doc.prolog) or_return + parse_attributes(doc, &doc.prologue) or_return - for attr in doc.prolog { + for attr in doc.prologue { switch attr.key { case "version": switch attr.val { case "1.0", "1.1": case: - error(t, offset, "[parse_prolog] Warning: Unhandled XML version: %v\n", attr.val) + error(t, offset, "[parse_prologue] Warning: Unhandled XML version: %v\n", attr.val) } case "encoding": @@ -627,7 +625,7 @@ parse_prolog :: proc(doc: ^Document) -> (err: Error) { /* Unrecognized encoding, assume UTF-8. */ - error(t, offset, "[parse_prolog] Warning: Unrecognized encoding: %v\n", attr.val) + error(t, offset, "[parse_prologue] Warning: Unrecognized encoding: %v\n", attr.val) } case: -- cgit v1.2.3