aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeroen van Rijn <Kelimion@users.noreply.github.com>2021-12-02 18:00:29 +0100
committerJeroen van Rijn <Kelimion@users.noreply.github.com>2021-12-05 02:52:23 +0100
commit580721440657a9fe5334b6bf095fb70b584fa4f6 (patch)
treeba857c3ce7cdd42d8da515a39e27da0d1162d54d
parent23baf56c8784901f67970760db5025c9c9f03b67 (diff)
[xml] Improvements.
-rw-r--r--core/encoding/xml/example/xml_example.odin69
-rw-r--r--core/encoding/xml/helpers.odin49
-rw-r--r--core/encoding/xml/tokenizer.odin6
-rw-r--r--core/encoding/xml/xml_reader.odin2
-rw-r--r--tests/core/assets/XML/.gitignore2
-rw-r--r--tests/core/assets/XML/nl_NL-qt-ts.ts (renamed from tests/core/assets/xml/nl_NL-qt-ts.ts)0
-rw-r--r--tests/core/assets/XML/nl_NL-xliff-1.0.xliff (renamed from tests/core/assets/xml/nl_NL-xliff-1.0.xliff)0
-rw-r--r--tests/core/assets/XML/nl_NL-xliff-2.0.xliff (renamed from tests/core/assets/xml/nl_NL-xliff-2.0.xliff)0
-rw-r--r--tests/core/assets/XML/utf8.xml (renamed from tests/core/assets/xml/utf8.xml)0
-rw-r--r--tests/core/download_assets.py43
-rw-r--r--tests/core/encoding/xml/test_core_xml.odin10
11 files changed, 136 insertions, 45 deletions
diff --git a/core/encoding/xml/example/xml_example.odin b/core/encoding/xml/example/xml_example.odin
index 82938c223..085252e92 100644
--- a/core/encoding/xml/example/xml_example.odin
+++ b/core/encoding/xml/example/xml_example.odin
@@ -1,45 +1,55 @@
package xml_example
import "core:encoding/xml"
+import "core:os"
+import "core:path"
import "core:mem"
-import "core:strings"
import "core:fmt"
-Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {
+/*
+ Silent error handler for the parser.
+*/
+Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {}
-}
-
-FILENAME :: "../../../../tests/core/assets/xml/nl_NL-xliff-1.0.xliff"
-DOC :: #load(FILENAME)
-
-OPTIONS :: xml.Options{
- flags = {
- .Ignore_Unsupported, .Intern_Comments,
- },
- expected_doctype = "",
-}
+OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, }, expected_doctype = "unicode", }
-_main :: proc() {
+example :: proc() {
using fmt
- println("--- DOCUMENT TO PARSE ---")
- println(string(DOC))
- println("--- /DOCUMENT TO PARSE ---\n")
+ filename := path.join(ODIN_ROOT, "tests", "core", "assets", "XML", "unicode.xml")
+ defer delete(filename)
- doc, err := xml.parse(DOC, OPTIONS, FILENAME, Error_Handler)
+ doc, err := xml.parse(filename, OPTIONS, Error_Handler)
defer xml.destroy(doc)
- buf: strings.Builder
- defer strings.destroy_builder(&buf)
- w := strings.to_writer(&buf)
+ if err != .None {
+ printf("Load/Parse error: %v\n", err)
+ if err == .File_Error {
+ printf("\"%v\" not found. Did you run \"tests\\download_assets.py\"?", filename)
+ }
+ os.exit(1)
+ }
- xml.print(w, doc)
- println(strings.to_string(buf))
+ printf("\"%v\" loaded and parsed.\n", filename)
- if err != .None {
- printf("Parse error: %v\n", err)
- } else {
- println("DONE!")
+ charlist, charlist_ok := xml.find_child_by_ident(doc.root, "charlist")
+ if !charlist_ok {
+ eprintln("Could not locate top-level `<charlist>` tag.")
+ os.exit(1)
+ }
+
+ printf("Found `<charlist>` with %v children.\n", len(charlist.children))
+
+ for char in charlist.children {
+ if char.ident != "character" {
+ eprintf("Expected `<character>`, got `<%v>`\n", char.ident)
+ os.exit(1)
+ }
+
+ if _, ok := xml.find_attribute_val_by_key(char, "dec"); !ok {
+ eprintln("`<character dec=\"...\">` attribute not found.")
+ os.exit(1)
+ }
}
}
@@ -50,12 +60,13 @@ main :: proc() {
mem.tracking_allocator_init(&track, context.allocator)
context.allocator = mem.tracking_allocator(&track)
- _main()
+ example()
if len(track.allocation_map) > 0 {
println()
for _, v in track.allocation_map {
printf("%v Leaked %v bytes.\n", v.location, v.size)
}
- }
+ }
+ println("Done and cleaned up!")
} \ No newline at end of file
diff --git a/core/encoding/xml/helpers.odin b/core/encoding/xml/helpers.odin
new file mode 100644
index 000000000..14597ddbd
--- /dev/null
+++ b/core/encoding/xml/helpers.odin
@@ -0,0 +1,49 @@
+package xml
+/*
+ An XML 1.0 / 1.1 parser
+
+ Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-3 license.
+
+ This file contains helper functions.
+*/
+
+
+/*
+ Find `tag`'s nth child (zero-based, so the default `nth = 0` is the first match) with a given ident. Returns `nil, false` if `tag` is nil or there is no such child.
+*/
+find_child_by_ident :: proc(tag: ^Element, ident: string, nth := 0) -> (res: ^Element, found: bool) {
+ if tag == nil { return nil, false }
+
+ count := 0
+ for child in tag.children {
+ /*
+ Skip children that are not elements, such as comments. They have no name.
+ */
+ if child.kind != .Element { continue }
+
+ /*
+ If the ident matches and it's the nth such child, return it. `count` is the number of matching children seen so far.
+ */
+ if child.ident == ident {
+ if count == nth { return child, true }
+ count += 1
+ }
+ }
+ return nil, false
+}
+
+/*
+ Find the value of `tag`'s attribute with the given key. Returns `"", false` if `tag` is nil or no attribute has that key.
+*/
+find_attribute_val_by_key :: proc(tag: ^Element, key: string) -> (val: string, found: bool) {
+ if tag == nil { return "", false }
+
+ for attr in tag.attribs {
+ /*
+ If the key matches, we're done. There can only ever be one attribute with the same name.
+ */
+ if attr.key == key { return attr.val, true }
+ }
+ return "", false
+} \ No newline at end of file
diff --git a/core/encoding/xml/tokenizer.odin b/core/encoding/xml/tokenizer.odin
index 95024518d..2da3b7683 100644
--- a/core/encoding/xml/tokenizer.odin
+++ b/core/encoding/xml/tokenizer.odin
@@ -403,11 +403,11 @@ scan :: proc(t: ^Tokenizer) -> Token {
case ':': kind = .Colon
case '"', '\'':
+ kind = .Invalid
+
lit, err = scan_string(t, t.offset, ch, true, false)
if err == .None {
kind = .String
- } else {
- kind = .Invalid
}
case '\n':
@@ -418,7 +418,7 @@ scan :: proc(t: ^Tokenizer) -> Token {
}
}
- if lit == "" {
+ if kind != .String && lit == "" {
lit = string(t.src[offset : t.offset])
}
return Token{kind, lit, pos}
diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin
index 146c278cb..563294309 100644
--- a/core/encoding/xml/xml_reader.odin
+++ b/core/encoding/xml/xml_reader.odin
@@ -519,6 +519,8 @@ parse_attribute :: proc(doc: ^Document) -> (attr: Attr, offset: int, err: Error)
_ = expect(t, .Eq) or_return
value := expect(t, .String) or_return
+ error(t, t.offset, "String: %v\n", value)
+
attr.key = strings.intern_get(&doc.intern, key.text)
attr.val = strings.intern_get(&doc.intern, value.text)
diff --git a/tests/core/assets/XML/.gitignore b/tests/core/assets/XML/.gitignore
new file mode 100644
index 000000000..32dc58b57
--- /dev/null
+++ b/tests/core/assets/XML/.gitignore
@@ -0,0 +1,2 @@
+# This file will be downloaded by download_assets.py
+unicode.xml \ No newline at end of file
diff --git a/tests/core/assets/xml/nl_NL-qt-ts.ts b/tests/core/assets/XML/nl_NL-qt-ts.ts
index 6ec3f2f47..6ec3f2f47 100644
--- a/tests/core/assets/xml/nl_NL-qt-ts.ts
+++ b/tests/core/assets/XML/nl_NL-qt-ts.ts
diff --git a/tests/core/assets/xml/nl_NL-xliff-1.0.xliff b/tests/core/assets/XML/nl_NL-xliff-1.0.xliff
index 7a1abcd66..7a1abcd66 100644
--- a/tests/core/assets/xml/nl_NL-xliff-1.0.xliff
+++ b/tests/core/assets/XML/nl_NL-xliff-1.0.xliff
diff --git a/tests/core/assets/xml/nl_NL-xliff-2.0.xliff b/tests/core/assets/XML/nl_NL-xliff-2.0.xliff
index 611ac80c4..611ac80c4 100644
--- a/tests/core/assets/xml/nl_NL-xliff-2.0.xliff
+++ b/tests/core/assets/XML/nl_NL-xliff-2.0.xliff
diff --git a/tests/core/assets/xml/utf8.xml b/tests/core/assets/XML/utf8.xml
index c9ed3bf69..c9ed3bf69 100644
--- a/tests/core/assets/xml/utf8.xml
+++ b/tests/core/assets/XML/utf8.xml
diff --git a/tests/core/download_assets.py b/tests/core/download_assets.py
index d86f7f1e7..831b5b13a 100644
--- a/tests/core/download_assets.py
+++ b/tests/core/download_assets.py
@@ -50,10 +50,7 @@ def try_download_file(url, out_file):
print("Could not download", url)
return 1
-def try_download_and_unpack_zip(suite):
- url = ASSETS_BASE_URL.format(suite, "{}.zip".format(suite))
- out_file = DOWNLOAD_BASE_PATH.format(suite) + "/{}.zip".format(suite)
-
+def try_download_and_unpack_zip(url, out_file, extract_path):
print("\tDownloading {} to {}.".format(url, out_file))
if try_download_file(url, out_file) is not None:
@@ -65,7 +62,6 @@ def try_download_and_unpack_zip(suite):
with zipfile.ZipFile(out_file) as z:
for file in z.filelist:
filename = file.filename
- extract_path = DOWNLOAD_BASE_PATH.format(suite)
print("\t\tExtracting: {}".format(filename))
z.extract(file, extract_path)
@@ -73,25 +69,56 @@ def try_download_and_unpack_zip(suite):
print("Could not extract ZIP file")
return 2
+def download_png_assets():
+ suite = "PNG"
+ url = ASSETS_BASE_URL.format(suite, "{}.zip".format(suite))
+ out_file = DOWNLOAD_BASE_PATH.format(suite) + "/{}.zip".format(suite)
+ extract_path = DOWNLOAD_BASE_PATH.format(suite)
-def main():
print("Downloading PNG assets")
# Make PNG assets path
try:
- path = DOWNLOAD_BASE_PATH.format("PNG")
+ path = DOWNLOAD_BASE_PATH.format(suite)
os.makedirs(path)
except FileExistsError:
pass
# Try downloading and unpacking the PNG assets
- r = try_download_and_unpack_zip("PNG")
+ r = try_download_and_unpack_zip(url, out_file, extract_path)
if r is not None:
return r
# We could fall back on downloading the PNG files individually, but it's slow
print("Done downloading PNG assets")
+
+def download_unicode_assets():
+ suite = "XML"
+ url = "https://www.w3.org/2003/entities/2007xml/unicode.xml.zip"
+ out_file = DOWNLOAD_BASE_PATH.format(suite) + "/{}.zip".format(suite)
+ extract_path = DOWNLOAD_BASE_PATH.format(suite)
+
+ print("Downloading {}.".format(url))
+
+ # Create the XML assets directory; an already existing directory is not an error
+ try:
+ path = DOWNLOAD_BASE_PATH.format(suite)
+ os.makedirs(path)
+ except FileExistsError:
+ pass
+
+ # Try downloading and unpacking the assets; a non-None result signals failure and is returned to the caller
+ r = try_download_and_unpack_zip(url, out_file, extract_path)
+ if r is not None:
+ return r
+
+ print("Done downloading Unicode/XML assets")
+
+def main():
+ download_png_assets()  # NOTE(review): the returned error code is discarded here
+ download_unicode_assets()  # NOTE(review): same here, so main() returns 0 even if a download failed
+
return 0
if __name__ == '__main__':
diff --git a/tests/core/encoding/xml/test_core_xml.odin b/tests/core/encoding/xml/test_core_xml.odin
index 7eefac212..c2e0aa172 100644
--- a/tests/core/encoding/xml/test_core_xml.odin
+++ b/tests/core/encoding/xml/test_core_xml.odin
@@ -35,7 +35,7 @@ TESTS :: []TEST{
First we test that certain files parse without error.
*/
{
- filename = "assets/xml/utf8.xml",
+ filename = "assets/XML/utf8.xml",
options = OPTIONS,
expected = {
error = .None,
@@ -45,7 +45,7 @@ TESTS :: []TEST{
},
},
{
- filename = "assets/xml/nl_NL-qt-ts.ts",
+ filename = "assets/XML/nl_NL-qt-ts.ts",
options = OPTIONS,
expected = {
error = .None,
@@ -55,7 +55,7 @@ TESTS :: []TEST{
},
},
{
- filename = "assets/xml/nl_NL-xliff-1.0.xliff",
+ filename = "assets/XML/nl_NL-xliff-1.0.xliff",
options = OPTIONS,
expected = {
error = .None,
@@ -65,7 +65,7 @@ TESTS :: []TEST{
},
},
{
- filename = "assets/xml/nl_NL-xliff-2.0.xliff",
+ filename = "assets/XML/nl_NL-xliff-2.0.xliff",
options = OPTIONS,
expected = {
error = .None,
@@ -79,7 +79,7 @@ TESTS :: []TEST{
Then we test that certain errors are returned as expected.
*/
{
- filename = "assets/xml/utf8.xml",
+ filename = "assets/XML/utf8.xml",
options = {
flags = {
.Ignore_Unsupported, .Intern_Comments,