aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeroen van Rijn <Kelimion@users.noreply.github.com>2021-11-30 23:01:22 +0100
committerJeroen van Rijn <Kelimion@users.noreply.github.com>2021-12-05 02:52:22 +0100
commitb5c828fe4ee3f0942b2eda1dc5753e4ad6d38ea9 (patch)
treeffbd45adb60e3de951dc2948801d5a57b21dd2c9
parent6ce5608003e630bc0de1c591fd4cbea3fe59e1d3 (diff)
[xml] Initial implementation of `core:encoding/xml`.
A from-scratch XML implementation, loosely modeled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816). Features: - Supports enough of the XML 1.0/1.1 spec to handle 99.9% of XML documents in common current usage. - Simple to understand and use. Small. Caveats: - We do NOT support HTML in this package, as that may or may not be valid XML. If it works, great. If it doesn't, that's not considered a bug. - We do NOT support UTF-16. If you have a UTF-16 XML file, please convert it to UTF-8 first. Also, our condolences. - <!ELEMENT and <!ATTLIST are not supported, and will be either ignored or return an error depending on the parser options. TODO: - Optional CDATA unboxing. - Optional `&gt;`, `&#32;`, `&#x20;` and other escape substitution in tag bodies. - Test suite. MAYBE: - XML writer? - Serialize/deserialize Odin types?
-rw-r--r--core/encoding/xml/debug_print.odin73
-rw-r--r--core/encoding/xml/example/xml_example.odin55
-rw-r--r--core/encoding/xml/tokenizer.odin339
-rw-r--r--core/encoding/xml/xml_reader.odin651
-rw-r--r--tests/core/Makefile17
-rw-r--r--tests/core/assets/xml/nl_NL-qt-ts.ts35
-rw-r--r--tests/core/assets/xml/nl_NL-xliff-1.0.xliff38
-rw-r--r--tests/core/assets/xml/nl_NL-xliff-2.0.xliff52
-rw-r--r--tests/core/assets/xml/utf8.xml8
-rw-r--r--tests/core/build.bat15
-rw-r--r--tests/core/encoding/json/test_core_json.odin (renamed from tests/core/encoding/test_core_json.odin)36
-rw-r--r--tests/core/encoding/xml/test_core_xml.odin264
12 files changed, 1553 insertions, 30 deletions
diff --git a/core/encoding/xml/debug_print.odin b/core/encoding/xml/debug_print.odin
new file mode 100644
index 000000000..0b7ffa822
--- /dev/null
+++ b/core/encoding/xml/debug_print.odin
@@ -0,0 +1,73 @@
+package xml
+/*
+ An XML 1.0 / 1.1 parser
+
+ Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-3 license.
+
+ A from-scratch XML implementation, loosely modeled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816).
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation.
+*/
+import "core:fmt"
+
+/*
+ Just for debug purposes.
+*/
+print :: proc(doc: ^Document) {
+ assert(doc != nil)
+
+ using fmt
+ println("[XML Prolog]")
+
+ for attr in doc.prolog {
+ printf("\t%v: %v\n", attr.key, attr.val)
+ }
+
+ printf("[Encoding] %v\n", doc.encoding)
+ printf("[DOCTYPE] %v\n", doc.doctype.ident)
+
+ if len(doc.doctype.rest) > 0 {
+ printf("\t%v\n", doc.doctype.rest)
+ }
+
+ if doc.root != nil {
+ println(" --- ")
+ print_element(0, doc.root)
+ println(" --- ")
+ }
+}
+
+print_element :: proc(indent: int, element: ^Element) {
+ if element == nil { return }
+ using fmt
+
+ tab :: proc(indent: int) {
+ tabs := "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
+
+ i := max(0, min(indent, len(tabs)))
+ printf("%v", tabs[:i])
+ }
+
+ tab(indent)
+
+ if element.kind == .Element {
+ printf("<%v>\n", element.ident)
+ if len(element.value) > 0 {
+ tab(indent + 1)
+ printf("[Value] %v\n", element.value)
+ }
+
+ for attr in element.attribs {
+ tab(indent + 1)
+ printf("[Attr] %v: %v\n", attr.key, attr.val)
+ }
+
+ for child in element.children {
+ print_element(indent + 1, child)
+ }
+ } else if element.kind == .Comment {
+ printf("[COMMENT] %v\n", element.value)
+ }
+} \ No newline at end of file
diff --git a/core/encoding/xml/example/xml_example.odin b/core/encoding/xml/example/xml_example.odin
new file mode 100644
index 000000000..24a277de6
--- /dev/null
+++ b/core/encoding/xml/example/xml_example.odin
@@ -0,0 +1,55 @@
+package xml_example
+
+import "core:encoding/xml"
+import "core:mem"
+import "core:fmt"
+
+Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {
+
+}
+
+FILENAME :: "../../../../tests/core/assets/xml/nl_NL-xliff-1.0.xliff"
+DOC :: #load(FILENAME)
+
+OPTIONS :: xml.Options{
+ flags = {
+ .Ignore_Unsupported, .Intern_Comments,
+ },
+ expected_doctype = "",
+}
+
+_main :: proc() {
+ using fmt
+
+ println("--- DOCUMENT TO PARSE ---")
+ println(string(DOC))
+ println("--- /DOCUMENT TO PARSE ---\n")
+
+ doc, err := xml.parse(DOC, OPTIONS, FILENAME, Error_Handler)
+ defer xml.destroy(doc)
+
+ xml.print(doc)
+
+ if err != .None {
+ printf("Parse error: %v\n", err)
+ } else {
+ println("DONE!")
+ }
+}
+
+main :: proc() {
+ using fmt
+
+ track: mem.Tracking_Allocator
+ mem.tracking_allocator_init(&track, context.allocator)
+ context.allocator = mem.tracking_allocator(&track)
+
+ _main()
+
+ if len(track.allocation_map) > 0 {
+ println()
+ for _, v in track.allocation_map {
+ printf("%v Leaked %v bytes.\n", v.location, v.size)
+ }
+ }
+} \ No newline at end of file
diff --git a/core/encoding/xml/tokenizer.odin b/core/encoding/xml/tokenizer.odin
new file mode 100644
index 000000000..a63dca5bd
--- /dev/null
+++ b/core/encoding/xml/tokenizer.odin
@@ -0,0 +1,339 @@
+package xml
+
+import "core:fmt"
+import "core:unicode"
+import "core:unicode/utf8"
+
+Error_Handler :: #type proc(pos: Pos, fmt: string, args: ..any)
+
+Token :: struct {
+ kind: Token_Kind,
+ text: string,
+ pos: Pos,
+}
+
+Pos :: struct {
+ file: string,
+ offset: int, // starting at 0
+ line: int, // starting at 1
+ column: int, // starting at 1
+}
+
+Token_Kind :: enum {
+ Invalid,
+
+ Ident,
+ Literal,
+ Rune,
+ String,
+
+ Double_Quote, // "
+ Single_Quote, // '
+ Colon, // :
+
+ Eq, // =
+ Lt, // <
+ Gt, // >
+ Exclaim, // !
+ Question, // ?
+ Hash, // #
+ Slash, // /
+ Dash, // -
+
+ Open_Bracket, // [
+ Close_Bracket, // ]
+
+ EOF,
+}
+
+CDATA_START :: "<![CDATA["
+CDATA_END :: "]]>"
+
+Tokenizer :: struct {
+ // Immutable data
+ path: string,
+ src: string,
+ err: Error_Handler,
+
+ // Tokenizing state
+ ch: rune,
+ offset: int,
+ read_offset: int,
+ line_offset: int,
+ line_count: int,
+
+ // Mutable data
+ error_count: int,
+}
+
+init :: proc(t: ^Tokenizer, src: string, path: string, err: Error_Handler = default_error_handler) {
+ t.src = src
+ t.err = err
+ t.ch = ' '
+ t.offset = 0
+ t.read_offset = 0
+ t.line_offset = 0
+ t.line_count = len(src) > 0 ? 1 : 0
+ t.error_count = 0
+ t.path = path
+
+ advance_rune(t)
+ if t.ch == utf8.RUNE_BOM {
+ advance_rune(t)
+ }
+}
+
+@(private)
+offset_to_pos :: proc(t: ^Tokenizer, offset: int) -> Pos {
+ line := t.line_count
+ column := offset - t.line_offset + 1
+
+ return Pos {
+ file = t.path,
+ offset = offset,
+ line = line,
+ column = column,
+ }
+}
+
+default_error_handler :: proc(pos: Pos, msg: string, args: ..any) {
+ fmt.eprintf("%s(%d:%d) ", pos.file, pos.line, pos.column)
+ fmt.eprintf(msg, ..args)
+ fmt.eprintf("\n")
+}
+
+error :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) {
+ pos := offset_to_pos(t, offset)
+ if t.err != nil {
+ t.err(pos, msg, ..args)
+ }
+ t.error_count += 1
+}
+
+advance_rune :: proc(using t: ^Tokenizer) {
+ if read_offset < len(src) {
+ offset = read_offset
+ if ch == '\n' {
+ line_offset = offset
+ line_count += 1
+ }
+ r, w := rune(src[read_offset]), 1
+ switch {
+ case r == 0:
+ error(t, t.offset, "illegal character NUL")
+ case r >= utf8.RUNE_SELF:
+ r, w = utf8.decode_rune_in_string(src[read_offset:])
+ if r == utf8.RUNE_ERROR && w == 1 {
+ error(t, t.offset, "illegal UTF-8 encoding")
+ } else if r == utf8.RUNE_BOM && offset > 0 {
+ error(t, t.offset, "illegal byte order mark")
+ }
+ }
+ read_offset += w
+ ch = r
+ } else {
+ offset = len(src)
+ if ch == '\n' {
+ line_offset = offset
+ line_count += 1
+ }
+ ch = -1
+ }
+}
+
+peek_byte :: proc(t: ^Tokenizer, offset := 0) -> byte {
+ if t.read_offset+offset < len(t.src) {
+ return t.src[t.read_offset+offset]
+ }
+ return 0
+}
+
+skip_whitespace :: proc(t: ^Tokenizer) {
+ for {
+ switch t.ch {
+ case ' ', '\t', '\r', '\n':
+ advance_rune(t)
+ case:
+ return
+ }
+ }
+}
+
+is_letter :: proc(r: rune) -> bool {
+ if r < utf8.RUNE_SELF {
+ switch r {
+ case '_':
+ return true
+ case 'A'..='Z', 'a'..='z':
+ return true
+ }
+ }
+ return unicode.is_letter(r)
+}
+
+is_valid_identifier_rune :: proc(r: rune) -> bool {
+ if r < utf8.RUNE_SELF {
+ switch r {
+ case '_', '-', ':': return true
+ case 'A'..='Z', 'a'..='z': return true
+ case '0'..'9': return true
+ }
+ }
+
+ if unicode.is_digit(r) || unicode.is_letter(r) {
+ return true
+ }
+ return false
+}
+
+scan_identifier :: proc(t: ^Tokenizer) -> string {
+ offset := t.offset
+ namespaced := false
+
+ for is_valid_identifier_rune(t.ch) {
+ advance_rune(t)
+ if t.ch == ':' {
+ /*
+ A namespaced attr can have at most two parts, `namespace:ident`.
+ */
+ if namespaced {
+ break
+ }
+ namespaced = true
+ }
+ }
+ return string(t.src[offset : t.offset])
+}
+
+scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close := false) -> (value: string, err: Error) {
+ err = .None
+ in_cdata := false
+
+ loop: for {
+ ch := t.ch
+
+ switch ch {
+ case -1:
+ error(t, t.offset, "[scan_string] Premature end of file.\n")
+ return "", .Premature_EOF
+
+ case '<':
+ /*
+ Might be the start of a CDATA tag.
+ */
+ if t.read_offset + len(CDATA_START) < len(t.src) {
+ if string(t.src[t.offset:][:len(CDATA_START)]) == CDATA_START {
+ in_cdata = true
+ }
+ }
+
+ case ']':
+ /*
+ Might be the end of a CDATA tag.
+ */
+ if t.read_offset + len(CDATA_END) < len(t.src) {
+ if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END {
+ in_cdata = false
+ }
+ }
+
+ case '\n':
+ if !in_cdata {
+ error(t, offset, string(t.src[offset : t.offset]))
+ error(t, offset, "[scan_string] Not terminated\n")
+ err = .Invalid_Tag_Value
+ break loop
+ }
+ }
+
+ if ch == close && !in_cdata {
+ /*
+ If it's not a CDATA tag, it's the end of this body.
+ */
+ break loop
+ }
+
+ advance_rune(t)
+ }
+
+ lit := string(t.src[offset : t.offset])
+ if consume_close {
+ advance_rune(t)
+ }
+
+ /*
+ TODO: Handle decoding escape characters and unboxing CDATA.
+ */
+
+ return lit, err
+}
+
+peek :: proc(t: ^Tokenizer) -> (token: Token) {
+ old := t^
+ token = scan(t)
+ t^ = old
+ return token
+}
+
+scan :: proc(t: ^Tokenizer) -> Token {
+ skip_whitespace(t)
+
+ offset := t.offset
+
+ kind: Token_Kind
+ err: Error
+ lit: string
+ pos := offset_to_pos(t, offset)
+
+ switch ch := t.ch; true {
+ case is_letter(ch):
+ lit = scan_identifier(t)
+ kind = .Ident
+
+ case:
+ advance_rune(t)
+ switch ch {
+ case -1:
+ kind = .EOF
+
+ case '<': kind = .Lt
+ case '>': kind = .Gt
+ case '!': kind = .Exclaim
+ case '?': kind = .Question
+ case '=': kind = .Eq
+ case '#': kind = .Hash
+ case '/': kind = .Slash
+ case '-': kind = .Dash
+ case ':': kind = .Colon
+
+ case '"', '\'':
+ lit, err = scan_string(t, t.offset, ch, true)
+ if err == .None {
+ kind = .String
+ } else {
+ kind = .Invalid
+ }
+
+ case '\n':
+ lit = "\n"
+
+ case '\\':
+ token := scan(t)
+ if token.pos.line == pos.line {
+ error(t, token.pos.offset, "expected a newline after \\")
+ }
+ return token
+
+ case:
+ if ch != utf8.RUNE_BOM {
+ // error(t, t.offset, "illegal character '%r': %d", ch, ch)
+ }
+ kind = .Invalid
+ }
+ }
+
+ if lit == "" {
+ lit = string(t.src[offset : t.offset])
+ }
+ return Token{kind, lit, pos}
+} \ No newline at end of file
diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin
new file mode 100644
index 000000000..526be5856
--- /dev/null
+++ b/core/encoding/xml/xml_reader.odin
@@ -0,0 +1,651 @@
+package xml
+/*
+ An XML 1.0 / 1.1 parser
+
+ Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-3 license.
+
+ A from-scratch XML implementation, loosely modelled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816).
+
+ Features:
+ - Supports enough of the XML 1.0/1.1 spec to handle the 99.9% of XML documents in common current usage.
+ - Simple to understand and use. Small.
+
+ Caveats:
+ - We do NOT support HTML in this package, as that may or may not be valid XML.
+ If it works, great. If it doesn't, that's not considered a bug.
+
+ - We do NOT support UTF-16. If you have a UTF-16 XML file, please convert it to UTF-8 first. Also, our condolences.
+ - <[!ELEMENT and <[!ATTLIST are not supported, and will be either ignored or return an error depending on the parser options.
+
+ TODO:
+ - Optional CDATA unboxing.
+ - Optional `&gt;`, `&#32;`, `&#x20;` and other escape substitution in tag bodies.
+
+ MAYBE:
+ - XML writer?
+ - Serialize/deserialize Odin types?
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation.
+*/
+
+import "core:strings"
+import "core:mem"
+import "core:os"
+
+DEFAULT_Options :: Options{
+ flags = {
+ .Ignore_Unsupported,
+ },
+ expected_doctype = "",
+}
+
+Option_Flag :: enum {
+ /*
+ Document MUST start with `<?xml` prolog.
+ */
+ Must_Have_Prolog,
+
+ /*
+ Document MUST have a `<!DOCTYPE`.
+ */
+ Must_Have_DocType,
+
+ /*
+ By default we skip comments. Use this option to intern a comment on a parented Element.
+ */
+ Intern_Comments,
+
+ /*
+ How to handle unsupported parts of the specification, like <! other than <!DOCTYPE and <![CDATA[
+ */
+ Error_on_Unsupported,
+ Ignore_Unsupported,
+
+ /*
+ By default CDATA tags are passed-through as-is.
+ This option unwraps them when encountered.
+ */
+ Unbox_CDATA,
+
+ /*
+ By default SGML entities like `&gt;`, `&#32;` and `&#x20;` are passed-through as-is.
+ This option decodes them when encountered.
+ */
+ Decode_SGML_Entities,
+}
+
+Document :: struct {
+ root: ^Element,
+ prolog: Attributes,
+ encoding: Encoding,
+
+ doctype: struct {
+ /*
+ We only scan the <!DOCTYPE IDENT part and skip the rest.
+ */
+ ident: string,
+ rest: string,
+ },
+
+ /*
+ Internal
+ */
+ tokenizer: ^Tokenizer,
+ allocator: mem.Allocator,
+ intern: strings.Intern,
+}
+
+Element :: struct {
+ ident: string,
+ value: string,
+ attribs: Attributes,
+
+ kind: enum {
+ Element = 0,
+ Comment,
+ },
+
+ parent: ^Element,
+ children: [dynamic]^Element,
+}
+
+Attr :: struct {
+ key: string,
+ val: string,
+}
+
+Attributes :: [dynamic]Attr
+
+Options :: struct {
+ flags: Option_Flags,
+ expected_doctype: string,
+}
+Option_Flags :: bit_set[Option_Flag]
+
+Encoding :: enum {
+ Unknown,
+
+ UTF_8,
+ ISO_8859_1,
+
+ /*
+ Aliases
+ */
+ LATIN_1 = ISO_8859_1,
+}
+
+Error :: enum {
+ /*
+ General return values.
+ */
+ None = 0,
+ General_Error,
+ Unexpected_Token,
+ Invalid_Token,
+
+ /*
+ Couldn't find, open or read file.
+ */
+ File_Error,
+
+ /*
+ File too short.
+ */
+ Premature_EOF,
+
+ /*
+ XML-specific errors.
+ */
+ No_Prolog,
+ Invalid_Prolog,
+ Too_Many_Prologs,
+
+ No_DocType,
+ Too_Many_DocTypes,
+ DocType_Must_Proceed_Elements,
+
+ /*
+ If a DOCTYPE is present _or_ the caller
+ asked for a specific DOCTYPE and the DOCTYPE
+ and root tag don't match, we return `.Invalid_DocType`.
+ */
+ Invalid_DocType,
+
+ Invalid_Tag_Value,
+ Mismatched_Closing_Tag,
+
+ Unclosed_Comment,
+ Comment_Before_Root_Element,
+ Invalid_Sequence_In_Comment,
+
+ Unsupported_Version,
+ Unsupported_Encoding,
+
+ /*
+ <!FOO are usually skipped.
+ */
+ Unhandled_Bang,
+
+ Duplicate_Attribute,
+ Conflicting_Options,
+
+ /*
+ Unhandled TODO:
+ */
+ Unhandled_CDATA_Unboxing,
+ Unhandled_SGML_Entity_Decoding,
+}
+
+/*
+ Implementation starts here.
+*/
+parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) {
+ context.allocator = allocator
+
+ opts := validate_options(options) or_return
+
+ t := &Tokenizer{}
+ init(t, string(data), path, error_handler)
+
+ doc = new(Document)
+ doc.allocator = allocator
+ doc.tokenizer = t
+
+ strings.intern_init(&doc.intern, allocator, allocator)
+
+ err = .Unexpected_Token
+ element, parent: ^Element
+
+ /*
+ If a DOCTYPE is present, the root tag has to match.
+ If an expected DOCTYPE is given in options (i.e. it's non-empty), the DOCTYPE (if present) and root tag have to match.
+ */
+ expected_doctype := options.expected_doctype
+
+ loop: for {
+ tok := scan(t)
+ #partial switch tok.kind {
+
+ case .Lt:
+ open := scan(t)
+ #partial switch open.kind {
+
+ case .Question:
+ /*
+ <?xml
+ */
+ next := scan(t)
+ #partial switch next.kind {
+ case .Ident:
+ if len(next.text) == 3 && strings.to_lower(next.text, context.temp_allocator) == "xml" {
+ parse_prolog(doc) or_return
+ } else if len(doc.prolog) > 0 {
+ /*
+ We've already seen a prolog.
+ */
+ return doc, .Too_Many_Prologs
+ } else {
+ error(t, t.offset, "Expected \"<?xml\", got \"<?%v\".", tok.text)
+ return
+ }
+ case:
+ error(t, t.offset, "Expected \"<?xml\", got \"<?%v\".", tok.text)
+ return
+ }
+
+ case .Exclaim:
+ /*
+ <!
+ */
+ next := scan(t)
+ #partial switch next.kind {
+ case .Ident:
+ switch next.text {
+ case "DOCTYPE":
+ if len(doc.doctype.ident) > 0 {
+ return doc, .Too_Many_DocTypes
+ }
+ if doc.root != nil {
+ return doc, .DocType_Must_Proceed_Elements
+ }
+ parse_doctype(doc) or_return
+
+ if len(expected_doctype) > 0 && expected_doctype != doc.doctype.ident {
+ error(t, t.offset, "Invalid DOCTYPE. Expected: %v, got: %v\n", expected_doctype, doc.doctype.ident)
+ return doc, .Invalid_DocType
+ }
+ expected_doctype = doc.doctype.ident
+
+ case:
+ if .Error_on_Unsupported in opts.flags {
+ error(t, t.offset, "Unhandled: <!%v\n", next.text)
+ err = .Unhandled_Bang
+ return
+ }
+ skip_element(t) or_return
+ }
+
+ case .Dash:
+ /*
+ Comment: <!-- -->.
+ The grammar does not allow a comment to end in --->
+ */
+ if doc.root == nil {
+ return doc, .Comment_Before_Root_Element
+ }
+
+ expect(t, .Dash)
+ offset := t.offset
+
+ for {
+ advance_rune(t)
+ ch := t.ch
+
+ /*
+ A comment ends when we see -->, preceded by a character that's not a dash.
+ "For compatibility, the string "--" (double-hyphen) must not occur within comments."
+
+ See: https://www.w3.org/TR/2006/REC-xml11-20060816/#dt-comment
+
+ Thanks to the length (4) of the comment start, we also have enough lookback,
+ and the peek at the next byte asserts that there's at least one more character
+ that's a `>`.
+ */
+ if ch < 0 {
+ error(t, offset, "[parse] Comment was not terminated\n")
+ return doc, .Unclosed_Comment
+ }
+
+ if string(t.src[t.offset - 1:][:2]) == "--" {
+ if peek_byte(t) == '>' {
+ break
+ } else {
+ error(t, t.offset - 1, "Invalid -- sequence in comment.\n")
+ return doc, .Invalid_Sequence_In_Comment
+ }
+ }
+ }
+
+ if .Intern_Comments in opts.flags {
+ el := new(Element)
+
+ el.parent = element
+ el.kind = .Comment
+ el.value = strings.intern_get(&doc.intern, string(t.src[offset : t.offset - 1]))
+ append(&element.children, el)
+ }
+
+ expect(t, .Dash)
+ expect(t, .Gt)
+
+ case:
+ error(t, t.offset, "Invalid Token after <!. Expected .Ident, got %#v\n", next)
+ return
+ }
+
+ case .Ident:
+ /*
+ e.g. <odin - Start of new element.
+ */
+ element = new(Element)
+
+ if doc.root == nil {
+ /*
+ First element.
+ */
+ doc.root = element
+ parent = element
+ } else {
+ append(&parent.children, element)
+ }
+
+ element.parent = parent
+ element.ident = strings.intern_get(&doc.intern, open.text)
+
+ parse_attributes(doc, &element.attribs) or_return
+
+ /*
+ If a DOCTYPE is present _or_ the caller
+ asked for a specific DOCTYPE and the DOCTYPE
+ and root tag don't match, we return .Invalid_Root_Tag.
+ */
+ if element == doc.root {
+ if len(expected_doctype) > 0 && expected_doctype != open.text {
+ error(t, t.offset, "Root Tag doesn't match DOCTYPE. Expected: %v, got: %v\n", expected_doctype, open.text)
+ return doc, .Invalid_DocType
+ }
+ }
+
+ /*
+ One of these should follow:
+ - `>`, which means we've just opened this tag and expect a later element to close it.
+ - `/>`, which means this is an 'empty' or self-closing tag.
+ */
+ end_token := scan(t)
+
+ #partial switch end_token.kind {
+ case .Gt:
+ /*
+ We're now the new parent.
+ */
+ parent = element
+
+ case .Slash:
+ /*
+ Empty tag?
+ */
+ expect(t, .Gt) or_return
+
+ case:
+ error(t, t.offset, "Expected close tag, got: %#v\n", end_token)
+ return
+ }
+
+ case .Slash:
+ /*
+ Close tag.
+ */
+ ident := expect(t, .Ident) or_return
+ _ = expect(t, .Gt) or_return
+
+ if element.ident != ident.text {
+ error(t, t.offset, "Mismatched Closing Tag: %v\n", ident.text)
+ return doc, .Mismatched_Closing_Tag
+ }
+ parent = element.parent
+ element = parent
+
+ case:
+ error(t, t.offset, "Invalid Token after <: %#v\n", open)
+ return
+ }
+
+ case .EOF:
+ break loop
+
+ case:
+ /*
+ This should be a tag's body text.
+ */
+ element.value = scan_string(t, tok.pos.offset) or_return
+ }
+ }
+
+ if .Must_Have_Prolog in opts.flags && len(doc.prolog) == 0 {
+ return doc, .No_Prolog
+ }
+
+ if .Must_Have_DocType in opts.flags && len(doc.doctype.ident) == 0 {
+ return doc, .No_DocType
+ }
+
+ return doc, .None
+}
+
+parse_from_file :: proc(filename: string, options := DEFAULT_Options, error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) {
+ context.allocator = allocator
+
+ data, data_ok := os.read_entire_file(filename)
+ defer delete(data)
+
+ if !data_ok { return {}, .File_Error }
+
+ return parse_from_slice(data, options, filename, error_handler, allocator)
+}
+
+parse :: proc { parse_from_file, parse_from_slice }
+
+free_element :: proc(element: ^Element) {
+ if element == nil { return }
+
+ for child in element.children {
+ /*
+ NOTE: Recursive.
+
+ Could be rewritten so it adds them to a list of pointers to free.
+ */
+ free_element(child)
+ }
+ delete(element.attribs)
+ delete(element.children)
+ free(element)
+}
+
+destroy :: proc(doc: ^Document) {
+ if doc == nil { return }
+
+ free_element(doc.root)
+ strings.intern_destroy(&doc.intern)
+
+ delete(doc.prolog)
+ free(doc)
+}
+
+/*
+ Helpers.
+*/
+
+validate_options :: proc(options: Options) -> (validated: Options, err: Error) {
+ validated = options
+
+ if .Error_on_Unsupported in validated.flags && .Ignore_Unsupported in validated.flags {
+ return options, .Conflicting_Options
+ }
+
+ if .Unbox_CDATA in validated.flags {
+ return options, .Unhandled_CDATA_Unboxing
+ }
+
+ if .Decode_SGML_Entities in validated.flags {
+ return options, .Unhandled_SGML_Entity_Decoding
+ }
+
+ return validated, .None
+}
+
+expect :: proc(t: ^Tokenizer, kind: Token_Kind) -> (tok: Token, err: Error) {
+ tok = scan(t)
+ if tok.kind == kind { return tok, .None }
+
+ error(t, t.offset, "Expected \"%v\", got \"%v\".", kind, tok.kind)
+ return tok, .Unexpected_Token
+}
+
+parse_attribute :: proc(doc: ^Document) -> (attr: Attr, offset: int, err: Error) {
+ assert(doc != nil)
+ context.allocator = doc.allocator
+ t := doc.tokenizer
+
+ key := expect(t, .Ident) or_return
+ offset = t.offset - len(key.text)
+
+ _ = expect(t, .Eq) or_return
+ value := expect(t, .String) or_return
+
+ attr.key = strings.intern_get(&doc.intern, key.text)
+ attr.val = strings.intern_get(&doc.intern, value.text)
+
+ err = .None
+ return
+}
+
+check_duplicate_attributes :: proc(t: ^Tokenizer, attribs: Attributes, attr: Attr, offset: int) -> (err: Error) {
+ for a in attribs {
+ if attr.key == a.key {
+ error(t, offset, "Duplicate attribute: %v\n", attr.key)
+ return .Duplicate_Attribute
+ }
+ }
+ return .None
+}
+
+parse_attributes :: proc(doc: ^Document, attribs: ^Attributes) -> (err: Error) {
+ assert(doc != nil)
+ context.allocator = doc.allocator
+ t := doc.tokenizer
+
+ for peek(t).kind == .Ident {
+ attr, offset := parse_attribute(doc) or_return
+ check_duplicate_attributes(t, attribs^, attr, offset) or_return
+ append(attribs, attr)
+ }
+ skip_whitespace(t)
+ return .None
+}
+
+parse_prolog :: proc(doc: ^Document) -> (err: Error) {
+ assert(doc != nil)
+ context.allocator = doc.allocator
+ t := doc.tokenizer
+
+ offset := t.offset
+ parse_attributes(doc, &doc.prolog) or_return
+
+ for attr in doc.prolog {
+ switch attr.key {
+ case "version":
+ switch attr.val {
+ case "1.0", "1.1":
+ case:
+ error(t, offset, "[parse_prolog] Warning: Unhandled XML version: %v\n", attr.val)
+ }
+
+ case "encoding":
+ switch strings.to_lower(attr.val, context.temp_allocator) {
+ case "utf-8", "utf8":
+ doc.encoding = .UTF_8
+
+ case "latin-1", "latin1", "iso-8859-1":
+ doc.encoding = .LATIN_1
+
+ case:
+ /*
+ Unrecognized encoding, assume UTF-8.
+ */
+ error(t, offset, "[parse_prolog] Warning: Unrecognized encoding: %v\n", attr.val)
+ }
+
+ case:
+ // Ignored.
+ }
+ }
+
+ _ = expect(t, .Question) or_return
+ _ = expect(t, .Gt) or_return
+
+ return .None
+}
+
+skip_element :: proc(t: ^Tokenizer) -> (err: Error) {
+ close := 1
+
+ loop: for {
+ tok := scan(t)
+ #partial switch tok.kind {
+ case .EOF:
+ error(t, t.offset, "[skip_element] Premature EOF\n")
+ return .Premature_EOF
+
+ case .Lt:
+ close += 1
+
+ case .Gt:
+ close -= 1
+ if close == 0 {
+ break loop
+ }
+
+ case:
+
+ }
+ }
+ return .None
+}
+
+parse_doctype :: proc(doc: ^Document) -> (err: Error) {
+ /*
+ <!DOCTYPE greeting SYSTEM "hello.dtd">
+
+ <!DOCTYPE greeting [
+ <!ELEMENT greeting (#PCDATA)>
+ ]>
+ */
+ assert(doc != nil)
+ context.allocator = doc.allocator
+ t := doc.tokenizer
+
+ tok := expect(t, .Ident) or_return
+ doc.doctype.ident = strings.intern_get(&doc.intern, tok.text)
+
+ skip_whitespace(t)
+ offset := t.offset
+ skip_element(t) or_return
+
+ /*
+ -1 because the current offset is that of the closing tag, so the rest of the DOCTYPE tag ends just before it.
+ */
+ doc.doctype.rest = strings.intern_get(&doc.intern, string(t.src[offset : t.offset - 1]))
+ return .None
+} \ No newline at end of file
diff --git a/tests/core/Makefile b/tests/core/Makefile
index 0f0ffe4d6..e17dede90 100644
--- a/tests/core/Makefile
+++ b/tests/core/Makefile
@@ -1,22 +1,29 @@
ODIN=../../odin
PYTHON=$(shell which python3)
-all: download_test_assets image_test compress_test strings_test hash_test crypto_test
+all: download_test_assets image_test compress_test strings_test hash_test crypto_test encoding_test
download_test_assets:
$(PYTHON) download_assets.py
image_test:
- $(ODIN) run image/test_core_image.odin
+ $(ODIN) run image/test_core_image.odin -out=test_image -o:speed -no-bounds-check
compress_test:
- $(ODIN) run compress/test_core_compress.odin
+ $(ODIN) run compress/test_core_compress.odin -out=test_compress -o:speed -no-bounds-check
strings_test:
- $(ODIN) run strings/test_core_strings.odin
+ $(ODIN) run strings/test_core_strings.odin -out=test_strings -o:speed -no-bounds-check
+
+odin_test:
+ $(ODIN) run odin -out=test_odin -o:speed -no-bounds-check
hash_test:
$(ODIN) run hash -out=test_hash -o:speed -no-bounds-check
crypto_test:
- $(ODIN) run crypto -out=crypto_hash -o:speed -no-bounds-check \ No newline at end of file
+ $(ODIN) run crypto -out=crypto_hash -o:speed -no-bounds-check
+
+encoding_test:
+ $(ODIN) run encoding/json -out=test_encoding_json -o:speed -no-bounds-check
+ $(ODIN) run encoding/xml -out=test_encoding_xml -o:speed -no-bounds-check
diff --git a/tests/core/assets/xml/nl_NL-qt-ts.ts b/tests/core/assets/xml/nl_NL-qt-ts.ts
new file mode 100644
index 000000000..6ec3f2f47
--- /dev/null
+++ b/tests/core/assets/xml/nl_NL-qt-ts.ts
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE TS>
+<TS version="2.1" language="nl" sourcelanguage="en">
+<context>
+ <name>Page</name>
+ <message>
+ <source>Text for translation</source>
+ <comment>commenting</comment>
+ <translation type="obsolete">Tekst om te vertalen</translation>
+ </message>
+ <message>
+ <source>Also text to translate</source>
+ <extracomment>some text</extracomment>
+ <translation>Ook tekst om te vertalen</translation>
+ </message>
+</context>
+<context>
+ <name>installscript</name>
+ <message>
+ <source>99 bottles of beer on the wall</source>
+ <oldcomment>some new comments here</oldcomment>
+ <translation>99 flessen bier op de muur</translation>
+ </message>
+</context>
+<context>
+ <name>apple_count</name>
+ <message numerus="yes">
+ <source>%d apple(s)</source>
+ <translation>
+ <numerusform>%d appel</numerusform>
+ <numerusform>%d appels</numerusform>
+ </translation>
+ </message>
+ </context>
+</TS>
diff --git a/tests/core/assets/xml/nl_NL-xliff-1.0.xliff b/tests/core/assets/xml/nl_NL-xliff-1.0.xliff
new file mode 100644
index 000000000..7a1abcd66
--- /dev/null
+++ b/tests/core/assets/xml/nl_NL-xliff-1.0.xliff
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xliff version="1.2" xmlns="urn:oasis:names:tc:xliff:document:1.2">
+ <file id="42" original="Foozle.xml" source-language="en" target-language="nl-NL" datatype="plaintext">
+ <body>
+ <trans-unit id="874396" maxwidth="20" size-unit="char">
+ <source>text</source>
+ <target state="translated">tekst</target>
+ <note>Context</note>
+ </trans-unit>
+ <trans-unit id="874397" approved="yes">
+ <source>text 1</source>
+ <target state="translated">tekst 1</target>
+ <note>Context 1</note>
+ </trans-unit>
+ <trans-unit id="874398">
+ <source>text 2</source>
+ <target state="needs-translation"/>
+ <context context-type="context">Context of the segment 2</context>
+ </trans-unit>
+ <trans-unit id="874399" translate="no">
+ <source>text 3</source>
+ <target state="final">translation 3</target>
+ <note>Context 3</note>
+ </trans-unit>
+ <group restype="x-gettext-plurals">
+ <note>Plurals</note>
+ <trans-unit id="14343743[0]">
+ <source>%d month</source>
+ <target xml:lang="nl" state="translated">%d maand</target>
+ </trans-unit>
+ <trans-unit id="14343743[1]">
+ <source>%d months</source>
+ <target xml:lang="nl" state="translated">%d maanden</target>
+ </trans-unit>
+ </group>
+ </body>
+ </file>
+</xliff>
diff --git a/tests/core/assets/xml/nl_NL-xliff-2.0.xliff b/tests/core/assets/xml/nl_NL-xliff-2.0.xliff
new file mode 100644
index 000000000..611ac80c4
--- /dev/null
+++ b/tests/core/assets/xml/nl_NL-xliff-2.0.xliff
@@ -0,0 +1,52 @@
+<?xml version="1.0" encoding="utf-8"?>
+<xliff xmlns="urn:oasis:names:tc:xliff:document:2.0" version="2.0" srcLang="en" trgLang="nl">
+ <file id="f1">
+ <notes>
+ <note id="n1">Note for file</note>
+ </notes>
+ <unit id="u1">
+ <notes>
+ <note id="n1">Note for unit</note>
+ </notes>
+ <segment id="s1" state="initial">
+ <source>text</source>
+ <target></target>
+ </segment>
+ </unit>
+ <unit id="u2">
+ <notes>
+ <note id="n2">Note for unit 2</note>
+ </notes>
+ <segment id="s2" state="translated">
+ <source>text 2</source>
+ <target>translation 2</target>
+ </segment>
+ </unit>
+ <unit id="u3">
+ <notes>
+ <note id="n3">Note for unit 3</note>
+ </notes>
+ <segment id="s3" state="final">
+ <source>text 3</source>
+ <target>approved translation 3</target>
+ </segment>
+ </unit>
+ <group id="90290" type="x-gettext:plurals">
+ <unit id="90291" name="90290[0]">
+ <notes>
+ <note category="context">Plurals</note>
+ </notes>
+ <segment>
+ <source>%d month</source>
+ <target xml:lang="nl">%d maand</target>
+ </segment>
+ </unit>
+ <unit id="90292" name="90290[1]">
+ <segment>
+ <source>%d months</source>
+ <target xml:lang="nl">%d maanden</target>
+ </segment>
+ </unit>
+ </group>
+ </file>
+</xliff> \ No newline at end of file
diff --git a/tests/core/assets/xml/utf8.xml b/tests/core/assets/xml/utf8.xml
new file mode 100644
index 000000000..c9ed3bf69
--- /dev/null
+++ b/tests/core/assets/xml/utf8.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE 恥ずべきフクロウ>
+<恥ずべきフクロウ 올빼미_id="Foozle&#32;<![CDATA[<greeting>Hello, world!"</greeting>]]>Barzle">
+<부끄러운:barzle>
+ <name foo:bar="birmese">ရှက်စရာ ဇီးကွက်</name>
+ <nickname>Owl of Shame</nickname>
+ <data>More CDATA <![CDATA[<greeting>Hello, world!</greeting><![CDATA] < ]]> Nonsense.</data>
+</부끄러운:barzle> \ No newline at end of file
diff --git a/tests/core/build.bat b/tests/core/build.bat
index 176b7f175..7a214acc9 100644
--- a/tests/core/build.bat
+++ b/tests/core/build.bat
@@ -5,34 +5,35 @@ python3 download_assets.py
echo ---
echo Running core:image tests
echo ---
-%PATH_TO_ODIN% run image %COMMON%
+%PATH_TO_ODIN% run image %COMMON% -out:test_image.exe
echo ---
echo Running core:compress tests
echo ---
-%PATH_TO_ODIN% run compress %COMMON%
+%PATH_TO_ODIN% run compress %COMMON% -out:test_compress.exe
echo ---
echo Running core:strings tests
echo ---
-%PATH_TO_ODIN% run strings %COMMON%
+%PATH_TO_ODIN% run strings %COMMON% -out:test_strings.exe
echo ---
echo Running core:hash tests
echo ---
-%PATH_TO_ODIN% run hash %COMMON% -o:size
+%PATH_TO_ODIN% run hash %COMMON% -o:size -out:test_hash.exe
echo ---
echo Running core:odin tests
echo ---
-%PATH_TO_ODIN% run odin %COMMON% -o:size
+%PATH_TO_ODIN% run odin %COMMON% -o:size -out:test_odin.exe
echo ---
echo Running core:crypto hash tests
echo ---
-%PATH_TO_ODIN% run crypto %COMMON%
+%PATH_TO_ODIN% run crypto %COMMON% -o:speed -out:test_crypto.exe
echo ---
echo Running core:encoding tests
echo ---
-%PATH_TO_ODIN% run encoding %COMMON% \ No newline at end of file
+%PATH_TO_ODIN% run encoding\json %COMMON% -out:test_json.exe
+%PATH_TO_ODIN% run encoding\xml %COMMON% -out:test_xml.exe \ No newline at end of file
diff --git a/tests/core/encoding/test_core_json.odin b/tests/core/encoding/json/test_core_json.odin
index f536eb4c6..4f415c008 100644
--- a/tests/core/encoding/test_core_json.odin
+++ b/tests/core/encoding/json/test_core_json.odin
@@ -8,32 +8,32 @@ TEST_count := 0
TEST_fail := 0
when ODIN_TEST {
- expect :: testing.expect
- log :: testing.log
+ expect :: testing.expect
+ log :: testing.log
} else {
- expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) {
- fmt.printf("[%v] ", loc)
- TEST_count += 1
- if !condition {
- TEST_fail += 1
- fmt.println(message)
- return
- }
- fmt.println(" PASS")
- }
- log :: proc(t: ^testing.T, v: any, loc := #caller_location) {
- fmt.printf("[%v] ", loc)
- fmt.printf("log: %v\n", v)
- }
+ expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) {
+ fmt.printf("[%v] ", loc)
+ TEST_count += 1
+ if !condition {
+ TEST_fail += 1
+ fmt.println(message)
+ return
+ }
+ fmt.println(" PASS")
+ }
+ log :: proc(t: ^testing.T, v: any, loc := #caller_location) {
+ fmt.printf("[%v] ", loc)
+ fmt.printf("log: %v\n", v)
+ }
}
main :: proc() {
- t := testing.T{}
+ t := testing.T{}
parse_json(&t)
marshal_json(&t)
- fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
+ fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
}
@test
diff --git a/tests/core/encoding/xml/test_core_xml.odin b/tests/core/encoding/xml/test_core_xml.odin
new file mode 100644
index 000000000..7eefac212
--- /dev/null
+++ b/tests/core/encoding/xml/test_core_xml.odin
@@ -0,0 +1,264 @@
+package test_core_xml
+
+import "core:encoding/xml"
+import "core:testing"
+import "core:mem"
+import "core:fmt"
+
+Silent :: proc(pos: xml.Pos, fmt: string, args: ..any) {
+	// Custom (silent) error handler that discards parser diagnostics so expected-failure tests stay quiet. NOTE(review): the `fmt` parameter shadows the `core:fmt` import — consider renaming to `format`.
+}
+
+OPTIONS :: xml.Options{
+ flags = {
+ .Ignore_Unsupported, .Intern_Comments,
+ },
+ expected_doctype = "",
+}
+
+TEST_count := 0
+TEST_fail := 0
+
+TEST :: struct {
+ filename: string,
+ options: xml.Options,
+ expected: struct {
+ error: xml.Error,
+ xml_version: string,
+ xml_encoding: string,
+ doctype: string,
+ },
+}
+
+TESTS :: []TEST{
+ /*
+ First we test that certain files parse without error.
+ */
+ {
+ filename = "assets/xml/utf8.xml",
+ options = OPTIONS,
+ expected = {
+ error = .None,
+ xml_version = "1.0",
+ xml_encoding = "utf-8",
+ doctype = "恥ずべきフクロウ",
+ },
+ },
+ {
+ filename = "assets/xml/nl_NL-qt-ts.ts",
+ options = OPTIONS,
+ expected = {
+ error = .None,
+ xml_version = "1.0",
+ xml_encoding = "utf-8",
+ doctype = "TS",
+ },
+ },
+ {
+ filename = "assets/xml/nl_NL-xliff-1.0.xliff",
+ options = OPTIONS,
+ expected = {
+ error = .None,
+ xml_version = "1.0",
+ xml_encoding = "UTF-8",
+ doctype = "",
+ },
+ },
+ {
+ filename = "assets/xml/nl_NL-xliff-2.0.xliff",
+ options = OPTIONS,
+ expected = {
+ error = .None,
+ xml_version = "1.0",
+ xml_encoding = "utf-8",
+ doctype = "",
+ },
+ },
+
+ /*
+ Then we test that certain errors are returned as expected.
+ */
+ {
+ filename = "assets/xml/utf8.xml",
+ options = {
+ flags = {
+ .Ignore_Unsupported, .Intern_Comments,
+ },
+ expected_doctype = "Odin",
+ },
+ expected = {
+ error = .Invalid_DocType,
+ xml_version = "1.0",
+ xml_encoding = "utf-8",
+ doctype = "恥ずべきフクロウ",
+ },
+ },
+}
+
+when ODIN_TEST {
+ expect :: testing.expect
+ log :: testing.log
+} else {
+ expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) {
+ fmt.printf("[%v] ", loc)
+ TEST_count += 1
+ if !condition {
+ TEST_fail += 1
+ fmt.println(message)
+ return
+ }
+ fmt.println(" PASS")
+ }
+ log :: proc(t: ^testing.T, v: any, loc := #caller_location) {
+ fmt.printf("[%v] ", loc)
+ fmt.printf("log: %v\n", v)
+ }
+}
+
+main :: proc() {
+ t := testing.T{}
+
+ track: mem.Tracking_Allocator
+ mem.tracking_allocator_init(&track, context.allocator)
+ context.allocator = mem.tracking_allocator(&track)
+
+ run_tests(&t)
+
+ if len(track.allocation_map) > 0 {
+ for _, v in track.allocation_map {
+ err_msg := fmt.tprintf("%v Leaked %v bytes.", v.location, v.size)
+ expect(&t, false, err_msg)
+ }
+ }
+
+ fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
+}
+
+@test
+run_tests :: proc(t: ^testing.T) {
+ using fmt
+
+ count := 0
+
+ for test in TESTS {
+ printf("Trying to parse %v\n\n", test.filename)
+
+ doc, err := xml.parse(test.filename, test.options, Silent)
+ defer xml.destroy(doc)
+
+ err_msg := tprintf("Expected return value %v, got %v", test.expected.error, err)
+ expect(t, err == test.expected.error, err_msg)
+
+ if len(test.expected.xml_version) > 0 {
+ xml_version := ""
+ for attr in doc.prolog {
+ if attr.key == "version" {
+ xml_version = attr.val
+ }
+ }
+
+ err_msg = tprintf("Expected XML version %v, got %v", test.expected.xml_version, xml_version)
+ expect(t, xml_version == test.expected.xml_version, err_msg)
+ }
+
+ if len(test.expected.xml_encoding) > 0 {
+ xml_encoding := ""
+ for attr in doc.prolog {
+ if attr.key == "encoding" {
+ xml_encoding = attr.val
+ }
+ }
+
+ err_msg = tprintf("Expected XML encoding %v, got %v", test.expected.xml_encoding, xml_encoding)
+ expect(t, xml_encoding == test.expected.xml_encoding, err_msg)
+ }
+
+ err_msg = tprintf("Expected DOCTYPE %v, got %v", test.expected.doctype, doc.doctype.ident)
+ expect(t, doc.doctype.ident == test.expected.doctype, err_msg)
+
+		/*
+			Per-file assertions, selected by the test's index within TESTS.
+		*/
+ switch count {
+ case 0:
+ expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.")
+ attr := doc.root.attribs[0]
+
+ attr_key_expected := "올빼미_id"
+ attr_val_expected := "Foozle&#32;<![CDATA[<greeting>Hello, world!\"</greeting>]]>Barzle"
+
+ attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
+ expect(t, attr.key == attr_key_expected, attr_err)
+
+ attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
+ expect(t, attr.val == attr_val_expected, attr_err)
+
+ expect(t, len(doc.root.children) > 0, "Expected the root tag to have children.")
+ child := doc.root.children[0]
+
+ first_child_ident := "부끄러운:barzle"
+ attr_err = tprintf("Expected first child tag's ident to be %v, got %v", first_child_ident, child.ident)
+ expect(t, child.ident == first_child_ident, attr_err)
+
+ case 2:
+ expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.")
+
+ {
+ attr := doc.root.attribs[0]
+
+ attr_key_expected := "version"
+ attr_val_expected := "1.2"
+
+ attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
+ expect(t, attr.key == attr_key_expected, attr_err)
+
+ attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
+ expect(t, attr.val == attr_val_expected, attr_err)
+ }
+
+ {
+ attr := doc.root.attribs[1]
+
+ attr_key_expected := "xmlns"
+ attr_val_expected := "urn:oasis:names:tc:xliff:document:1.2"
+
+ attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
+ expect(t, attr.key == attr_key_expected, attr_err)
+
+ attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
+ expect(t, attr.val == attr_val_expected, attr_err)
+ }
+
+ case 3:
+ expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.")
+
+ {
+ attr := doc.root.attribs[0]
+
+ attr_key_expected := "xmlns"
+ attr_val_expected := "urn:oasis:names:tc:xliff:document:2.0"
+
+ attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
+ expect(t, attr.key == attr_key_expected, attr_err)
+
+ attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
+ expect(t, attr.val == attr_val_expected, attr_err)
+ }
+
+ {
+ attr := doc.root.attribs[1]
+
+ attr_key_expected := "version"
+ attr_val_expected := "2.0"
+
+ attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
+ expect(t, attr.key == attr_key_expected, attr_err)
+
+ attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
+ expect(t, attr.val == attr_val_expected, attr_err)
+ }
+ }
+
+ count += 1
+ }
+} \ No newline at end of file