diff options
Diffstat (limited to 'core/encoding')
| -rw-r--r-- | core/encoding/xml/debug_print.odin | 4 | ||||
| -rw-r--r-- | core/encoding/xml/xml_reader.odin | 75 |
2 files changed, 55 insertions, 24 deletions
diff --git a/core/encoding/xml/debug_print.odin b/core/encoding/xml/debug_print.odin index 65b71e30b..e6a8c9433 100644 --- a/core/encoding/xml/debug_print.odin +++ b/core/encoding/xml/debug_print.odin @@ -36,6 +36,10 @@ print :: proc(writer: io.Writer, doc: ^Document) -> (written: int, err: io.Error } } + for comment in doc.comments { + written += wprintf(writer, "[Pre-root comment] %v\n", comment) + } + if doc.root != nil { wprintln(writer, " --- ") print_element(writer, doc.root) diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin index 34f6e65d0..b2226e6b9 100644 --- a/core/encoding/xml/xml_reader.odin +++ b/core/encoding/xml/xml_reader.odin @@ -86,11 +86,17 @@ Document :: struct { /* We only scan the <!DOCTYPE IDENT part and skip the rest. */ - ident: string, - rest: string, + ident: string, + rest: string, }, /* + If we encounter comments before the root node, and the option to intern comments is given, this is where they'll live. + Otherwise they'll be in the element tree. + */ + comments: [dynamic]string, + + /* Internal */ tokenizer: ^Tokenizer, @@ -218,6 +224,8 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err err = .Unexpected_Token element, parent: ^Element + tag_is_open := false + /* If a DOCTYPE is present, the root tag has to match. If an expected DOCTYPE is given in options (i.e. it's non-empty), the DOCTYPE (if present) and root tag have to match. @@ -225,10 +233,14 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err expected_doctype := options.expected_doctype loop: for { - tok := scan(t) - #partial switch tok.kind { + skip_whitespace(t) + switch t.ch { + case '<': + /* + Consume peeked `<` + */ + tok := scan(t) - case .Lt: open := scan(t) #partial switch open.kind { @@ -247,8 +259,10 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err */ return doc, .Too_Many_Prologs } else { - error(t, t.offset, "Expected \"<?xml\", got \"<?%v\".", tok.text) - return + /* + Could be `<?xml-stylesheet`, etc. Ignore it. + */ + skip_element(t) or_return } case: error(t, t.offset, "Expected \"<?xml\", got \"<?%v\".", tok.text) @@ -292,10 +306,6 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err Comment: <!-- -->. The grammar does not allow a comment to end in ---> */ - if doc.root == nil { - return doc, .Comment_Before_Root_Element - } - expect(t, .Dash) offset := t.offset @@ -329,12 +339,17 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err } if .Intern_Comments in opts.flags { - el := new(Element) - - el.parent = element - el.kind = .Comment - el.value = strings.intern_get(&doc.intern, string(t.src[offset : t.offset - 1])) - append(&element.children, el) + comment := strings.intern_get(&doc.intern, string(t.src[offset : t.offset - 1])) + + if doc.root == nil { + append(&doc.comments, comment) + } else { + el := new(Element) + el.parent = element + el.kind = .Comment + el.value = comment + append(&element.children, el) + } } expect(t, .Dash) @@ -350,6 +365,7 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err e.g. <odin - Start of new element. */ element = new(Element) + tag_is_open = true if doc.root == nil { /* @@ -384,7 +400,6 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err - `/>`, which means this is an 'empty' or self-closing tag. */ end_token := scan(t) - #partial switch end_token.kind { case .Gt: /* @@ -394,9 +409,12 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err case .Slash: /* - Empty tag? + Empty tag. Close it. */ expect(t, .Gt) or_return + parent = element.parent + element = parent + tag_is_open = false case: error(t, t.offset, "Expected close tag, got: %#v\n", end_token) @@ -411,25 +429,33 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err _ = expect(t, .Gt) or_return if element.ident != ident.text { - error(t, t.offset, "Mismatched Closing Tag: %v\n", ident.text) + error(t, t.offset, "Mismatched Closing Tag. Expected %v, got %v\n", element.ident, ident.text) return doc, .Mismatched_Closing_Tag } - parent = element.parent - element = parent + parent = element.parent + element = parent + tag_is_open = false case: error(t, t.offset, "Invalid Token after <: %#v\n", open) return } - case .EOF: + case -1: + /* + End of file. + */ + if tag_is_open { + return doc, .Premature_EOF + } break loop case: /* This should be a tag's body text. */ - element.value = scan_string(t, tok.pos.offset) or_return + body_text := scan_string(t, t.offset) or_return + element.value = strings.intern_get(&doc.intern, body_text) } } @@ -480,6 +506,7 @@ destroy :: proc(doc: ^Document) { strings.intern_destroy(&doc.intern) delete(doc.prolog) + delete(doc.comments) free(doc) } |