From 32eab04d662b0c1128e64a4b91fb81f5f2be5a95 Mon Sep 17 00:00:00 2001
From: Jeroen van Rijn
Date: Wed, 1 Dec 2021 03:15:44 +0100
Subject: [xml] Allow multi-line bodies w/o CDATA. Strip trailing whitespace.

---
 core/encoding/xml/debug_print.odin |  9 ++++++---
 core/encoding/xml/tokenizer.odin   | 21 ++++++++++++++++++---
 2 files changed, 24 insertions(+), 6 deletions(-)

(limited to 'core/encoding/xml')

diff --git a/core/encoding/xml/debug_print.odin b/core/encoding/xml/debug_print.odin
index c4d6875cc..65b71e30b 100644
--- a/core/encoding/xml/debug_print.odin
+++ b/core/encoding/xml/debug_print.odin
@@ -27,10 +27,13 @@ print :: proc(writer: io.Writer, doc: ^Document) -> (written: int, err: io.Error
 	}
 	written += wprintf(writer, "[Encoding] %v\n", doc.encoding)
 
-	written += wprintf(writer, "[DOCTYPE] %v\n", doc.doctype.ident)
-	if len(doc.doctype.rest) > 0 {
-		wprintf(writer, "\t%v\n", doc.doctype.rest)
+	if len(doc.doctype.ident) > 0 {
+		written += wprintf(writer, "[DOCTYPE] %v\n", doc.doctype.ident)
+
+		if len(doc.doctype.rest) > 0 {
+			wprintf(writer, "\t%v\n", doc.doctype.rest)
+		}
 	}
 
 	if doc.root != nil {
diff --git a/core/encoding/xml/tokenizer.odin b/core/encoding/xml/tokenizer.odin
index 3dcffb0d6..e453552b8 100644
--- a/core/encoding/xml/tokenizer.odin
+++ b/core/encoding/xml/tokenizer.odin
@@ -205,7 +205,7 @@ scan_identifier :: proc(t: ^Tokenizer) -> string {
 	return string(t.src[offset : t.offset])
 }
 
-scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close := false) -> (value: string, err: Error) {
+scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close := false, multiline := true) -> (value: string, err: Error) {
 	err = .None
 	in_cdata := false
 
@@ -238,7 +238,7 @@ scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close
 			}
 
 		case '\n':
-			if !in_cdata {
+			if !(multiline || in_cdata) {
 				error(t, offset, string(t.src[offset : t.offset]))
 				error(t, offset, "[scan_string] Not terminated\n")
 				err = .Invalid_Tag_Value
@@ -256,7 +256,22 @@ scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close
 		}
 		advance_rune(t)
 	}
+	/*
+		Strip trailing whitespace.
+	*/
 	lit := string(t.src[offset : t.offset])
+
+	end := len(lit)
+	eat: for ; end > 0; end -= 1 {
+		ch := lit[end - 1]
+		switch ch {
+		case ' ', '\t', '\r', '\n':
+		case:
+			break eat
+		}
+	}
+	lit = lit[:end]
+
 	if consume_close {
 		advance_rune(t)
 	}
@@ -307,7 +322,7 @@ scan :: proc(t: ^Tokenizer) -> Token {
 		case ':':
 			kind = .Colon
 		case '"', '\'':
-			lit, err = scan_string(t, t.offset, ch, true)
+			lit, err = scan_string(t, t.offset, ch, true, false)
 			if err == .None {
 				kind = .String
 			} else {
-- 
cgit v1.2.3
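
Note (not part of the patch): a minimal standalone sketch of the trailing-whitespace strip that this
commit adds to the end of scan_string, extracted into a helper for illustration. The procedure name
trim_trailing_whitespace, the package name, and the example program around it are hypothetical and do
not exist in core/encoding/xml; only the backwards-scanning loop mirrors the patched code. The new
multiline parameter (default true) merely keeps quoted attribute values single-line by passing false
from scan, while tag bodies may now span lines without a CDATA section.

package example

import "core:fmt"

// Hypothetical helper mirroring the loop added at the end of scan_string:
// walk backwards from the end of the literal and drop spaces, tabs,
// carriage returns, and newlines, then reslice the string.
trim_trailing_whitespace :: proc(lit: string) -> string {
	end := len(lit)
	eat: for ; end > 0; end -= 1 {
		ch := lit[end - 1]
		switch ch {
		case ' ', '\t', '\r', '\n':
			// Trailing whitespace: keep scanning backwards.
		case:
			break eat
		}
	}
	return lit[:end]
}

main :: proc() {
	// A multi-line tag body that previously required a CDATA section;
	// with this change the tokenizer accepts it and strips what trails it.
	body := "Hello,\nWorld \t\r\n"
	fmt.println(trim_trailing_whitespace(body)) // prints "Hello," and "World" on two lines
}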