author    Jeroen van Rijn <Kelimion@users.noreply.github.com>  2024-06-12 12:52:48 +0200
committer Jeroen van Rijn <Kelimion@users.noreply.github.com>  2024-06-12 12:52:48 +0200
commit    ebadff555d70d5ef8faf91785696e5c5ea3605f8
tree      d50d29bf5e53ca855bc5a2109e814673363c6ce3
parent    e87c5bca58e00b8edaf24a25627b36351e92dc16
Update XML reader to normalize whitespace, part 1.
Diffstat (limited to 'core/encoding/xml/tokenizer.odin')
-rw-r--r--  core/encoding/xml/tokenizer.odin  43
1 file changed, 11 insertions(+), 32 deletions(-)
diff --git a/core/encoding/xml/tokenizer.odin b/core/encoding/xml/tokenizer.odin
index 0f87c366b..2d06038b7 100644
--- a/core/encoding/xml/tokenizer.odin
+++ b/core/encoding/xml/tokenizer.odin
@@ -218,9 +218,7 @@ scan_identifier :: proc(t: ^Tokenizer) -> string {
 	for is_valid_identifier_rune(t.ch) {
 		advance_rune(t)
 		if t.ch == ':' {
-			/*
-				A namespaced attr can have at most two parts, `namespace:ident`.
-			*/
+			// A namespaced attr can have at most two parts, `namespace:ident`.
 			if namespaced {
 				break
 			}
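For context, the `namespace:ident` rule above means an identifier such as `xlink:href` splits into exactly one namespace part and one local part. A minimal, runnable Odin sketch of that split (standalone, not part of the tokenizer):

package main

import "core:fmt"
import "core:strings"

main :: proc() {
	ident := "xlink:href"
	// At most two parts: everything before the first ':' is the
	// namespace, everything after it is the local identifier.
	if i := strings.index_byte(ident, ':'); i >= 0 {
		fmt.println("namespace:", ident[:i])   // xlink
		fmt.println("ident:    ", ident[i+1:]) // href
	}
}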
@@ -268,14 +266,10 @@ scan_comment :: proc(t: ^Tokenizer) -> (comment: string, err: Error) {
 	return string(t.src[offset : t.offset - 1]), .None
 }
 
-/*
-	Skip CDATA
-*/
+// Skip CDATA
 skip_cdata :: proc(t: ^Tokenizer) -> (err: Error) {
 	if t.read_offset + len(CDATA_START) >= len(t.src) {
-		/*
-			Can't be the start of a CDATA tag.
-		*/
+		// Can't be the start of a CDATA tag.
 		return .None
 	}
@@ -290,9 +284,7 @@ skip_cdata :: proc(t: ^Tokenizer) -> (err: Error) {
 				return .Premature_EOF
 			}
 
-			/*
-				Scan until the end of a CDATA tag.
-			*/
+			// Scan until the end of a CDATA tag.
 			if t.read_offset + len(CDATA_END) < len(t.src) {
 				if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END {
 					t.read_offset += len(CDATA_END)
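The two `skip_cdata` hunks above first bounds-check for `CDATA_START`, then scan forward to `CDATA_END`. A standalone sketch of the same detection, assuming the usual `<![CDATA[` / `]]>` delimiters:

package main

import "core:fmt"
import "core:strings"

CDATA_START :: "<![CDATA["
CDATA_END   :: "]]>"

main :: proc() {
	src := "<![CDATA[1 < 2 && 2 > 1]]>"
	if !strings.has_prefix(src, CDATA_START) {
		return // Can't be the start of a CDATA tag.
	}
	body := src[len(CDATA_START):]
	// Scan until the end of the CDATA tag.
	if end := strings.index(body, CDATA_END); end >= 0 {
		fmt.println(body[:end]) // raw, unparsed character data
	}
}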
@@ -319,14 +311,10 @@ scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close
 		case '<':
 			if peek_byte(t) == '!' {
 				if peek_byte(t, 1) == '[' {
-					/*
-						Might be the start of a CDATA tag.
-					*/
+					// Might be the start of a CDATA tag.
 					skip_cdata(t) or_return
 				} else if peek_byte(t, 1) == '-' && peek_byte(t, 2) == '-' {
-					/*
-						Comment start. Eat comment.
-					*/
+					// Comment start. Eat comment.
 					t.read_offset += 3
 					_ = scan_comment(t) or_return
 				}
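Inside a string body, the lookahead above distinguishes `<![` (possible CDATA) from `<!--` (comment start). The same classification, sketched with string prefixes instead of `peek_byte`:

package main

import "core:fmt"
import "core:strings"

classify :: proc(s: string) -> string {
	switch {
	case strings.has_prefix(s, "<!["):  return "possible CDATA"
	case strings.has_prefix(s, "<!--"): return "comment"
	}
	return "other markup"
}

main :: proc() {
	fmt.println(classify("<![CDATA[x]]>")) // possible CDATA
	fmt.println(classify("<!-- note -->")) // comment
	fmt.println(classify("</tag>"))        // other markup
}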
@@ -342,17 +330,13 @@ scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close
 		}
 
 		if t.ch == close {
-			/*
-				If it's not a CDATA or comment, it's the end of this body.
-			*/
+			// If it's not a CDATA or comment, it's the end of this body.
 			break loop
 		}
 		advance_rune(t)
 	}
 
-	/*
-		Strip trailing whitespace.
-	*/
+	// Strip trailing whitespace.
 	lit := string(t.src[offset : t.offset])
 	end := len(lit)
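The byte-walking strip loop that follows `end := len(lit)` is not shown in this hunk. A standalone equivalent using `core:strings`, purely for illustration of what "strip trailing whitespace" does to a text body:

package main

import "core:fmt"
import "core:strings"

main :: proc() {
	lit := "body text \t\r\n"
	lit = strings.trim_right_space(lit)
	fmt.printf("%q\n", lit) // "body text"
}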
@@ -369,11 +353,6 @@ scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close
 	if consume_close {
 		advance_rune(t)
 	}
-
-	/*
-		TODO: Handle decoding escape characters and unboxing CDATA.
-	*/
-
 	return lit, err
 }
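The deleted TODO mentioned decoding escape characters (XML entities) and unboxing CDATA. As background, the five predefined XML entities decode as below; this is a standalone sketch, not the library's implementation:

package main

import "core:fmt"
import "core:strings"

main :: proc() {
	// The five predefined XML entities. `&amp;` must be decoded
	// last so freshly produced '&' bytes aren't re-expanded.
	pairs := [?][2]string{
		{"&lt;", "<"}, {"&gt;", ">"}, {"&apos;", "'"}, {"&quot;", "\""}, {"&amp;", "&"},
	}
	s := "1 &lt; 2 &amp;&amp; 2 &gt; 1"
	for p in pairs {
		s, _ = strings.replace_all(s, p[0], p[1])
	}
	fmt.println(s) // 1 < 2 && 2 > 1
}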
@@ -384,7 +363,7 @@ peek :: proc(t: ^Tokenizer) -> (token: Token) {
 	return token
 }
 
-scan :: proc(t: ^Tokenizer) -> Token {
+scan :: proc(t: ^Tokenizer, multiline_string := false) -> Token {
 	skip_whitespace(t)
 	offset := t.offset
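`multiline_string := false` is an Odin default-valued parameter, so existing callers of `scan` keep compiling unchanged while new call sites can opt in. A tiny standalone demonstration of that mechanism (names here are illustrative, not the tokenizer's):

package main

import "core:fmt"

// Mirrors the shape of the new signature: the flag defaults to false.
scan_demo :: proc(multiline_string := false) -> string {
	return multiline_string ? "multiline" : "single-line"
}

main :: proc() {
	fmt.println(scan_demo())                        // single-line (old behavior)
	fmt.println(scan_demo(multiline_string = true)) // multiline (opt-in)
}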
@@ -418,7 +397,7 @@ scan :: proc(t: ^Tokenizer) -> Token {
 
 	case '"', '\'':
 		kind = .Invalid
-		lit, err = scan_string(t, t.offset, ch, true, false)
+		lit, err = scan_string(t, t.offset, ch, true, multiline_string)
 		if err == .None {
 			kind = .String
 		}
@@ -435,4 +414,4 @@ scan :: proc(t: ^Tokenizer) -> Token {
 		lit = string(t.src[offset : t.offset])
 	}
 	return Token{kind, lit, pos}
-}
+}
\ No newline at end of file