diff options
| author | Franz Höltermann <Francis_the_cat@gmx.de> | 2024-06-14 17:43:09 +0200 |
|---|---|---|
| committer | Franz Höltermann <Francis_the_cat@gmx.de> | 2024-06-14 17:43:09 +0200 |
| commit | c3302615a3f03d2d8556b8a7ba7df01ac6448bff (patch) | |
| tree | 6cdf05bc057ed93a7e66d6a2467b63f048eeecff /core/encoding/xml/tokenizer.odin | |
| parent | 3c3f0f90c2c8d063845cf0ecb61a23705749e445 (diff) | |
| parent | cd5fa8523f79ce981e5047dad5b66155f493d169 (diff) | |
Merge branch 'master' of https://github.com/FrancisTheCat/Odin
Diffstat (limited to 'core/encoding/xml/tokenizer.odin')
| -rw-r--r-- | core/encoding/xml/tokenizer.odin | 43 |
1 files changed, 11 insertions, 32 deletions
diff --git a/core/encoding/xml/tokenizer.odin b/core/encoding/xml/tokenizer.odin index 0f87c366b..2d06038b7 100644 --- a/core/encoding/xml/tokenizer.odin +++ b/core/encoding/xml/tokenizer.odin @@ -218,9 +218,7 @@ scan_identifier :: proc(t: ^Tokenizer) -> string { for is_valid_identifier_rune(t.ch) { advance_rune(t) if t.ch == ':' { - /* - A namespaced attr can have at most two parts, `namespace:ident`. - */ + // A namespaced attr can have at most two parts, `namespace:ident`. if namespaced { break } @@ -268,14 +266,10 @@ scan_comment :: proc(t: ^Tokenizer) -> (comment: string, err: Error) { return string(t.src[offset : t.offset - 1]), .None } -/* - Skip CDATA -*/ +// Skip CDATA skip_cdata :: proc(t: ^Tokenizer) -> (err: Error) { if t.read_offset + len(CDATA_START) >= len(t.src) { - /* - Can't be the start of a CDATA tag. - */ + // Can't be the start of a CDATA tag. return .None } @@ -290,9 +284,7 @@ skip_cdata :: proc(t: ^Tokenizer) -> (err: Error) { return .Premature_EOF } - /* - Scan until the end of a CDATA tag. - */ + // Scan until the end of a CDATA tag. if t.read_offset + len(CDATA_END) < len(t.src) { if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END { t.read_offset += len(CDATA_END) @@ -319,14 +311,10 @@ scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close case '<': if peek_byte(t) == '!' { if peek_byte(t, 1) == '[' { - /* - Might be the start of a CDATA tag. - */ + // Might be the start of a CDATA tag. skip_cdata(t) or_return } else if peek_byte(t, 1) == '-' && peek_byte(t, 2) == '-' { - /* - Comment start. Eat comment. - */ + // Comment start. Eat comment. t.read_offset += 3 _ = scan_comment(t) or_return } @@ -342,17 +330,13 @@ scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close } if t.ch == close { - /* - If it's not a CDATA or comment, it's the end of this body. - */ + // If it's not a CDATA or comment, it's the end of this body. break loop } advance_rune(t) } - /* - Strip trailing whitespace. - */ + // Strip trailing whitespace. lit := string(t.src[offset : t.offset]) end := len(lit) @@ -369,11 +353,6 @@ scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close if consume_close { advance_rune(t) } - - /* - TODO: Handle decoding escape characters and unboxing CDATA. - */ - return lit, err } @@ -384,7 +363,7 @@ peek :: proc(t: ^Tokenizer) -> (token: Token) { return token } -scan :: proc(t: ^Tokenizer) -> Token { +scan :: proc(t: ^Tokenizer, multiline_string := false) -> Token { skip_whitespace(t) offset := t.offset @@ -418,7 +397,7 @@ scan :: proc(t: ^Tokenizer) -> Token { case '"', '\'': kind = .Invalid - lit, err = scan_string(t, t.offset, ch, true, false) + lit, err = scan_string(t, t.offset, ch, true, multiline_string) if err == .None { kind = .String } @@ -435,4 +414,4 @@ scan :: proc(t: ^Tokenizer) -> Token { lit = string(t.src[offset : t.offset]) } return Token{kind, lit, pos} -} +}
\ No newline at end of file |