diff options
| author | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2025-04-19 20:25:44 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-04-19 20:25:44 +0200 |
| commit | 062a3c2fae3712c60af00798a0815509a732790b (patch) | |
| tree | 9658915735206e27b5a0437b8ec7af5809e38662 /core/encoding/entity | |
| parent | bc86b503922781091ec3ae54c722bd8ff33c7205 (diff) | |
Fix parsing of CDATA tags (#5059)
Fixes #5054
Diffstat (limited to 'core/encoding/entity')
| -rw-r--r-- | core/encoding/entity/entity.odin | 54 |
1 file changed, 26 insertions, 28 deletions
```diff
diff --git a/core/encoding/entity/entity.odin b/core/encoding/entity/entity.odin
index d2f1d46b2..cb8fa8611 100644
--- a/core/encoding/entity/entity.odin
+++ b/core/encoding/entity/entity.odin
@@ -108,7 +108,7 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
 				it couldn't have been part of an XML tag body to be decoded here.
 
 				Keep in mind that we could already *be* inside a CDATA tag.
-				If so, write `>` as a literal and continue.
+				If so, write `<` as a literal and continue.
 			*/
 			if in_data {
 				write_rune(&builder, '<')
@@ -119,11 +119,9 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
 		case ']':
 			// If we're unboxing _and_ decoding CDATA, we'll have to check for the end tag.
 			if in_data {
-				if t.read_offset + len(CDATA_END) < len(t.src) {
-					if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END {
-						in_data = false
-						t.read_offset += len(CDATA_END) - 1
-					}
+				if strings.has_prefix(t.src[t.offset:], CDATA_END) {
+					in_data = false
+					t.read_offset += len(CDATA_END) - 1
 				}
 				continue
 			} else {
@@ -297,40 +295,40 @@ _handle_xml_special :: proc(t: ^Tokenizer, builder: ^strings.Builder, options: X
 	assert(t != nil && t.r == '<')
 	if t.read_offset + len(CDATA_START) >= len(t.src) { return false, .None }
 
-	if string(t.src[t.offset:][:len(CDATA_START)]) == CDATA_START {
-		t.read_offset += len(CDATA_START) - 1
-
+	s := string(t.src[t.offset:])
+	if strings.has_prefix(s, CDATA_START) {
 		if .Unbox_CDATA in options && .Decode_CDATA in options {
 			// We're unboxing _and_ decoding CDATA
+			t.read_offset += len(CDATA_START) - 1
 			return true, .None
 		}
 
-		// CDATA is passed through.
-		offset := t.offset
-
-		// Scan until end of CDATA.
+		// CDATA is passed through. Scan until end of CDATA.
+		start_offset := t.offset
+		t.read_offset += len(CDATA_START)
 		for {
-			advance(t) or_return
-			if t.r < 0 { return true, .CDATA_Not_Terminated }
-
-			if t.read_offset + len(CDATA_END) < len(t.src) {
-				if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END {
-					t.read_offset += len(CDATA_END) - 1
+			advance(t)
+			if t.r < 0 {
+				// error(t, offset, "[scan_string] CDATA was not terminated\n")
+				return true, .CDATA_Not_Terminated
+			}
 
-					cdata := string(t.src[offset : t.read_offset])
-
-					if .Unbox_CDATA in options {
-						cdata = cdata[len(CDATA_START):]
-						cdata = cdata[:len(cdata) - len(CDATA_END)]
-					}
+			// Scan until the end of a CDATA tag.
+			if s = string(t.src[t.read_offset:]); strings.has_prefix(s, CDATA_END) {
+				t.read_offset += len(CDATA_END)
+				cdata := string(t.src[start_offset:t.read_offset])
 
-					write_string(builder, cdata)
-					return false, .None
+				if .Unbox_CDATA in options {
+					cdata = cdata[len(CDATA_START):]
+					cdata = cdata[:len(cdata) - len(CDATA_END)]
 				}
+				write_string(builder, cdata)
+				return false, .None
 			}
 		}
-	} else if string(t.src[t.offset:][:len(COMMENT_START)]) == COMMENT_START {
+
+	} else if strings.has_prefix(s, COMMENT_START) {
 		t.read_offset += len(COMMENT_START)
 		// Comment is passed through by default.
 		offset := t.offset
```