diff options
| author | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-12-02 21:07:40 +0100 |
|---|---|---|
| committer | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-12-05 02:52:23 +0100 |
| commit | 3d72e80ccf0f382f03a1c9407c4728862c5bca91 (patch) | |
| tree | d5ed66c7b2d2a8fe697eeccb35a0884977143f32 /core/encoding/entity | |
| parent | 2dd67dba89732b89adb0199bc0a99de4cbc34e8f (diff) | |
[xml] Implement optional unboxing of CDATA and decoding of tag values.
Diffstat (limited to 'core/encoding/entity')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | core/encoding/entity/entity.odin | 39 |
| -rw-r--r-- | core/encoding/entity/example/entity_example.odin | 67 |
| -rw-r--r-- | core/encoding/entity/example/test.html | 2 |
3 files changed, 36 insertions, 72 deletions
diff --git a/core/encoding/entity/entity.odin b/core/encoding/entity/entity.odin index e40896819..8742446e6 100644 --- a/core/encoding/entity/entity.odin +++ b/core/encoding/entity/entity.odin @@ -61,15 +61,21 @@ COMMENT_END :: "-->" */ XML_Decode_Option :: enum u8 { /* + Do not decode & entities. It decodes by default. + If given, overrides `Decode_CDATA`. + */ + No_Entity_Decode, + + /* CDATA is unboxed. */ - CDATA_Unbox, + Unbox_CDATA, /* Unboxed CDATA is decoded as well. - Ignored if `.CDATA_Unbox` is not given. + Ignored if `.Unbox_CDATA` is not given. */ - CDATA_Decode, + Decode_CDATA, /* Comments are stripped. @@ -129,7 +135,7 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator := } case: - if in_data && .CDATA_Decode not_in options { + if in_data && .Decode_CDATA not_in options { /* Unboxed, but undecoded. */ @@ -145,17 +151,20 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator := */ write_string(&builder, entity) } else { - if decoded, ok := xml_decode_entity(entity); ok { - write_rune(&builder, decoded) - } else { - /* - Decode failed. Pass through original. - */ - write_string(&builder, "&") - write_string(&builder, entity) - write_string(&builder, ";") + + if .No_Entity_Decode not_in options { + if decoded, ok := xml_decode_entity(entity); ok { + write_rune(&builder, decoded) + continue + } } + /* + Literal passthrough because the decode failed or we want entities not decoded. 
+ */ + write_string(&builder, "&") + write_string(&builder, entity) + write_string(&builder, ";") } } else { write_rune(&builder, t.r) @@ -290,7 +299,7 @@ _handle_xml_special :: proc(t: ^Tokenizer, builder: ^strings.Builder, options: X if string(t.src[t.offset:][:len(CDATA_START)]) == CDATA_START { t.read_offset += len(CDATA_START) - 1 - if .CDATA_Unbox in options && .CDATA_Decode in options { + if .Unbox_CDATA in options && .Decode_CDATA in options { /* We're unboxing _and_ decoding CDATA */ @@ -315,7 +324,7 @@ _handle_xml_special :: proc(t: ^Tokenizer, builder: ^strings.Builder, options: X cdata := string(t.src[offset : t.read_offset]) - if .CDATA_Unbox in options { + if .Unbox_CDATA in options { cdata = cdata[len(CDATA_START):] cdata = cdata[:len(cdata) - len(CDATA_END)] } diff --git a/core/encoding/entity/example/entity_example.odin b/core/encoding/entity/example/entity_example.odin index 8758d9ad9..161a44827 100644 --- a/core/encoding/entity/example/entity_example.odin +++ b/core/encoding/entity/example/entity_example.odin @@ -1,19 +1,11 @@ package unicode_entity_example import "core:encoding/xml" -import "core:encoding/entity" import "core:strings" import "core:mem" import "core:fmt" import "core:time" -OPTIONS :: xml.Options{ - flags = { - .Ignore_Unsupported, .Intern_Comments, - }, - expected_doctype = "", -} - doc_print :: proc(doc: ^xml.Document) { buf: strings.Builder defer strings.destroy_builder(&buf) @@ -29,6 +21,13 @@ _entities :: proc() { DOC :: #load("../../../../tests/core/assets/XML/unicode.xml") + OPTIONS :: xml.Options{ + flags = { + .Ignore_Unsupported, .Intern_Comments, + }, + expected_doctype = "", + } + parse_duration: time.Duration { @@ -50,57 +49,11 @@ _entities :: proc() { _main :: proc() { using fmt - doc, err := xml.parse(#load("test.html")) + options := xml.Options{ flags = { .Ignore_Unsupported, .Intern_Comments, .Unbox_CDATA, .Decode_SGML_Entities }} + doc, _ := xml.parse(#load("test.html"), options) + defer xml.destroy(doc) 
doc_print(doc) - - if false { - val := doc.root.children[1].children[2].value - - println() - replaced, ok := entity.decode_xml(val) - defer delete(replaced) - - printf("Before: '%v', Err: %v\n", val, err) - printf("Passthrough: '%v'\nOK: %v\n", replaced, ok) - println() - } - - if false { - val := doc.root.children[1].children[2].value - - println() - replaced, ok := entity.decode_xml(val, { .CDATA_Unbox }) - defer delete(replaced) - - printf("Before: '%v', Err: %v\n", val, err) - printf("CDATA_Unbox: '%v'\nOK: %v\n", replaced, ok) - println() - } - - if true { - val := doc.root.children[1].children[2].value - - println() - replaced, ok := entity.decode_xml(val, { .CDATA_Unbox, .CDATA_Decode }) - defer delete(replaced) - - printf("Before: '%v', Err: %v\n", val, err) - printf("CDATA_Decode: '%v'\nOK: %v\n", replaced, ok) - println() - } - - if true { - val := doc.root.children[1].children[1].value - - println() - replaced, ok := entity.decode_xml(val, { .Comment_Strip }) - defer delete(replaced) - - printf("Before: '%v', Err: %v\n", val, err) - printf("Comment_Strip: '%v'\nOK: %v\n", replaced, ok) - println() - } } main :: proc() { diff --git a/core/encoding/entity/example/test.html b/core/encoding/entity/example/test.html index 60e32bf03..62a0bb35a 100644 --- a/core/encoding/entity/example/test.html +++ b/core/encoding/entity/example/test.html @@ -16,9 +16,11 @@ <div id="test_cdata_in_comment" foo="">
Foozle]! © <!-- <![CDATA[ ® ]]> -->42&;1234&
</div>
+ <!-- EXPECTED: Foozle]! © 42&;1234& -->
<div id="test_cdata_unwrap_and_passthrough">
Foozle]! © <![CDATA[BOX ® /BOX]]>42&;1234&
</div>
+ <!-- EXPECTED: Foozle]! © BOX ® /BOX42&;1234& -->
<div>
| | | fj ` \ ® ϱ ∳
</div>
|