aboutsummaryrefslogtreecommitdiff
path: root/core/encoding/xml
diff options
context:
space:
mode:
authorJeroen van Rijn <Kelimion@users.noreply.github.com>2021-12-02 21:07:40 +0100
committerJeroen van Rijn <Kelimion@users.noreply.github.com>2021-12-05 02:52:23 +0100
commit3d72e80ccf0f382f03a1c9407c4728862c5bca91 (patch)
treed5ed66c7b2d2a8fe697eeccb35a0884977143f32 /core/encoding/xml
parent2dd67dba89732b89adb0199bc0a99de4cbc34e8f (diff)
[xml] Implement optional unboxing of CDATA and decoding of tag values.
Diffstat (limited to 'core/encoding/xml')
-rw-r--r--core/encoding/xml/xml_reader.odin41
1 files changed, 20 insertions, 21 deletions
diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin
index 146c278cb..6f49b8e08 100644
--- a/core/encoding/xml/xml_reader.odin
+++ b/core/encoding/xml/xml_reader.odin
@@ -18,10 +18,6 @@ package xml
- We do NOT support UTF-16. If you have a UTF-16 XML file, please convert it to UTF-8 first. Also, our condolences.
- <[!ELEMENT and <[!ATTLIST are not supported, and will be either ignored or return an error depending on the parser options.
- TODO:
- - Optional CDATA unboxing.
- - Optional `&gt;`, `&#32;`, `&#x20;` and other escape substitution in tag bodies.
-
MAYBE:
- XML writer?
- Serialize/deserialize Odin types?
@@ -31,6 +27,7 @@ package xml
*/
import "core:strings"
+import "core:encoding/entity"
import "core:mem"
import "core:os"
@@ -196,12 +193,6 @@ Error :: enum {
Duplicate_Attribute,
Conflicting_Options,
-
- /*
- Unhandled TODO:
- */
- Unhandled_CDATA_Unboxing,
- Unhandled_SGML_Entity_Decoding,
}
/*
@@ -422,8 +413,25 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err
/*
This should be a tag's body text.
*/
- body_text := scan_string(t, t.offset) or_return
- element.value = strings.intern_get(&doc.intern, body_text)
+ body_text := scan_string(t, t.offset) or_return
+
+ decode_opts := entity.XML_Decode_Options{ .Comment_Strip }
+
+ if .Decode_SGML_Entities not_in opts.flags {
+ decode_opts += { .No_Entity_Decode }
+ }
+ if .Unbox_CDATA in opts.flags {
+ decode_opts += { .Unbox_CDATA, .Decode_CDATA }
+ }
+
+ decoded, decode_err := entity.decode_xml(body_text, decode_opts)
+ defer delete(decoded)
+
+ if decode_err == .None {
+ element.value = strings.intern_get(&doc.intern, decoded)
+ } else {
+ element.value = strings.intern_get(&doc.intern, body_text)
+ }
}
}
@@ -488,15 +496,6 @@ validate_options :: proc(options: Options) -> (validated: Options, err: Error) {
if .Error_on_Unsupported in validated.flags && .Ignore_Unsupported in validated.flags {
return options, .Conflicting_Options
}
-
- if .Unbox_CDATA in validated.flags {
- return options, .Unhandled_CDATA_Unboxing
- }
-
- if .Decode_SGML_Entities in validated.flags {
- return options, .Unhandled_SGML_Entity_Decoding
- }
-
return validated, .None
}