about summary refs log tree commit diff
path: root/core/encoding
diff options
context:
space:
mode:
author Jeroen van Rijn <Kelimion@users.noreply.github.com> 2021-12-05 21:06:33 +0100
committer Jeroen van Rijn <Kelimion@users.noreply.github.com> 2021-12-05 21:06:33 +0100
commit 7ec88d24302dcdea38ac09996a2279f4de4f6a25 (patch)
tree 069095bfba25750aefde7d0c147fe05d637f3fab /core/encoding
parent d7200f61441b6acfc4f0b47e900095f08490da58 (diff)
[xml] Add option.
Diffstat (limited to 'core/encoding')
-rw-r--r-- core/encoding/entity/example/entity_example.odin 4
-rw-r--r-- core/encoding/xml/example/xml_example.odin 53
-rw-r--r-- core/encoding/xml/xml_reader.odin 26
3 files changed, 55 insertions, 28 deletions
diff --git a/core/encoding/entity/example/entity_example.odin b/core/encoding/entity/example/entity_example.odin
index 882203f48..6fc377f9d 100644
--- a/core/encoding/entity/example/entity_example.odin
+++ b/core/encoding/entity/example/entity_example.odin
@@ -64,8 +64,8 @@ main :: proc() {
mem.tracking_allocator_init(&track, context.allocator)
context.allocator = mem.tracking_allocator(&track)
- _main()
- //_entities()
+ // _main()
+ _entities()
if len(track.allocation_map) > 0 {
println()
diff --git a/core/encoding/xml/example/xml_example.odin b/core/encoding/xml/example/xml_example.odin
index 085252e92..daa3c5dab 100644
--- a/core/encoding/xml/example/xml_example.odin
+++ b/core/encoding/xml/example/xml_example.odin
@@ -2,35 +2,40 @@ package xml_example
import "core:encoding/xml"
import "core:os"
-import "core:path"
import "core:mem"
import "core:fmt"
-
-/*
- Silent error handler for the parser.
-*/
-Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {}
-
-OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, }, expected_doctype = "unicode", }
+import "core:time"
+import "core:strings"
+import "core:hash"
example :: proc() {
using fmt
- filename := path.join(ODIN_ROOT, "tests", "core", "assets", "XML", "unicode.xml")
- defer delete(filename)
+ doc: ^xml.Document
+ err: xml.Error
+
+ DOC :: #load("../../../../tests/core/assets/XML/unicode.xml")
- doc, err := xml.parse(filename, OPTIONS, Error_Handler)
+ parse_duration: time.Duration
+ {
+ time.SCOPED_TICK_DURATION(&parse_duration)
+ doc, err = xml.parse(DOC, xml.Options{flags={.Ignore_Unsupported}})
+ }
defer xml.destroy(doc)
+ ms := time.duration_milliseconds(parse_duration)
+ speed := (f64(1000.0) / ms) * f64(len(DOC)) / 1_024.0 / 1_024.0
+ fmt.printf("Parse time: %v bytes in %.2f ms (%.2f MiB/s).\n", len(DOC), ms, speed)
+
if err != .None {
printf("Load/Parse error: %v\n", err)
if err == .File_Error {
- printf("\"%v\" not found. Did you run \"tests\\download_assets.py\"?", filename)
+ println("\"unicode.xml\" not found. Did you run \"tests\\download_assets.py\"?")
}
os.exit(1)
}
- printf("\"%v\" loaded and parsed.\n", filename)
+ println("\"unicode.xml\" loaded and parsed.")
charlist, charlist_ok := xml.find_child_by_ident(doc.root, "charlist")
if !charlist_ok {
@@ -40,17 +45,19 @@ example :: proc() {
printf("Found `<charlist>` with %v children.\n", len(charlist.children))
- for char in charlist.children {
- if char.ident != "character" {
- eprintf("Expected `<character>`, got `<%v>`\n", char.ident)
- os.exit(1)
- }
+ crc32 := doc_hash(doc)
+ printf("[%v] CRC32: 0x%08x\n", "🎉" if crc32 == 0xcaa042b9 else "🤬", crc32)
+}
- if _, ok := xml.find_attribute_val_by_key(char, "dec"); !ok {
- eprintln("`<character dec=\"...\">` attribute not found.")
- os.exit(1)
- }
- }
+doc_hash :: proc(doc: ^xml.Document, print := false) -> (crc32: u32) {
+ buf: strings.Builder
+ defer strings.destroy_builder(&buf)
+ w := strings.to_writer(&buf)
+
+ xml.print(w, doc)
+ tree := strings.to_string(buf)
+ if print { fmt.println(tree) }
+ return hash.crc32(transmute([]u8)tree)
}
main :: proc() {
diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin
index 6f49b8e08..b169bd57a 100644
--- a/core/encoding/xml/xml_reader.odin
+++ b/core/encoding/xml/xml_reader.odin
@@ -71,6 +71,12 @@ Option_Flag :: enum {
This option decodes them when encountered.
*/
Decode_SGML_Entities,
+
+ /*
+ If a tag body has a comment, it will be stripped unless this option is given.
+ */
+ Keep_Tag_Body_Comments,
+
}
Option_Flags :: bit_set[Option_Flag; u8]
@@ -413,15 +419,29 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err
/*
This should be a tag's body text.
*/
- body_text := scan_string(t, t.offset) or_return
+ body_text := scan_string(t, t.offset) or_return
+ needs_processing := .Unbox_CDATA in opts.flags
+ needs_processing |= .Decode_SGML_Entities in opts.flags
+
+ if !needs_processing {
+ element.value = strings.intern_get(&doc.intern, body_text)
+ continue
+ }
- decode_opts := entity.XML_Decode_Options{ .Comment_Strip }
+ decode_opts := entity.XML_Decode_Options{}
+ if .Keep_Tag_Body_Comments not_in opts.flags {
+ decode_opts += { .Comment_Strip }
+ }
if .Decode_SGML_Entities not_in opts.flags {
decode_opts += { .No_Entity_Decode }
}
+
if .Unbox_CDATA in opts.flags {
- decode_opts += { .Unbox_CDATA, .Decode_CDATA }
+ decode_opts += { .Unbox_CDATA }
+ if .Decode_SGML_Entities in opts.flags {
+ decode_opts += { .Decode_CDATA }
+ }
}
decoded, decode_err := entity.decode_xml(body_text, decode_opts)