diff options
| author | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-12-05 21:06:33 +0100 |
|---|---|---|
| committer | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-12-05 21:06:33 +0100 |
| commit | 7ec88d24302dcdea38ac09996a2279f4de4f6a25 (patch) | |
| tree | 069095bfba25750aefde7d0c147fe05d637f3fab /core/encoding | |
| parent | d7200f61441b6acfc4f0b47e900095f08490da58 (diff) | |
[xml] Add option.
Diffstat (limited to 'core/encoding')
| -rw-r--r-- | core/encoding/entity/example/entity_example.odin | 4 | ||||
| -rw-r--r-- | core/encoding/xml/example/xml_example.odin | 53 | ||||
| -rw-r--r-- | core/encoding/xml/xml_reader.odin | 26 |
3 files changed, 55 insertions, 28 deletions
diff --git a/core/encoding/entity/example/entity_example.odin b/core/encoding/entity/example/entity_example.odin index 882203f48..6fc377f9d 100644 --- a/core/encoding/entity/example/entity_example.odin +++ b/core/encoding/entity/example/entity_example.odin @@ -64,8 +64,8 @@ main :: proc() { mem.tracking_allocator_init(&track, context.allocator) context.allocator = mem.tracking_allocator(&track) - _main() - //_entities() + // _main() + _entities() if len(track.allocation_map) > 0 { println() diff --git a/core/encoding/xml/example/xml_example.odin b/core/encoding/xml/example/xml_example.odin index 085252e92..daa3c5dab 100644 --- a/core/encoding/xml/example/xml_example.odin +++ b/core/encoding/xml/example/xml_example.odin @@ -2,35 +2,40 @@ package xml_example import "core:encoding/xml" import "core:os" -import "core:path" import "core:mem" import "core:fmt" - -/* - Silent error handler for the parser. -*/ -Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {} - -OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, }, expected_doctype = "unicode", } +import "core:time" +import "core:strings" +import "core:hash" example :: proc() { using fmt - filename := path.join(ODIN_ROOT, "tests", "core", "assets", "XML", "unicode.xml") - defer delete(filename) + doc: ^xml.Document + err: xml.Error + + DOC :: #load("../../../../tests/core/assets/XML/unicode.xml") - doc, err := xml.parse(filename, OPTIONS, Error_Handler) + parse_duration: time.Duration + { + time.SCOPED_TICK_DURATION(&parse_duration) + doc, err = xml.parse(DOC, xml.Options{flags={.Ignore_Unsupported}}) + } defer xml.destroy(doc) + ms := time.duration_milliseconds(parse_duration) + speed := (f64(1000.0) / ms) * f64(len(DOC)) / 1_024.0 / 1_024.0 + fmt.printf("Parse time: %v bytes in %.2f ms (%.2f MiB/s).\n", len(DOC), ms, speed) + if err != .None { printf("Load/Parse error: %v\n", err) if err == .File_Error { - printf("\"%v\" not found. Did you run \"tests\\download_assets.py\"?", filename) + println("\"unicode.xml\" not found. Did you run \"tests\\download_assets.py\"?") } os.exit(1) } - printf("\"%v\" loaded and parsed.\n", filename) + println("\"unicode.xml\" loaded and parsed.") charlist, charlist_ok := xml.find_child_by_ident(doc.root, "charlist") if !charlist_ok { @@ -40,17 +45,19 @@ example :: proc() { printf("Found `<charlist>` with %v children.\n", len(charlist.children)) - for char in charlist.children { - if char.ident != "character" { - eprintf("Expected `<character>`, got `<%v>`\n", char.ident) - os.exit(1) - } + crc32 := doc_hash(doc) + printf("[%v] CRC32: 0x%08x\n", "🎉" if crc32 == 0xcaa042b9 else "🤬", crc32) +} - if _, ok := xml.find_attribute_val_by_key(char, "dec"); !ok { - eprintln("`<character dec=\"...\">` attribute not found.") - os.exit(1) - } - } +doc_hash :: proc(doc: ^xml.Document, print := false) -> (crc32: u32) { + buf: strings.Builder + defer strings.destroy_builder(&buf) + w := strings.to_writer(&buf) + + xml.print(w, doc) + tree := strings.to_string(buf) + if print { fmt.println(tree) } + return hash.crc32(transmute([]u8)tree) } main :: proc() { diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin index 6f49b8e08..b169bd57a 100644 --- a/core/encoding/xml/xml_reader.odin +++ b/core/encoding/xml/xml_reader.odin @@ -71,6 +71,12 @@ Option_Flag :: enum { This option decodes them when encountered. */ Decode_SGML_Entities, + + /* + If a tag body has a comment, it will be stripped unless this option is given. + */ + Keep_Tag_Body_Comments, + } Option_Flags :: bit_set[Option_Flag; u8] @@ -413,15 +419,29 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err /* This should be a tag's body text. */ - body_text := scan_string(t, t.offset) or_return + body_text := scan_string(t, t.offset) or_return + needs_processing := .Unbox_CDATA in opts.flags + needs_processing |= .Decode_SGML_Entities in opts.flags + + if !needs_processing { + element.value = strings.intern_get(&doc.intern, body_text) + continue + } - decode_opts := entity.XML_Decode_Options{ .Comment_Strip } + decode_opts := entity.XML_Decode_Options{} + if .Keep_Tag_Body_Comments not_in opts.flags { + decode_opts += { .Comment_Strip } + } if .Decode_SGML_Entities not_in opts.flags { decode_opts += { .No_Entity_Decode } } + if .Unbox_CDATA in opts.flags { - decode_opts += { .Unbox_CDATA, .Decode_CDATA } + decode_opts += { .Unbox_CDATA } + if .Decode_SGML_Entities in opts.flags { + decode_opts += { .Decode_CDATA } + } } decoded, decode_err := entity.decode_xml(body_text, decode_opts) |