diff options
| author | gingerBill <bill@gingerbill.org> | 2021-09-28 14:53:16 +0100 |
|---|---|---|
| committer | gingerBill <bill@gingerbill.org> | 2021-09-28 14:53:16 +0100 |
| commit | c9330548726965ffe64f97c37b43bc96a30f4dc7 (patch) | |
| tree | 4c7c24960e7280d34094dcf2cc42cc5d566faebb /core/encoding | |
| parent | 6f872e04c85213406dcda52d3d8051f0a3022f6d (diff) | |
Support `json.Specification.MJSON`
https://bitsquid.blogspot.com/2009/10/simplified-json-notation.html
Diffstat (limited to 'core/encoding')
| -rw-r--r-- | core/encoding/json/parser.odin | 131 | ||||
| -rw-r--r-- | core/encoding/json/tokenizer.odin | 80 | ||||
| -rw-r--r-- | core/encoding/json/types.odin | 2 | ||||
| -rw-r--r-- | core/encoding/json/unmarshal.odin | 90 | ||||
| -rw-r--r-- | core/encoding/json/validator.odin | 71 |
5 files changed, 201 insertions, 173 deletions
diff --git a/core/encoding/json/parser.odin b/core/encoding/json/parser.odin index b74cd7402..0bb4c03a5 100644 --- a/core/encoding/json/parser.odin +++ b/core/encoding/json/parser.odin @@ -14,6 +14,9 @@ Parser :: struct { } make_parser :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> Parser { + return make_parser_from_string(string(data), spec, parse_integers, allocator) +} +make_parser_from_string :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> Parser { p: Parser p.tok = make_tokenizer(data, spec, parse_integers) p.spec = spec @@ -23,11 +26,25 @@ make_parser :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers return p } + parse :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> (Value, Error) { + return parse_string(string(data), spec, parse_integers, allocator) +} + +parse_string :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false, allocator := context.allocator) -> (Value, Error) { context.allocator = allocator - p := make_parser(data, spec, parse_integers, allocator) + p := make_parser_from_string(data, spec, parse_integers, allocator) - if p.spec == Specification.JSON5 { + switch p.spec { + case .JSON: + return parse_object(&p) + case .JSON5: + return parse_value(&p) + case .MJSON: + #partial switch p.curr_token.kind { + case .Ident, .String: + return parse_object_body(&p, .EOF) + } return parse_value(&p) } return parse_object(&p) @@ -59,12 +76,34 @@ expect_token :: proc(p: ^Parser, kind: Token_Kind) -> Error { prev := p.curr_token advance_token(p) if prev.kind == kind { - return .None + return nil } return .Unexpected_Token } +parse_colon :: proc(p: ^Parser) -> (err: Error) { + colon_err := expect_token(p, .Colon) + if colon_err == nil { + return nil + } + return .Expected_Colon_After_Key +} + +parse_comma :: proc(p: ^Parser) -> (do_break: bool) { + switch p.spec { + case .JSON5, .MJSON: + if allow_token(p, .Comma) { + return false + } + return false + case .JSON: + if !allow_token(p, .Comma) { + return true + } + } + return false +} parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) { token := p.curr_token @@ -102,9 +141,15 @@ parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) { case .Open_Bracket: return parse_array(p) + + case .Ident: + if p.spec == .MJSON { + advance_token(p) + return string(token.text), nil + } case: - if p.spec == Specification.JSON5 { + if p.spec != .JSON { #partial switch token.kind { case .Infinity: inf: u64 = 0x7ff0000000000000 @@ -136,7 +181,7 @@ parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) { array: Array array.allocator = p.allocator - defer if err != .None { + defer if err != nil { for elem in array { destroy_value(elem) } @@ -146,11 +191,8 @@ parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) { for p.curr_token.kind != .Close_Bracket { elem := parse_value(p) or_return append(&array, elem) - - // Disallow trailing commas for the time being - if allow_token(p, .Comma) { - continue - } else { + + if parse_comma(p) { break } } @@ -187,31 +229,21 @@ clone_string :: proc(s: string, allocator: mem.Allocator) -> (str: string, err: parse_object_key :: proc(p: ^Parser, key_allocator: mem.Allocator) -> (key: string, err: Error) { tok := p.curr_token - if p.spec == Specification.JSON5 { - if tok.kind == .String { - expect_token(p, .String) - key = unquote_string(tok, p.spec, key_allocator) or_return - return - } else if tok.kind == .Ident { - expect_token(p, .Ident) - key = clone_string(tok.text, key_allocator) or_return - return + if p.spec != .JSON { + if allow_token(p, .Ident) { + return clone_string(tok.text, key_allocator) } } - if tok_err := expect_token(p, .String); tok_err != .None { + if tok_err := expect_token(p, .String); tok_err != nil { err = .Expected_String_For_Object_Key return } - key = unquote_string(tok, p.spec, key_allocator) or_return - return + return unquote_string(tok, p.spec, key_allocator) } -parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) { - expect_token(p, .Open_Brace) or_return - - obj: Object +parse_object_body :: proc(p: ^Parser, end_token: Token_Kind) -> (obj: Object, err: Error) { obj.allocator = p.allocator - defer if err != .None { + defer if err != nil { for key, elem in obj { delete(key, p.allocator) destroy_value(elem) @@ -219,19 +251,9 @@ parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) { delete(obj) } - for p.curr_token.kind != .Close_Brace { - key: string - key, err = parse_object_key(p, p.allocator) - if err != .None { - delete(key, p.allocator) - return - } - - if colon_err := expect_token(p, .Colon); colon_err != .None { - err = .Expected_Colon_After_Key - return - } - + for p.curr_token.kind != end_token { + key := parse_object_key(p, p.allocator) or_return + parse_colon(p) or_return elem := parse_value(p) or_return if key in obj { @@ -241,22 +263,17 @@ parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) { } obj[key] = elem - - if p.spec == Specification.JSON5 { - // Allow trailing commas - if allow_token(p, .Comma) { - continue - } - } else { - // Disallow trailing commas - if allow_token(p, .Comma) { - continue - } else { - break - } + + if parse_comma(p) { + break } - } + } + return +} +parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) { + expect_token(p, .Open_Brace) or_return + obj := parse_object_body(p, .Close_Brace) or_return expect_token(p, .Close_Brace) or_return value = obj return @@ -387,7 +404,7 @@ unquote_string :: proc(token: Token, spec: Specification, allocator := context.a case '0': - if spec == Specification.JSON5 { + if spec != .JSON { b[w] = '\x00' i += 1 w += 1 @@ -395,7 +412,7 @@ unquote_string :: proc(token: Token, spec: Specification, allocator := context.a break loop } case 'v': - if spec == Specification.JSON5 { + if spec != .JSON { b[w] = '\v' i += 1 w += 1 @@ -404,7 +421,7 @@ unquote_string :: proc(token: Token, spec: Specification, allocator := context.a } case 'x': - if spec == Specification.JSON5 { + if spec != .JSON { i -= 1 // Include the \x in the check for sanity sake r := get_u2_rune(s[i:]) if r < 0 { diff --git a/core/encoding/json/tokenizer.odin b/core/encoding/json/tokenizer.odin index 26d3d8123..f1b0db6ba 100644 --- a/core/encoding/json/tokenizer.odin +++ b/core/encoding/json/tokenizer.odin @@ -43,17 +43,18 @@ Token_Kind :: enum { Tokenizer :: struct { using pos: Pos, - data: []byte, + data: string, r: rune, // current rune w: int, // current rune width in bytes curr_line_offset: int, spec: Specification, parse_integers: bool, + insert_comma: bool, } -make_tokenizer :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false) -> Tokenizer { +make_tokenizer :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers := false) -> Tokenizer { t := Tokenizer{pos = {line=1}, data = data, spec = spec, parse_integers = parse_integers} next_rune(&t) if t.r == utf8.RUNE_BOM { @@ -64,11 +65,15 @@ make_tokenizer :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_intege next_rune :: proc(t: ^Tokenizer) -> rune #no_bounds_check { if t.offset >= len(t.data) { - return utf8.RUNE_EOF + t.r = utf8.RUNE_EOF + } else { + t.offset += t.w + t.r, t.w = utf8.decode_rune_in_string(t.data[t.offset:]) + t.pos.column = t.offset - t.curr_line_offset + if t.offset >= len(t.data) { + t.r = utf8.RUNE_EOF + } } - t.offset += t.w - t.r, t.w = utf8.decode_rune(t.data[t.offset:]) - t.pos.column = t.offset - t.curr_line_offset return t.r } @@ -120,18 +125,21 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { return false } - skip_whitespace :: proc(t: ^Tokenizer) -> rune { + skip_whitespace :: proc(t: ^Tokenizer, on_newline: bool) -> rune { loop: for t.offset < len(t.data) { switch t.r { case ' ', '\t', '\v', '\f', '\r': next_rune(t) case '\n': + if on_newline { + break loop + } t.line += 1 t.curr_line_offset = t.offset t.pos.column = 1 next_rune(t) case: - if t.spec == .JSON5 { + if t.spec != .JSON { switch t.r { case 0x2028, 0x2029, 0xFEFF: next_rune(t) @@ -164,7 +172,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { } } - skip_whitespace(t) + skip_whitespace(t, t.insert_comma) token.pos = t.pos @@ -179,6 +187,12 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { case utf8.RUNE_EOF, '\x00': token.kind = .EOF err = .EOF + + case '\n': + t.insert_comma = false + token.text = "," + token.kind = .Comma + return case 'A'..='Z', 'a'..='z', '_': token.kind = .Ident @@ -190,7 +204,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { case "false": token.kind = .False case "true": token.kind = .True case: - if t.spec == .JSON5 { + if t.spec != .JSON { switch str { case "Infinity": token.kind = .Infinity case "NaN": token.kind = .NaN @@ -200,7 +214,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { case '+': err = .Illegal_Character - if t.spec != .JSON5 { + if t.spec == .JSON { break } fallthrough @@ -213,7 +227,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { // Illegal use of +/- err = .Illegal_Character - if t.spec == .JSON5 { + if t.spec != .JSON { if t.r == 'I' || t.r == 'N' { skip_alphanum(t) } @@ -228,7 +242,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { case '0'..='9': token.kind = t.parse_integers ? .Integer : .Float - if t.spec == .JSON5 { // Hexadecimal Numbers + if t.spec != .JSON { // Hexadecimal Numbers if curr_rune == '0' && (t.r == 'x' || t.r == 'X') { next_rune(t) skip_hex_digits(t) @@ -258,7 +272,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { case '.': err = .Illegal_Character - if t.spec == .JSON5 { // Allow leading decimal point + if t.spec != .JSON { // Allow leading decimal point skip_digits(t) if t.r == 'e' || t.r == 'E' { switch r := next_rune(t); r { @@ -276,7 +290,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { case '\'': err = .Illegal_Character - if t.spec != .JSON5 { + if t.spec == .JSON { break } fallthrough @@ -304,16 +318,25 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { } - case ',': token.kind = .Comma + case ',': + token.kind = .Comma + t.insert_comma = false case ':': token.kind = .Colon case '{': token.kind = .Open_Brace case '}': token.kind = .Close_Brace case '[': token.kind = .Open_Bracket case ']': token.kind = .Close_Bracket + + case '=': + if t.spec == .MJSON { + token.kind = .Colon + } else { + err = .Illegal_Character + } case '/': err = .Illegal_Character - if t.spec == .JSON5 { + if t.spec != .JSON { switch t.r { case '/': // Single-line comments @@ -339,6 +362,21 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { } token.text = string(t.data[token.offset : t.offset]) + + if t.spec == .MJSON { + switch token.kind { + case .Invalid: + // preserve insert_comma info + case .EOF: + t.insert_comma = false + case .Colon, .Comma, .Open_Brace, .Open_Bracket: + t.insert_comma = false + case .Null, .False, .True, .Infinity, .NaN, + .Ident, .Integer, .Float, .String, + .Close_Brace, .Close_Bracket: + t.insert_comma = true + } + } return } @@ -356,7 +394,7 @@ is_valid_number :: proc(str: string, spec: Specification) -> bool { if s == "" { return false } - } else if spec == .JSON5 { + } else if spec != .JSON { if s[0] == '+' { // Allow positive sign s = s[1:] if s == "" { @@ -374,7 +412,7 @@ is_valid_number :: proc(str: string, spec: Specification) -> bool { s = s[1:] } case '.': - if spec == .JSON5 { // Allow leading decimal point + if spec != .JSON { // Allow leading decimal point s = s[1:] } else { return false @@ -383,7 +421,7 @@ is_valid_number :: proc(str: string, spec: Specification) -> bool { return false } - if spec == .JSON5 { + if spec != .JSON { if len(s) == 1 && s[0] == '.' { // Allow trailing decimal point return true } @@ -424,7 +462,7 @@ is_valid_string_literal :: proc(str: string, spec: Specification) -> bool { return false } if s[0] != '"' || s[len(s)-1] != '"' { - if spec == .JSON5 { + if spec != .JSON { if s[0] != '\'' || s[len(s)-1] != '\'' { return false } diff --git a/core/encoding/json/types.odin b/core/encoding/json/types.odin index 6a8f1ed07..a266be536 100644 --- a/core/encoding/json/types.odin +++ b/core/encoding/json/types.odin @@ -3,7 +3,7 @@ package json Specification :: enum { JSON, JSON5, // https://json5.org/ - // MJSON, // http://bitsquid.blogspot.com/2009/09/json-configuration-data.html + MJSON, // https://bitsquid.blogspot.com/2009/10/simplified-json-notation.html } DEFAULT_SPECIFICATION :: Specification.JSON5 diff --git a/core/encoding/json/unmarshal.odin b/core/encoding/json/unmarshal.odin index 1ae612f30..e05c140da 100644 --- a/core/encoding/json/unmarshal.odin +++ b/core/encoding/json/unmarshal.odin @@ -1,5 +1,6 @@ package json +import "core:fmt" import "core:mem" import "core:math" import "core:reflect" @@ -48,6 +49,14 @@ unmarshal_any :: proc(data: []byte, v: any, spec := DEFAULT_SPECIFICATION, alloc } context.allocator = p.allocator + + if p.spec == .MJSON { + #partial switch p.curr_token.kind { + case .Ident, .String: + return unmarsal_object(&p, data, .EOF) + } + } + return unmarsal_value(&p, data) } @@ -244,13 +253,13 @@ unmarsal_value :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { case .Open_Brace: - return unmarsal_object(p, v) + return unmarsal_object(p, v, .Close_Brace) case .Open_Bracket: return unmarsal_array(p, v) case: - if p.spec == Specification.JSON5 { + if p.spec != .JSON { #partial switch token.kind { case .Infinity: advance_token(p) @@ -285,16 +294,18 @@ unmarsal_value :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { unmarsal_expect_token :: proc(p: ^Parser, kind: Token_Kind, loc := #caller_location) -> Token { prev := p.curr_token err := expect_token(p, kind) - assert(err == nil, "unmarsal_expect_token", loc) + fmt.assertf(condition = err == nil, fmt="unmarsal_expect_token: %v, got %v", args={kind, prev.kind}, loc=loc) return prev } @(private) -unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { +unmarsal_object :: proc(p: ^Parser, v: any, end_token: Token_Kind) -> (err: Unmarshal_Error) { UNSUPPORTED_TYPE := Unsupported_Type_Error{v.id, p.curr_token} - assert(expect_token(p, .Open_Brace) == nil) + if end_token == .Close_Brace { + assert(expect_token(p, .Open_Brace) == nil) + } v := v v = reflect.any_base(v) @@ -306,7 +317,7 @@ unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { return UNSUPPORTED_TYPE } - struct_loop: for p.curr_token.kind != .Close_Brace { + struct_loop: for p.curr_token.kind != end_token { key, _ := parse_object_key(p, p.allocator) defer delete(key, p.allocator) @@ -347,21 +358,10 @@ unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { field_ptr := rawptr(uintptr(v.data) + offset) field := any{field_ptr, type.id} unmarsal_value(p, field) or_return - - if p.spec == Specification.JSON5 { - // Allow trailing commas - if allow_token(p, .Comma) { - continue struct_loop - } - } else { - // Disallow trailing commas - if allow_token(p, .Comma) { - continue struct_loop - } else { - break struct_loop - } + + if parse_comma(p) { + break struct_loop } - continue struct_loop } @@ -384,7 +384,7 @@ unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { map_backing_value := any{raw_data(elem_backing), t.value.id} - map_loop: for p.curr_token.kind != .Close_Brace { + map_loop: for p.curr_token.kind != end_token { key, _ := parse_object_key(p, p.allocator) unmarsal_expect_token(p, .Colon) @@ -410,19 +410,9 @@ unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { if set_ptr == nil { delete(key, p.allocator) } - - if p.spec == Specification.JSON5 { - // Allow trailing commas - if allow_token(p, .Comma) { - continue map_loop - } - } else { - // Disallow trailing commas - if allow_token(p, .Comma) { - continue map_loop - } else { - break map_loop - } + + if parse_comma(p) { + break map_loop } } @@ -430,7 +420,7 @@ unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { index_type := reflect.type_info_base(t.index) enum_type := index_type.variant.(reflect.Type_Info_Enum) - enumerated_array_loop: for p.curr_token.kind != .Close_Brace { + enumerated_array_loop: for p.curr_token.kind != end_token { key, _ := parse_object_key(p, p.allocator) unmarsal_expect_token(p, .Colon) defer delete(key, p.allocator) @@ -450,19 +440,9 @@ unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { index_any := any{index_ptr, t.elem.id} unmarsal_value(p, index_any) or_return - - if p.spec == Specification.JSON5 { - // Allow trailing commas - if allow_token(p, .Comma) { - continue enumerated_array_loop - } - } else { - // Disallow trailing commas - if allow_token(p, .Comma) { - continue enumerated_array_loop - } else { - break enumerated_array_loop - } + + if parse_comma(p) { + break enumerated_array_loop } } @@ -472,7 +452,9 @@ unmarsal_object :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { return UNSUPPORTED_TYPE } - assert(expect_token(p, .Close_Brace) == nil) + if end_token == .Close_Brace { + assert(expect_token(p, .Close_Brace) == nil) + } return } @@ -485,10 +467,8 @@ unmarsal_count_array :: proc(p: ^Parser) -> (length: uintptr) { array_length_loop: for p.curr_token.kind != .Close_Bracket { _, _ = parse_value(p) length += 1 - - if allow_token(p, .Comma) { - continue - } else { + + if parse_comma(p) { break } } @@ -509,9 +489,7 @@ unmarsal_array :: proc(p: ^Parser, v: any) -> (err: Unmarshal_Error) { unmarsal_value(p, elem) or_return - if allow_token(p, .Comma) { - continue - } else { + if parse_comma(p) { break } } diff --git a/core/encoding/json/validator.odin b/core/encoding/json/validator.odin index 67fd294cd..085dd384a 100644 --- a/core/encoding/json/validator.odin +++ b/core/encoding/json/validator.odin @@ -5,66 +5,64 @@ import "core:mem" // NOTE(bill): is_valid will not check for duplicate keys is_valid :: proc(data: []byte, spec := DEFAULT_SPECIFICATION, parse_integers := false) -> bool { p := make_parser(data, spec, parse_integers, mem.nil_allocator()) - if p.spec == Specification.JSON5 { + + switch p.spec { + case .JSON: + return validate_object(&p) + case .JSON5: + return validate_value(&p) + case .MJSON: + #partial switch p.curr_token.kind { + case .Ident, .String: + return validate_object_body(&p, .EOF) + } return validate_value(&p) } return validate_object(&p) } validate_object_key :: proc(p: ^Parser) -> bool { - tok := p.curr_token - if p.spec == Specification.JSON5 { - if tok.kind == .String { - expect_token(p, .String) - return true - } else if tok.kind == .Ident { - expect_token(p, .Ident) + if p.spec != .JSON { + if allow_token(p, .Ident) { return true } } err := expect_token(p, .String) - return err == Error.None + return err == .None } -validate_object :: proc(p: ^Parser) -> bool { - if err := expect_token(p, .Open_Brace); err != Error.None { - return false - } - for p.curr_token.kind != .Close_Brace { +validate_object_body :: proc(p: ^Parser, end_token: Token_Kind) -> bool { + for p.curr_token.kind != end_token { if !validate_object_key(p) { return false } - if colon_err := expect_token(p, .Colon); colon_err != Error.None { + if parse_colon(p) != nil { return false } + validate_value(p) or_return - if !validate_value(p) { - return false + if parse_comma(p) { + break } + } + return true +} - if p.spec == Specification.JSON5 { - // Allow trailing commas - if allow_token(p, .Comma) { - continue - } - } else { - // Disallow trailing commas - if allow_token(p, .Comma) { - continue - } else { - break - } - } +validate_object :: proc(p: ^Parser) -> bool { + if err := expect_token(p, .Open_Brace); err != .None { + return false } + + validate_object_body(p, .Close_Brace) or_return - if err := expect_token(p, .Close_Brace); err != Error.None { + if err := expect_token(p, .Close_Brace); err != .None { return false } return true } validate_array :: proc(p: ^Parser) -> bool { - if err := expect_token(p, .Open_Bracket); err != Error.None { + if err := expect_token(p, .Open_Bracket); err != .None { return false } @@ -73,15 +71,12 @@ validate_array :: proc(p: ^Parser) -> bool { return false } - // Disallow trailing commas for the time being - if allow_token(p, .Comma) { - continue - } else { + if parse_comma(p) { break } } - if err := expect_token(p, .Close_Bracket); err != Error.None { + if err := expect_token(p, .Close_Bracket); err != .None { return false } @@ -109,7 +104,7 @@ validate_value :: proc(p: ^Parser) -> bool { return validate_array(p) case: - if p.spec == Specification.JSON5 { + if p.spec != .JSON { #partial switch token.kind { case .Infinity, .NaN: advance_token(p) |