diff options
| author | gingerBill <bill@gingerbill.org> | 2019-01-06 21:48:13 +0000 |
|---|---|---|
| committer | gingerBill <bill@gingerbill.org> | 2019-01-06 21:48:13 +0000 |
| commit | d1b9f3ac74df5533f1857a26831419aeb560fd2f (patch) | |
| tree | ce7b77ec1fbd83516e0a49d4f4ccc82422f6ab87 /core/encoding | |
| parent | d732a5158761578d14e69daf3a94fad9f0a8c23c (diff) | |
package json; JSON5 support
Diffstat (limited to 'core/encoding')
| -rw-r--r-- | core/encoding/json/parser.odin | 139 | ||||
| -rw-r--r-- | core/encoding/json/tokenizer.odin | 182 | ||||
| -rw-r--r-- | core/encoding/json/types.odin | 5 | ||||
| -rw-r--r-- | core/encoding/json/validator.odin | 41 |
4 files changed, 319 insertions, 48 deletions
diff --git a/core/encoding/json/parser.odin b/core/encoding/json/parser.odin index 3ef8cee93..2c7d79465 100644 --- a/core/encoding/json/parser.odin +++ b/core/encoding/json/parser.odin @@ -7,20 +7,27 @@ import "core:strconv" Parser :: struct { tok: Tokenizer, curr_token: Token, + spec: Specification, allocator: mem.Allocator, } -make_parser :: proc(data: string, allocator := context.allocator) -> Parser { +make_parser :: proc(data: string, spec := Specification.JSON, allocator := context.allocator) -> Parser { p: Parser; - p.tok = make_tokenizer(data); + p.tok = make_tokenizer(data, spec); + p.spec = spec; p.allocator = allocator; assert(p.allocator.procedure != nil); advance_token(&p); return p; } -parse :: proc(data: string, allocator := context.allocator) -> (Value, Error) { - p := make_parser(data, allocator); +parse :: proc(data: string, spec := Specification.JSON, allocator := context.allocator) -> (Value, Error) { + context.allocator = allocator; + p := make_parser(data, spec, allocator); + + if p.spec == Specification.JSON5 { + return parse_value(&p); + } return parse_object(&p); } @@ -77,7 +84,7 @@ parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) { advance_token(p); return; case Kind.String: - value.value = String(unquote_string(token, p.allocator)); + value.value = String(unquote_string(token, p.spec, p.allocator)); advance_token(p); return; @@ -132,6 +139,34 @@ parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) { return; } +clone_string :: proc(s: string, allocator: mem.Allocator) -> string { + n := len(s); + b := make([]byte, n+1, allocator); + copy(b, cast([]byte)s); + b[n] = 0; + return string(b[:n]); +} + +parse_object_key :: proc(p: ^Parser) -> (key: string, err: Error) { + tok := p.curr_token; + if p.spec == Specification.JSON5 { + if tok.kind == Kind.String { + expect_token(p, Kind.String); + key = unquote_string(tok, p.spec, p.allocator); + return; + } else if tok.kind == Kind.Ident { + expect_token(p, Kind.Ident); + key = clone_string(tok.text, p.allocator); + return; + } + } + if tok_err := expect_token(p, Kind.String); tok_err != Error.None { + err = Error.Expected_String_For_Object_Key; + return; + } + key = unquote_string(tok, p.spec, p.allocator); + return; +} parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) { value.pos = p.curr_token.pos; @@ -144,20 +179,20 @@ parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) { obj.allocator = p.allocator; defer if err != Error.None { for key, elem in obj { - delete(key); + delete(key, p.allocator); destroy_value(elem); } delete(obj); } for p.curr_token.kind != Kind.Close_Brace { - tok := p.curr_token; - if tok_err := expect_token(p, Kind.String); tok_err != Error.None { - err = Error.Expected_String_For_Object_Key; + key: string; + key, err = parse_object_key(p); + if err != Error.None { + delete(key, p.allocator); value.pos = p.curr_token.pos; return; } - key := unquote_string(tok, p.allocator); if colon_err := expect_token(p, Kind.Colon); colon_err != Error.None { err = Error.Expected_Colon_After_Key; @@ -175,17 +210,24 @@ parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) { if key in obj { err = Error.Duplicate_Object_Key; value.pos = p.curr_token.pos; - delete(key); + delete(key, p.allocator); return; } obj[key] = elem; - // Disallow trailing commas for the time being - if allow_token(p, Kind.Comma) { - continue; + if p.spec == Specification.JSON5 { + // Allow trailing commas + if allow_token(p, Kind.Comma) { + continue; + } } else { - break; + // Disallow trailing commas + if allow_token(p, Kind.Comma) { + continue; + } else { + break; + } } } @@ -200,7 +242,25 @@ parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) { // IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string -unquote_string :: proc(token: Token, allocator := context.allocator) -> string { +unquote_string :: proc(token: Token, spec: Specification, allocator := context.allocator) -> string { + get_u2_rune :: proc(s: string) -> rune { + if len(s) < 4 || s[0] != '\\' || s[1] != 'x' { + return -1; + } + + r: rune; + for c in s[2:4] { + x: rune; + switch c { + case '0'..'9': x = c - '0'; + case 'a'..'f': x = c - 'a' + 10; + case 'A'..'F': x = c - 'A' + 10; + case: return -1; + } + r = r*16 + x; + } + return r; + } get_u4_rune :: proc(s: string) -> rune { if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { return -1; @@ -227,12 +287,17 @@ unquote_string :: proc(token: Token, allocator := context.allocator) -> string { if len(s) <= 2 { return ""; } + quote := s[0]; + if s[0] != s[len(s)-1] { + // Invalid string + return ""; + } s = s[1:len(s)-1]; i := 0; for i < len(s) { c := s[i]; - if c == '\\' || c == '"' || c < ' ' { + if c == '\\' || c == quote || c < ' ' { break; } if c < utf8.RUNE_SELF { @@ -246,9 +311,7 @@ unquote_string :: proc(token: Token, allocator := context.allocator) -> string { i += w; } if i == len(s) { - b := make([]byte, len(s), allocator); - copy(b, cast([]byte)s); - return string(b); + return clone_string(s, allocator); } b := make([]byte, len(s) + 2*utf8.UTF_MAX, allocator); @@ -299,9 +362,43 @@ unquote_string :: proc(token: Token, allocator := context.allocator) -> string { buf, buf_width := utf8.encode_rune(r); copy(b[w:], buf[:buf_width]); w += buf_width; + + + case '0': + if spec == Specification.JSON5 { + b[w] = '\x00'; + i += 1; + w += 1; + } else { + break loop; + } + case 'v': + if spec == Specification.JSON5 { + b[w] = '\v'; + i += 1; + w += 1; + } else { + break loop; + } + + case 'x': + if spec == Specification.JSON5 { + i -= 1; // Include the \x in the check for sanity sake + r := get_u2_rune(s[i:]); + if r < 0 { + break loop; + } + i += 4; + + buf, buf_width := utf8.encode_rune(r); + copy(b[w:], buf[:buf_width]); + w += buf_width; + } else { + break loop; + } } - case c == '"', c < ' ': + case c == quote, c < ' ': break loop; case c < utf8.RUNE_SELF: diff --git a/core/encoding/json/tokenizer.odin b/core/encoding/json/tokenizer.odin index dfa20a6a7..3cada4b45 100644 --- a/core/encoding/json/tokenizer.odin +++ b/core/encoding/json/tokenizer.odin @@ -15,6 +15,9 @@ Kind :: enum { False, True, + Infinity, + NaN, + Ident, Integer, @@ -37,13 +40,17 @@ Tokenizer :: struct { r: rune, // current rune w: int, // current rune width in bytes curr_line_offset: int, + spec: Specification, } -make_tokenizer :: proc(data: string) -> Tokenizer { - t := Tokenizer{pos = {line=1}, data = data}; +make_tokenizer :: proc(data: string, spec := Specification.JSON) -> Tokenizer { + t := Tokenizer{pos = {line=1}, data = data, spec = spec}; next_rune(&t); + if t.r == utf8.RUNE_BOM { + next_rune(&t); + } return t; } @@ -69,6 +76,17 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { } } } + skip_hex_digits :: proc(t: ^Tokenizer) { + for t.offset < len(t.data) { + next_rune(t); + switch t.r { + case '0'..'9', 'a'..'f', 'A'..'F': + // Okay + case: + return; + } + } + } scan_espace :: proc(t: ^Tokenizer) -> bool { switch t.r { @@ -104,12 +122,39 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { t.pos.column = 1; next_rune(t); case: + if t.spec == Specification.JSON5 { + switch t.r { + case 0x2028, 0x2029, 0xFEFF: + next_rune(t); + continue loop; + } + } break loop; } } return t.r; } + skip_to_next_line :: proc(t: ^Tokenizer) { + for t.offset < len(t.data) { + r := next_rune(t); + if r == '\n' { + return; + } + } + } + + skip_alphanum :: proc(t: ^Tokenizer) { + for t.offset < len(t.data) { + switch next_rune(t) { + case 'A'..'Z', 'a'..'z', '0'..'9', '_': + continue; + } + + return; + } + } + skip_whitespace(t); token.pos = t.pos; @@ -118,7 +163,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { curr_rune := t.r; next_rune(t); - switch curr_rune { + block: switch curr_rune { case utf8.RUNE_ERROR: err = Error.Illegal_Character; case utf8.RUNE_EOF, '\x00': @@ -127,21 +172,26 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { case 'A'..'Z', 'a'..'z', '_': token.kind = Kind.Ident; - for t.offset < len(t.data) { - switch next_rune(t) { - case 'A'..'Z', 'a'..'z', '0'..'9', '_': - continue; - } - - break; - } + skip_alphanum(t); switch str := t.data[token.offset:t.offset]; str { case "null": token.kind = Kind.Null; case "false": token.kind = Kind.False; case "true": token.kind = Kind.True; + case: + if t.spec == Specification.JSON5 do switch str { + case "Infinity": token.kind = Kind.Infinity; + case "NaN": token.kind = Kind.NaN; + } } + case '+': + err = Error.Illegal_Character; + if t.spec != Specification.JSON5 { + break; + } + fallthrough; + case '-': switch t.r { case '0'..'9': @@ -149,12 +199,46 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { case: // Illegal use of +/- err = Error.Illegal_Character; - break; + + if t.spec == Specification.JSON5 { + if t.r == 'I' || t.r == 'N' { + skip_alphanum(t); + } + switch t.data[token.offset:t.offset] { + case "-Infinity": token.kind = Kind.Infinity; + case "-NaN": token.kind = Kind.NaN; + } + } + break block; } fallthrough; + case '.': + err = Error.Illegal_Character; + if t.spec == Specification.JSON5 { // Allow leading decimal point + skip_digits(t); + if t.r == 'e' || t.r == 'E' { + switch r := next_rune(t); r { + case '+', '-': + next_rune(t); + } + skip_digits(t); + } + str := t.data[token.offset:t.offset]; + if !is_valid_number(str, t.spec) { + err = Error.Invalid_Number; + } + } + case '0'..'9': token.kind = Kind.Integer; + if t.spec == Specification.JSON5 { // Hexadecimal Numbers + if curr_rune == '0' && (t.r == 'x' || t.r == 'X') { + next_rune(t); + skip_hex_digits(t); + break; + } + } skip_digits(t); if t.r == '.' { @@ -171,11 +255,17 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { } str := t.data[token.offset:t.offset]; - if !is_valid_number(str) { + if !is_valid_number(str, t.spec) { err = Error.Invalid_Number; } + case '\'': + err = Error.Illegal_Character; + if t.spec != Specification.JSON5 { + break; + } + fallthrough; case '"': token.kind = Kind.String; quote := curr_rune; @@ -194,10 +284,11 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { } } - if !is_valid_string_literal(t.data[token.offset : t.offset]) { + if !is_valid_string_literal(t.data[token.offset : t.offset], t.spec) { err = Error.Invalid_String; } + case ',': token.kind = Kind.Comma; case ':': token.kind = Kind.Colon; case '{': token.kind = Kind.Open_Brace; @@ -205,6 +296,30 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { case '[': token.kind = Kind.Open_Bracket; case ']': token.kind = Kind.Close_Bracket; + case '/': + err = Error.Illegal_Character; + if t.spec == Specification.JSON5 { + switch t.r { + case '/': + // Single-line comments + skip_to_next_line(t); + return get_token(t); + case '*': + // None-nested multi-line comments + for t.offset < len(t.data) { + next_rune(t); + if t.r == '*' { + next_rune(t); + if t.r == '/' { + next_rune(t); + return get_token(t); + } + } + } + err = Error.EOF; + } + } + case: err = Error.Illegal_Character; } @@ -215,7 +330,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) { -is_valid_number :: proc(s: string) -> bool { +is_valid_number :: proc(s: string, spec: Specification) -> bool { if s == "" { return false; } @@ -225,6 +340,13 @@ is_valid_number :: proc(s: string) -> bool { if s == "" { return false; } + } else if spec == Specification.JSON5 { + if s[0] == '+' { // Allow positive sign + s = s[1:]; + if s == "" { + return false; + } + } } switch s[0] { @@ -233,10 +355,21 @@ is_valid_number :: proc(s: string) -> bool { case '1'..'9': s = s[1:]; for len(s) > 0 && '0' <= s[0] && s[0] <= '9' do s = s[1:]; + case '.': + if spec == Specification.JSON5 { // Allow leading decimal point + s = s[1:]; + } else { + return false; + } case: return false; } + if spec == Specification.JSON5 { + if len(s) == 1 && s[0] == '.' { // Allow trailing decimal point + return true; + } + } if len(s) >= 2 && s[0] == '.' && '0' <= s[1] && s[1] <= '9' { s = s[2:]; @@ -259,10 +392,23 @@ is_valid_number :: proc(s: string) -> bool { return s == ""; } -is_valid_string_literal :: proc(s: string) -> bool { - if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { +is_valid_string_literal :: proc(s: string, spec: Specification) -> bool { + if len(s) < 2 { + return false; + } + quote := s[0]; + if s[0] != s[len(s)-1] { return false; } + if s[0] != '"' || s[len(s)-1] != '"' { + if spec == Specification.JSON5 { + if s[0] != '\'' || s[len(s)-1] != '\'' { + return false; + } + } else { + return false; + } + } s = s[1 : len(s)-1]; i := 0; @@ -301,7 +447,7 @@ is_valid_string_literal :: proc(s: string) -> bool { case: return false; } - case c == '"', c < ' ': + case c == quote, c < ' ': return false; case c < utf8.RUNE_SELF: diff --git a/core/encoding/json/types.odin b/core/encoding/json/types.odin index d8a10b801..f10136ad0 100644 --- a/core/encoding/json/types.odin +++ b/core/encoding/json/types.odin @@ -2,6 +2,11 @@ package json import "core:strconv" +Specification :: enum { + JSON, + JSON5, +} + Null :: distinct rawptr; Integer :: i64; Float :: f64; diff --git a/core/encoding/json/validator.odin b/core/encoding/json/validator.odin index ac4e62d6b..aa49364ec 100644 --- a/core/encoding/json/validator.odin +++ b/core/encoding/json/validator.odin @@ -3,19 +3,35 @@ package json import "core:mem" // NOTE(bill): is_valid will not check for duplicate keys -is_valid :: proc(data: string) -> bool { - p := make_parser(data, mem.nil_allocator()); +is_valid :: proc(data: string, spec := Specification.JSON) -> bool { + p := make_parser(data, spec, mem.nil_allocator()); + if p.spec == Specification.JSON5 { + return validate_value(&p); + } return validate_object(&p); } +validate_object_key :: proc(p: ^Parser) -> bool { + tok := p.curr_token; + if p.spec == Specification.JSON5 { + if tok.kind == Kind.String { + expect_token(p, Kind.String); + return true; + } else if tok.kind == Kind.Ident { + expect_token(p, Kind.Ident); + return true; + } + } + err := expect_token(p, Kind.String); + return err == Error.None; +} validate_object :: proc(p: ^Parser) -> bool { if err := expect_token(p, Kind.Open_Brace); err != Error.None { return false; } for p.curr_token.kind != Kind.Close_Brace { - tok := p.curr_token; - if tok_err := expect_token(p, Kind.String); tok_err != Error.None { + if !validate_object_key(p) { return false; } if colon_err := expect_token(p, Kind.Colon); colon_err != Error.None { @@ -26,11 +42,18 @@ validate_object :: proc(p: ^Parser) -> bool { return false; } - // Disallow trailing commas for the time being - if allow_token(p, Kind.Comma) { - continue; + if p.spec == Specification.JSON5 { + // Allow trailing commas + if allow_token(p, Kind.Comma) { + continue; + } } else { - break; + // Disallow trailing commas + if allow_token(p, Kind.Comma) { + continue; + } else { + break; + } } } @@ -85,7 +108,7 @@ validate_value :: proc(p: ^Parser) -> bool { return true; case Kind.String: advance_token(p); - return is_valid_string_literal(token.text); + return is_valid_string_literal(token.text, p.spec); case Kind.Open_Brace: return validate_object(p); |