author     gingerBill <bill@gingerbill.org>   2019-01-06 21:48:13 +0000
committer  gingerBill <bill@gingerbill.org>   2019-01-06 21:48:13 +0000
commit     d1b9f3ac74df5533f1857a26831419aeb560fd2f
tree       ce7b77ec1fbd83516e0a49d4f4ccc82422f6ab87 /core/encoding
parent     d732a5158761578d14e69daf3a94fad9f0a8c23c
package json; JSON5 support
Diffstat (limited to 'core/encoding')
-rw-r--r--  core/encoding/json/parser.odin    | 139
-rw-r--r--  core/encoding/json/tokenizer.odin | 182
-rw-r--r--  core/encoding/json/types.odin     |   5
-rw-r--r--  core/encoding/json/validator.odin |  41
4 files changed, 319 insertions(+), 48 deletions(-)
diff --git a/core/encoding/json/parser.odin b/core/encoding/json/parser.odin
index 3ef8cee93..2c7d79465 100644
--- a/core/encoding/json/parser.odin
+++ b/core/encoding/json/parser.odin
@@ -7,20 +7,27 @@ import "core:strconv"
Parser :: struct {
tok: Tokenizer,
curr_token: Token,
+ spec: Specification,
allocator: mem.Allocator,
}
-make_parser :: proc(data: string, allocator := context.allocator) -> Parser {
+make_parser :: proc(data: string, spec := Specification.JSON, allocator := context.allocator) -> Parser {
p: Parser;
- p.tok = make_tokenizer(data);
+ p.tok = make_tokenizer(data, spec);
+ p.spec = spec;
p.allocator = allocator;
assert(p.allocator.procedure != nil);
advance_token(&p);
return p;
}
-parse :: proc(data: string, allocator := context.allocator) -> (Value, Error) {
- p := make_parser(data, allocator);
+parse :: proc(data: string, spec := Specification.JSON, allocator := context.allocator) -> (Value, Error) {
+ context.allocator = allocator;
+ p := make_parser(data, spec, allocator);
+
+ if p.spec == Specification.JSON5 {
+ return parse_value(&p);
+ }
return parse_object(&p);
}
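
For orientation, a minimal usage sketch of the extended entry point (assuming the package is imported as "core:encoding/json"; the input document is illustrative):

    import "core:encoding/json"

    example :: proc() {
    	// JSON5 permits unquoted keys, trailing commas, and comments.
    	data := `{key: "value", /* comment */ n: 10,}`;
    	value, err := json.parse(data, json.Specification.JSON5);
    	if err != json.Error.None do return;
    	defer json.destroy_value(value);
    }
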
@@ -77,7 +84,7 @@ parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) {
advance_token(p);
return;
case Kind.String:
- value.value = String(unquote_string(token, p.allocator));
+ value.value = String(unquote_string(token, p.spec, p.allocator));
advance_token(p);
return;
@@ -132,6 +139,34 @@ parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) {
return;
}
+clone_string :: proc(s: string, allocator: mem.Allocator) -> string {
+ n := len(s);
+ b := make([]byte, n+1, allocator);
+ copy(b, cast([]byte)s);
+ b[n] = 0;
+ return string(b[:n]);
+}
+
+parse_object_key :: proc(p: ^Parser) -> (key: string, err: Error) {
+ tok := p.curr_token;
+ if p.spec == Specification.JSON5 {
+ if tok.kind == Kind.String {
+ expect_token(p, Kind.String);
+ key = unquote_string(tok, p.spec, p.allocator);
+ return;
+ } else if tok.kind == Kind.Ident {
+ expect_token(p, Kind.Ident);
+ key = clone_string(tok.text, p.allocator);
+ return;
+ }
+ }
+ if tok_err := expect_token(p, Kind.String); tok_err != Error.None {
+ err = Error.Expected_String_For_Object_Key;
+ return;
+ }
+ key = unquote_string(tok, p.spec, p.allocator);
+ return;
+}
parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
value.pos = p.curr_token.pos;
@@ -144,20 +179,20 @@ parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
obj.allocator = p.allocator;
defer if err != Error.None {
for key, elem in obj {
- delete(key);
+ delete(key, p.allocator);
destroy_value(elem);
}
delete(obj);
}
for p.curr_token.kind != Kind.Close_Brace {
- tok := p.curr_token;
- if tok_err := expect_token(p, Kind.String); tok_err != Error.None {
- err = Error.Expected_String_For_Object_Key;
+ key: string;
+ key, err = parse_object_key(p);
+ if err != Error.None {
+ delete(key, p.allocator);
value.pos = p.curr_token.pos;
return;
}
- key := unquote_string(tok, p.allocator);
if colon_err := expect_token(p, Kind.Colon); colon_err != Error.None {
err = Error.Expected_Colon_After_Key;
@@ -175,17 +210,24 @@ parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
if key in obj {
err = Error.Duplicate_Object_Key;
value.pos = p.curr_token.pos;
- delete(key);
+ delete(key, p.allocator);
return;
}
obj[key] = elem;
- // Disallow trailing commas for the time being
- if allow_token(p, Kind.Comma) {
- continue;
+ if p.spec == Specification.JSON5 {
+ // Allow trailing commas
+ if allow_token(p, Kind.Comma) {
+ continue;
+ }
} else {
- break;
+ // Disallow trailing commas
+ if allow_token(p, Kind.Comma) {
+ continue;
+ } else {
+ break;
+ }
}
}
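
A hedged sketch of what the reworked object loop accepts per specification (both inputs are illustrative):

    // Specification.JSON5: identifier keys, string keys, and a trailing comma.
    json5_doc := `{unquoted: 1, "quoted": 2,}`;
    // Specification.JSON (the default): string keys only, no trailing comma.
    json_doc := `{"quoted": 2}`;
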
@@ -200,7 +242,25 @@ parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
// IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string
-unquote_string :: proc(token: Token, allocator := context.allocator) -> string {
+unquote_string :: proc(token: Token, spec: Specification, allocator := context.allocator) -> string {
+ get_u2_rune :: proc(s: string) -> rune {
+ if len(s) < 4 || s[0] != '\\' || s[1] != 'x' {
+ return -1;
+ }
+
+ r: rune;
+ for c in s[2:4] {
+ x: rune;
+ switch c {
+ case '0'..'9': x = c - '0';
+ case 'a'..'f': x = c - 'a' + 10;
+ case 'A'..'F': x = c - 'A' + 10;
+ case: return -1;
+ }
+ r = r*16 + x;
+ }
+ return r;
+ }
get_u4_rune :: proc(s: string) -> rune {
if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
return -1;
@@ -227,12 +287,17 @@ unquote_string :: proc(token: Token, allocator := context.allocator) -> string {
if len(s) <= 2 {
return "";
}
+ quote := s[0];
+ if s[0] != s[len(s)-1] {
+ // Invalid string
+ return "";
+ }
s = s[1:len(s)-1];
i := 0;
for i < len(s) {
c := s[i];
- if c == '\\' || c == '"' || c < ' ' {
+ if c == '\\' || c == quote || c < ' ' {
break;
}
if c < utf8.RUNE_SELF {
@@ -246,9 +311,7 @@ unquote_string :: proc(token: Token, allocator := context.allocator) -> string {
i += w;
}
if i == len(s) {
- b := make([]byte, len(s), allocator);
- copy(b, cast([]byte)s);
- return string(b);
+ return clone_string(s, allocator);
}
b := make([]byte, len(s) + 2*utf8.UTF_MAX, allocator);
@@ -299,9 +362,43 @@ unquote_string :: proc(token: Token, allocator := context.allocator) -> string {
buf, buf_width := utf8.encode_rune(r);
copy(b[w:], buf[:buf_width]);
w += buf_width;
+
+
+ case '0':
+ if spec == Specification.JSON5 {
+ b[w] = '\x00';
+ i += 1;
+ w += 1;
+ } else {
+ break loop;
+ }
+ case 'v':
+ if spec == Specification.JSON5 {
+ b[w] = '\v';
+ i += 1;
+ w += 1;
+ } else {
+ break loop;
+ }
+
+ case 'x':
+ if spec == Specification.JSON5 {
+ i -= 1; // Include the \x in the check for sanity's sake
+ r := get_u2_rune(s[i:]);
+ if r < 0 {
+ break loop;
+ }
+ i += 4;
+
+ buf, buf_width := utf8.encode_rune(r);
+ copy(b[w:], buf[:buf_width]);
+ w += buf_width;
+ } else {
+ break loop;
+ }
}
- case c == '"', c < ' ':
+ case c == quote, c < ' ':
break loop;
case c < utf8.RUNE_SELF:
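
A hedged, illustrative sketch of the JSON5-only escapes introduced above; the exact runtime behavior also depends on escape-scanning code elided from this diff:

    // \xNN (via get_u2_rune), \0, and \v are decoded by unquote_string
    // only when spec == Specification.JSON5.
    doc := `{v: "\x41\v\0"}`;   // intended to decode the value to "A", '\v', NUL
    value, err := json.parse(doc, json.Specification.JSON5);
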
diff --git a/core/encoding/json/tokenizer.odin b/core/encoding/json/tokenizer.odin
index dfa20a6a7..3cada4b45 100644
--- a/core/encoding/json/tokenizer.odin
+++ b/core/encoding/json/tokenizer.odin
@@ -15,6 +15,9 @@ Kind :: enum {
False,
True,
+ Infinity,
+ NaN,
+
Ident,
Integer,
@@ -37,13 +40,17 @@ Tokenizer :: struct {
r: rune, // current rune
w: int, // current rune width in bytes
curr_line_offset: int,
+ spec: Specification,
}
-make_tokenizer :: proc(data: string) -> Tokenizer {
- t := Tokenizer{pos = {line=1}, data = data};
+make_tokenizer :: proc(data: string, spec := Specification.JSON) -> Tokenizer {
+ t := Tokenizer{pos = {line=1}, data = data, spec = spec};
next_rune(&t);
+ if t.r == utf8.RUNE_BOM {
+ next_rune(&t);
+ }
return t;
}
@@ -69,6 +76,17 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
}
}
}
+ skip_hex_digits :: proc(t: ^Tokenizer) {
+ for t.offset < len(t.data) {
+ next_rune(t);
+ switch t.r {
+ case '0'..'9', 'a'..'f', 'A'..'F':
+ // Okay
+ case:
+ return;
+ }
+ }
+ }
scan_espace :: proc(t: ^Tokenizer) -> bool {
switch t.r {
@@ -104,12 +122,39 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
t.pos.column = 1;
next_rune(t);
case:
+ if t.spec == Specification.JSON5 {
+ switch t.r {
+ case 0x2028, 0x2029, 0xFEFF:
+ next_rune(t);
+ continue loop;
+ }
+ }
break loop;
}
}
return t.r;
}
+ skip_to_next_line :: proc(t: ^Tokenizer) {
+ for t.offset < len(t.data) {
+ r := next_rune(t);
+ if r == '\n' {
+ return;
+ }
+ }
+ }
+
+ skip_alphanum :: proc(t: ^Tokenizer) {
+ for t.offset < len(t.data) {
+ switch next_rune(t) {
+ case 'A'..'Z', 'a'..'z', '0'..'9', '_':
+ continue;
+ }
+
+ return;
+ }
+ }
+
skip_whitespace(t);
token.pos = t.pos;
@@ -118,7 +163,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
curr_rune := t.r;
next_rune(t);
- switch curr_rune {
+ block: switch curr_rune {
case utf8.RUNE_ERROR:
err = Error.Illegal_Character;
case utf8.RUNE_EOF, '\x00':
@@ -127,21 +172,26 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
case 'A'..'Z', 'a'..'z', '_':
token.kind = Kind.Ident;
- for t.offset < len(t.data) {
- switch next_rune(t) {
- case 'A'..'Z', 'a'..'z', '0'..'9', '_':
- continue;
- }
-
- break;
- }
+ skip_alphanum(t);
switch str := t.data[token.offset:t.offset]; str {
case "null": token.kind = Kind.Null;
case "false": token.kind = Kind.False;
case "true": token.kind = Kind.True;
+ case:
+ if t.spec == Specification.JSON5 do switch str {
+ case "Infinity": token.kind = Kind.Infinity;
+ case "NaN": token.kind = Kind.NaN;
+ }
}
+ case '+':
+ err = Error.Illegal_Character;
+ if t.spec != Specification.JSON5 {
+ break;
+ }
+ fallthrough;
+
case '-':
switch t.r {
case '0'..'9':
@@ -149,12 +199,46 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
case:
// Illegal use of +/-
err = Error.Illegal_Character;
- break;
+
+ if t.spec == Specification.JSON5 {
+ if t.r == 'I' || t.r == 'N' {
+ skip_alphanum(t);
+ }
+ switch t.data[token.offset:t.offset] {
+ case "-Infinity": token.kind = Kind.Infinity;
+ case "-NaN": token.kind = Kind.NaN;
+ }
+ }
+ break block;
}
fallthrough;
+ case '.':
+ err = Error.Illegal_Character;
+ if t.spec == Specification.JSON5 { // Allow leading decimal point
+ skip_digits(t);
+ if t.r == 'e' || t.r == 'E' {
+ switch r := next_rune(t); r {
+ case '+', '-':
+ next_rune(t);
+ }
+ skip_digits(t);
+ }
+ str := t.data[token.offset:t.offset];
+ if !is_valid_number(str, t.spec) {
+ err = Error.Invalid_Number;
+ }
+ }
+
case '0'..'9':
token.kind = Kind.Integer;
+ if t.spec == Specification.JSON5 { // Hexadecimal Numbers
+ if curr_rune == '0' && (t.r == 'x' || t.r == 'X') {
+ next_rune(t);
+ skip_hex_digits(t);
+ break;
+ }
+ }
skip_digits(t);
if t.r == '.' {
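
A hedged sketch of the numeric forms this change targets (hexadecimal literals bypass is_valid_number via the break above; the other forms are checked by it):

    ok1 := json.is_valid_number("5.", json.Specification.JSON5); // true: trailing decimal point
    ok2 := json.is_valid_number("+1", json.Specification.JSON5); // true: explicit positive sign
    ok3 := json.is_valid_number("5.", json.Specification.JSON);  // false under strict JSON
    // Also tokenized under JSON5: 0x1A / 0X1A, Infinity, NaN, -Infinity, -NaN.
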
@@ -171,11 +255,17 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
}
str := t.data[token.offset:t.offset];
- if !is_valid_number(str) {
+ if !is_valid_number(str, t.spec) {
err = Error.Invalid_Number;
}
+ case '\'':
+ err = Error.Illegal_Character;
+ if t.spec != Specification.JSON5 {
+ break;
+ }
+ fallthrough;
case '"':
token.kind = Kind.String;
quote := curr_rune;
@@ -194,10 +284,11 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
}
}
- if !is_valid_string_literal(t.data[token.offset : t.offset]) {
+ if !is_valid_string_literal(t.data[token.offset : t.offset], t.spec) {
err = Error.Invalid_String;
}
+
case ',': token.kind = Kind.Comma;
case ':': token.kind = Kind.Colon;
case '{': token.kind = Kind.Open_Brace;
@@ -205,6 +296,30 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
case '[': token.kind = Kind.Open_Bracket;
case ']': token.kind = Kind.Close_Bracket;
+ case '/':
+ err = Error.Illegal_Character;
+ if t.spec == Specification.JSON5 {
+ switch t.r {
+ case '/':
+ // Single-line comments
+ skip_to_next_line(t);
+ return get_token(t);
+ case '*':
+ // Non-nested multi-line comments
+ for t.offset < len(t.data) {
+ next_rune(t);
+ if t.r == '*' {
+ next_rune(t);
+ if t.r == '/' {
+ next_rune(t);
+ return get_token(t);
+ }
+ }
+ }
+ err = Error.EOF;
+ }
+ }
+
case: err = Error.Illegal_Character;
}
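
A hedged sketch of the comment forms now skipped under JSON5 (the document is illustrative):

    doc := `{
    	// single-line comment, skipped via skip_to_next_line
    	a: 1, /* multi-line comment,
    	         not nestable */ b: 2,
    }`;
    value, err := json.parse(doc, json.Specification.JSON5);
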
@@ -215,7 +330,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
-is_valid_number :: proc(s: string) -> bool {
+is_valid_number :: proc(s: string, spec: Specification) -> bool {
if s == "" {
return false;
}
@@ -225,6 +340,13 @@ is_valid_number :: proc(s: string) -> bool {
if s == "" {
return false;
}
+ } else if spec == Specification.JSON5 {
+ if s[0] == '+' { // Allow positive sign
+ s = s[1:];
+ if s == "" {
+ return false;
+ }
+ }
}
switch s[0] {
@@ -233,10 +355,21 @@ is_valid_number :: proc(s: string) -> bool {
case '1'..'9':
s = s[1:];
for len(s) > 0 && '0' <= s[0] && s[0] <= '9' do s = s[1:];
+ case '.':
+ if spec == Specification.JSON5 { // Allow leading decimal point
+ s = s[1:];
+ } else {
+ return false;
+ }
case:
return false;
}
+ if spec == Specification.JSON5 {
+ if len(s) == 1 && s[0] == '.' { // Allow trailing decimal point
+ return true;
+ }
+ }
if len(s) >= 2 && s[0] == '.' && '0' <= s[1] && s[1] <= '9' {
s = s[2:];
@@ -259,10 +392,23 @@ is_valid_number :: proc(s: string) -> bool {
return s == "";
}
-is_valid_string_literal :: proc(s: string) -> bool {
- if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
+is_valid_string_literal :: proc(s: string, spec: Specification) -> bool {
+ if len(s) < 2 {
+ return false;
+ }
+ quote := s[0];
+ if s[0] != s[len(s)-1] {
return false;
}
+ if s[0] != '"' || s[len(s)-1] != '"' {
+ if spec == Specification.JSON5 {
+ if s[0] != '\'' || s[len(s)-1] != '\'' {
+ return false;
+ }
+ } else {
+ return false;
+ }
+ }
s = s[1 : len(s)-1];
i := 0;
@@ -301,7 +447,7 @@ is_valid_string_literal :: proc(s: string) -> bool {
case: return false;
}
- case c == '"', c < ' ':
+ case c == quote, c < ' ':
return false;
case c < utf8.RUNE_SELF:
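
With the quote handling above, single-quoted literals validate under JSON5 only; a hedged sketch:

    ok1 := json.is_valid_string_literal(`"double"`, json.Specification.JSON);  // true
    ok2 := json.is_valid_string_literal(`'single'`, json.Specification.JSON5); // true
    ok3 := json.is_valid_string_literal(`'single'`, json.Specification.JSON);  // false
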
diff --git a/core/encoding/json/types.odin b/core/encoding/json/types.odin
index d8a10b801..f10136ad0 100644
--- a/core/encoding/json/types.odin
+++ b/core/encoding/json/types.odin
@@ -2,6 +2,11 @@ package json
import "core:strconv"
+Specification :: enum {
+ JSON,
+ JSON5,
+}
+
Null :: distinct rawptr;
Integer :: i64;
Float :: f64;
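
The new enum threads through every public entry point as a defaulted parameter; a hedged sketch from inside the package:

    data := `{}`;
    p := make_parser(data, Specification.JSON5); // opt in to JSON5
    q := make_parser(data);                      // defaults to Specification.JSON
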
diff --git a/core/encoding/json/validator.odin b/core/encoding/json/validator.odin
index ac4e62d6b..aa49364ec 100644
--- a/core/encoding/json/validator.odin
+++ b/core/encoding/json/validator.odin
@@ -3,19 +3,35 @@ package json
import "core:mem"
// NOTE(bill): is_valid will not check for duplicate keys
-is_valid :: proc(data: string) -> bool {
- p := make_parser(data, mem.nil_allocator());
+is_valid :: proc(data: string, spec := Specification.JSON) -> bool {
+ p := make_parser(data, spec, mem.nil_allocator());
+ if p.spec == Specification.JSON5 {
+ return validate_value(&p);
+ }
return validate_object(&p);
}
+validate_object_key :: proc(p: ^Parser) -> bool {
+ tok := p.curr_token;
+ if p.spec == Specification.JSON5 {
+ if tok.kind == Kind.String {
+ expect_token(p, Kind.String);
+ return true;
+ } else if tok.kind == Kind.Ident {
+ expect_token(p, Kind.Ident);
+ return true;
+ }
+ }
+ err := expect_token(p, Kind.String);
+ return err == Error.None;
+}
validate_object :: proc(p: ^Parser) -> bool {
if err := expect_token(p, Kind.Open_Brace); err != Error.None {
return false;
}
for p.curr_token.kind != Kind.Close_Brace {
- tok := p.curr_token;
- if tok_err := expect_token(p, Kind.String); tok_err != Error.None {
+ if !validate_object_key(p) {
return false;
}
if colon_err := expect_token(p, Kind.Colon); colon_err != Error.None {
@@ -26,11 +42,18 @@ validate_object :: proc(p: ^Parser) -> bool {
return false;
}
- // Disallow trailing commas for the time being
- if allow_token(p, Kind.Comma) {
- continue;
+ if p.spec == Specification.JSON5 {
+ // Allow trailing commas
+ if allow_token(p, Kind.Comma) {
+ continue;
+ }
} else {
- break;
+ // Disallow trailing commas
+ if allow_token(p, Kind.Comma) {
+ continue;
+ } else {
+ break;
+ }
}
}
@@ -85,7 +108,7 @@ validate_value :: proc(p: ^Parser) -> bool {
return true;
case Kind.String:
advance_token(p);
- return is_valid_string_literal(token.text);
+ return is_valid_string_literal(token.text, p.spec);
case Kind.Open_Brace:
return validate_object(p);
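
Finally, a hedged sketch of the spec-aware validator (inputs are illustrative):

    ok_json  := json.is_valid(`{"a": 1}`);                                       // strict JSON (default)
    ok_json5 := json.is_valid(`{a: 1, /* comment */}`, json.Specification.JSON5);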