aboutsummaryrefslogtreecommitdiff
path: root/core/encoding/json
diff options
context:
space:
mode:
authorgingerBill <bill@gingerbill.org>2019-01-06 20:37:12 +0000
committergingerBill <bill@gingerbill.org>2019-01-06 20:37:12 +0000
commitc5def602241aba33a4c111edc22577d9490c8ef2 (patch)
tree829b9fd3882da7d9f2b90cbe853fd97da2a2508d /core/encoding/json
parent6e6a05382318658bbab8040f6c4ae66bc9f1044c (diff)
Begin work on `package json`
Diffstat (limited to 'core/encoding/json')
-rw-r--r--core/encoding/json/parser.odin323
-rw-r--r--core/encoding/json/tokenizer.odin322
-rw-r--r--core/encoding/json/types.odin70
3 files changed, 715 insertions, 0 deletions
diff --git a/core/encoding/json/parser.odin b/core/encoding/json/parser.odin
new file mode 100644
index 000000000..2b66a1558
--- /dev/null
+++ b/core/encoding/json/parser.odin
@@ -0,0 +1,323 @@
+package json
+
+import "core:mem"
+import "core:unicode/utf8"
+import "core:strconv"
+import "core:strings"
+
+// Parser holds the tokenizer and a one-token lookahead while recursively
+// descending through a JSON document.
+Parser :: struct {
+	tok:        Tokenizer,     // underlying tokenizer over the input data
+	curr_token: Token,         // current lookahead token
+	allocator:  mem.Allocator, // allocator used for all strings/arrays/objects the parser produces
+}
+
+// make_parser creates a parser over `data` and primes the one-token
+// lookahead so `curr_token` is valid before the first parse call.
+make_parser :: proc(data: string, allocator := context.allocator) -> Parser {
+	parser := Parser{
+		tok       = make_tokenizer(data),
+		allocator = allocator,
+	};
+	assert(parser.allocator.procedure != nil);
+	advance_token(&parser);
+	return parser;
+}
+
+// parse parses the JSON document in `data` and returns its top-level value.
+// Previously only a top-level object was accepted; per RFC 8259 any JSON
+// value (object, array, string, number, boolean, null) may appear at the
+// top level, so non-object documents are now delegated to parse_value.
+// Objects take exactly the same path as before.
+parse :: proc(data: string, allocator := context.allocator) -> (Value, Error) {
+	p := make_parser(data, allocator);
+	if p.curr_token.kind == Kind.Open_Brace {
+		return parse_object(&p);
+	}
+	return parse_value(&p);
+}
+
+// advance_token consumes the lookahead token, returning the token that was
+// current before the call together with any tokenizer error.
+advance_token :: proc(p: ^Parser) -> (Token, Error) {
+	consumed := p.curr_token;
+	next, err := get_token(&p.tok);
+	p.curr_token = next;
+	return consumed, err;
+}
+
+
+// allow_token consumes the current token iff it is of `kind`;
+// reports whether a token was consumed.
+allow_token :: proc(p: ^Parser, kind: Kind) -> bool {
+	if p.curr_token.kind != kind {
+		return false;
+	}
+	advance_token(p);
+	return true;
+}
+
+// expect_token consumes the current token unconditionally and returns
+// Error.None iff it was of `kind`, Error.Unexpected_Token otherwise.
+expect_token :: proc(p: ^Parser, kind: Kind) -> Error {
+	found := p.curr_token.kind;
+	advance_token(p);
+	if found != kind {
+		return Error.Unexpected_Token;
+	}
+	return Error.None;
+}
+
+
+
+// parse_value parses a single JSON value starting at the current token and
+// leaves the parser positioned on the token that follows it.
+// Scalars are converted in place; objects and arrays are delegated to
+// parse_object / parse_array.  The returned value records the source
+// position of its first token.
+parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) {
+	value.pos = p.curr_token.pos;
+	token := p.curr_token;
+	switch token.kind {
+	case Kind.Null:
+		value.value = Null{};
+		advance_token(p);
+		return;
+	case Kind.False:
+		value.value = Boolean(false);
+		advance_token(p);
+		return;
+	case Kind.True:
+		value.value = Boolean(true);
+		advance_token(p);
+		return;
+
+	case Kind.Integer:
+		value.value = Integer(strconv.parse_i64(token.text));
+		advance_token(p);
+		return;
+	case Kind.Float:
+		value.value = Float(strconv.parse_f64(token.text));
+		advance_token(p);
+		return;
+	case Kind.String:
+		// unquote_string allocates a copy using the parser's allocator
+		value.value = String(unquote_string(token, p.allocator));
+		advance_token(p);
+		return;
+
+	case Kind.Open_Brace:
+		return parse_object(p);
+
+	case Kind.Open_Bracket:
+		return parse_array(p);
+	}
+
+	// Anything else (comma, colon, closing delimiter, invalid token, EOF)
+	// cannot begin a value; consume it so the caller can make progress.
+	err = Error.Unexpected_Token;
+	advance_token(p);
+	return;
+}
+
+// parse_array parses `[ value, value, ... ]` into an Array.
+// After each element a comma continues the list; anything else ends it,
+// so trailing commas are rejected by the closing-bracket check.
+// On failure, every element parsed so far is destroyed by the deferred
+// cleanup, so a partially-built array does not leak.
+parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) {
+	value.pos = p.curr_token.pos;
+	if err = expect_token(p, Kind.Open_Bracket); err != Error.None {
+		return;
+	}
+
+	array: Array;
+	array.allocator = p.allocator;
+	// Free all stored elements if any later step sets `err`
+	defer if err != Error.None {
+		for elem in array {
+			destroy_value(elem);
+		}
+		delete(array);
+	}
+
+	for p.curr_token.kind != Kind.Close_Bracket {
+		elem, elem_err := parse_value(p);
+		if elem_err != Error.None {
+			err = elem_err;
+			return;
+		}
+		append(&array, elem);
+
+		// Disallow trailing commas for the time being
+		if allow_token(p, Kind.Comma) {
+			continue;
+		} else {
+			break;
+		}
+	}
+
+	if err = expect_token(p, Kind.Close_Bracket); err != Error.None {
+		return;
+	}
+
+	value.value = array;
+	return;
+}
+
+
+// parse_object parses `{ "key": value, ... }` into an Object.
+// Trailing commas are rejected by the closing-brace check.
+// On failure, every key and value allocated so far is destroyed, so a
+// partially-parsed object does not leak.  (Fixes: `key` leaked on the
+// missing-colon and bad-value paths, and `elem` leaked on the
+// duplicate-key path — the deferred cleanup only frees entries that were
+// already stored in `obj`.)
+parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
+	value.pos = p.curr_token.pos;
+	if err = expect_token(p, Kind.Open_Brace); err != Error.None {
+		value.pos = p.curr_token.pos;
+		return;
+	}
+
+	obj: Object;
+	obj.allocator = p.allocator;
+	// Free everything already stored in the object if any later step fails
+	defer if err != Error.None {
+		for key, elem in obj {
+			delete(key);
+			destroy_value(elem);
+		}
+		delete(obj);
+	}
+
+	for p.curr_token.kind != Kind.Close_Brace {
+		tok := p.curr_token;
+		if tok_err := expect_token(p, Kind.String); tok_err != Error.None {
+			err = Error.Expected_String_For_Object_Key;
+			value.pos = p.curr_token.pos;
+			return;
+		}
+		key := unquote_string(tok, p.allocator);
+
+		if colon_err := expect_token(p, Kind.Colon); colon_err != Error.None {
+			err = Error.Expected_Colon_After_Key;
+			value.pos = p.curr_token.pos;
+			delete(key); // key is not yet owned by obj; free it here
+			return;
+		}
+
+		elem, elem_err := parse_value(p);
+		if elem_err != Error.None {
+			err = elem_err;
+			value.pos = p.curr_token.pos;
+			delete(key); // key is not yet owned by obj; free it here
+			return;
+		}
+
+		if key in obj {
+			err = Error.Duplicate_Object_Key;
+			value.pos = p.curr_token.pos;
+			delete(key);
+			destroy_value(elem); // elem was never stored; destroy it too
+			return;
+		}
+
+		obj[key] = elem;
+
+		// Disallow trailing commas for the time being
+		if allow_token(p, Kind.Comma) {
+			continue;
+		} else {
+			break;
+		}
+	}
+
+	if err = expect_token(p, Kind.Close_Brace); err != Error.None {
+		value.pos = p.curr_token.pos;
+		return;
+	}
+
+	value.value = obj;
+	return;
+}
+
+
+// IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string
+//
+// unquote_string strips the surrounding quotes from a String token and
+// resolves its backslash escapes (\" \' \\ \/ \b \f \n \r \t \uXXXX),
+// returning a freshly-allocated string.  If the token contains no escapes
+// and no invalid bytes, a plain copy is returned via the fast path.
+unquote_string :: proc(token: Token, allocator := context.allocator) -> string {
+	// get_u4_rune decodes a `\uXXXX` sequence at the start of `s`,
+	// returning the rune value, or -1 if the sequence is malformed.
+	get_u4_rune :: proc(s: string) -> rune {
+		if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
+			return -1;
+		}
+
+		r: rune;
+		for c in s[2:6] {
+			x: rune;
+			switch c {
+			case '0'..'9': x = c - '0';
+			case 'a'..'f': x = c - 'a' + 10;
+			case 'A'..'F': x = c - 'A' + 10;
+			case: return -1;
+			}
+			r = r*16 + x;
+		}
+		return r;
+	}
+
+	if token.kind != Kind.String {
+		return "";
+	}
+	s := token.text;
+	if len(s) <= 2 {
+		return "";
+	}
+	s = s[1:len(s)-1]; // drop the surrounding quotes
+
+	// Fast path: scan for the first byte needing special handling.
+	i := 0;
+	for i < len(s) {
+		c := s[i];
+		if c == '\\' || c == '"' || c < ' ' {
+			break;
+		}
+		if c < utf8.RUNE_SELF {
+			i += 1;
+			continue;
+		}
+		// BUG FIX: decode at the current offset `s[i:]`, not at the start
+		// of `s`.  Decoding `s` returned the width of the FIRST rune for
+		// every multi-byte character, mis-advancing `i` and skipping the
+		// invalid-UTF-8 check entirely.
+		r, w := utf8.decode_rune_in_string(s[i:]);
+		if r == utf8.RUNE_ERROR && w == 1 {
+			break;
+		}
+		i += w;
+	}
+	if i == len(s) {
+		// No escapes or invalid bytes: return a straight copy.
+		return strings.new_string(s, allocator);
+	}
+
+	// Slow path: copy the clean prefix, then process the remainder.
+	// Escapes never expand the output (\uXXXX is 6 bytes in, at most 4
+	// out), so len(s) plus a small cushion is sufficient.
+	b := make([]byte, len(s) + 2*utf8.UTF_MAX, allocator);
+	w := copy(b, cast([]byte)s[0:i]);
+	loop: for i < len(s) {
+		c := s[i];
+		switch {
+		case c == '\\':
+			i += 1;
+			if i >= len(s) {
+				break loop; // dangling backslash at end of string
+			}
+			switch s[i] {
+			case: break loop; // unknown escape: stop, returning what we have
+			case '"', '\'', '\\', '/':
+				b[w] = s[i];
+				i += 1;
+				w += 1;
+
+			case 'b':
+				b[w] = '\b';
+				i += 1;
+				w += 1;
+			case 'f':
+				b[w] = '\f';
+				i += 1;
+				w += 1;
+			case 'r':
+				b[w] = '\r';
+				i += 1;
+				w += 1;
+			case 't':
+				b[w] = '\t';
+				i += 1;
+				w += 1;
+			case 'n':
+				b[w] = '\n';
+				i += 1;
+				w += 1;
+			case 'u':
+				i -= 1; // Include the \u in the check for sanity sake
+				r := get_u4_rune(s[i:]);
+				if r < 0 {
+					break loop;
+				}
+				i += 6;
+
+				buf, buf_width := utf8.encode_rune(r);
+				copy(b[w:], buf[:buf_width]);
+				w += buf_width;
+			}
+
+		case c == '"', c < ' ':
+			break loop; // unescaped quote or control byte terminates the scan
+
+		case c < utf8.RUNE_SELF:
+			b[w] = c;
+			i += 1;
+			w += 1;
+
+		case:
+			r, width := utf8.decode_rune_in_string(s[i:]);
+			i += width;
+			if r == utf8.RUNE_ERROR && width == 1 {
+				// Invalid byte: RUNE_ERROR would encode to 3 bytes from a
+				// 1-byte source and trip the assert below (and could
+				// overflow `b`); stop here per the "mostly valid" contract.
+				break loop;
+			}
+
+			buf, buf_width := utf8.encode_rune(r);
+			assert(buf_width <= width);
+			copy(b[w:], buf[:buf_width]);
+			w += buf_width;
+		}
+	}
+
+	return string(b[:w]);
+}
diff --git a/core/encoding/json/tokenizer.odin b/core/encoding/json/tokenizer.odin
new file mode 100644
index 000000000..dfa20a6a7
--- /dev/null
+++ b/core/encoding/json/tokenizer.odin
@@ -0,0 +1,322 @@
+package json
+
+import "core:unicode/utf8"
+
+// Token is a single lexical element of the input, carrying its source
+// position and the exact slice of input it spans.
+Token :: struct {
+	using pos: Pos,
+	kind: Kind,
+	text: string, // slice of the tokenizer's input, not a copy
+}
+
+// Kind enumerates every token the tokenizer can produce.
+Kind :: enum {
+	Invalid,
+
+	// Keywords
+	Null,
+	False,
+	True,
+
+	// Any other bare identifier (not valid JSON; reported as-is)
+	Ident,
+
+	// Literals
+	Integer,
+	Float,
+	String,
+
+	// Punctuation
+	Colon,
+	Comma,
+
+	Open_Brace,    // {
+	Close_Brace,   // }
+
+	Open_Bracket,  // [
+	Close_Bracket, // ]
+}
+
+// Tokenizer scans a JSON document held entirely in memory.
+// `pos.offset` is the byte index of the start of the current rune `r`.
+Tokenizer :: struct {
+	using pos: Pos,
+	data: string,          // entire input; token texts alias into this
+	r: rune,               // current rune
+	w: int,                // current rune width in bytes
+	curr_line_offset: int, // byte offset of the current line's start, for column tracking
+}
+
+
+
+// make_tokenizer creates a tokenizer over `data`, decoding the first rune
+// so that `r` and `w` are primed before the first get_token call.
+make_tokenizer :: proc(data: string) -> Tokenizer {
+	tok: Tokenizer;
+	tok.pos = Pos{line = 1};
+	tok.data = data;
+	next_rune(&tok);
+	return tok;
+}
+
+// next_rune advances past the current rune, decodes the next one into
+// `t.r`/`t.w`, updates the column, and returns the new rune.
+// NOTE(review): once `offset` reaches len(data) this returns RUNE_EOF
+// WITHOUT touching `t.r`, so `t.r` keeps its last decoded value at EOF —
+// callers guard with `t.offset < len(t.data)` instead of checking `t.r`.
+// Also, when the final rune is consumed, `offset` lands exactly at
+// len(data) and this decodes the empty tail slice — presumably yielding
+// RUNE_ERROR with width 0; confirm utf8.decode_rune_in_string's behavior.
+next_rune :: proc(t: ^Tokenizer) -> rune #no_bounds_check {
+	if t.offset >= len(t.data) {
+		return utf8.RUNE_EOF;
+	}
+	t.offset += t.w;
+	t.r, t.w = utf8.decode_rune_in_string(t.data[t.offset:]);
+	t.pos.column = t.offset - t.curr_line_offset;
+	return t.r;
+}
+
+
+// get_token scans and returns the next token from the input.
+// On end of input it returns Error.EOF.  The returned token's `text`
+// always aliases the tokenizer's input; nothing is allocated here.
+get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
+	// skip_digits advances past a run of decimal digits, stopping on the
+	// first non-digit rune (which is left as the current rune).
+	skip_digits :: proc(t: ^Tokenizer) {
+		for t.offset < len(t.data) {
+			next_rune(t);
+			if '0' <= t.r && t.r <= '9' {
+				// Okay
+			} else {
+				return;
+			}
+		}
+	}
+
+	// NOTE(review): name looks like a typo for "scan_escape".  Also, the
+	// success path of the \uXXXX case falls through to the final
+	// `return false`; the result is currently ignored at the call site,
+	// so this has no visible effect — confirm intent before relying on
+	// the return value.
+	scan_espace :: proc(t: ^Tokenizer) -> bool {
+		switch t.r {
+		case '"', '\'', '\\', '/', 'b', 'n', 'r', 't', 'f':
+			next_rune(t);
+			return true;
+		case 'u':
+			// Expect 4 hexadecimal digits
+			for i := 0; i < 4; i += 1 {
+				r := next_rune(t);
+				switch r {
+				case '0'..'9', 'a'..'f', 'A'..'F':
+					// Okay
+				case:
+					return false;
+				}
+			}
+		case:
+			// Ignore the next rune regardless
+			next_rune(t);
+		}
+		return false;
+	}
+
+	// skip_whitespace advances past blanks, bumping the line counter and
+	// resetting the column on each newline.
+	skip_whitespace :: proc(t: ^Tokenizer) -> rune {
+		loop: for t.offset < len(t.data) {
+			switch t.r {
+			case ' ', '\t', '\v', '\f', '\r':
+				next_rune(t);
+			case '\n':
+				t.line += 1;
+				t.curr_line_offset = t.offset;
+				t.pos.column = 1;
+				next_rune(t);
+			case:
+				break loop;
+			}
+		}
+		return t.r;
+	}
+
+	skip_whitespace(t);
+
+	// The token starts at the current position; kind is refined below.
+	token.pos = t.pos;
+	token.kind = Kind.Invalid;
+
+	curr_rune := t.r;
+	next_rune(t);
+
+	switch curr_rune {
+	case utf8.RUNE_ERROR:
+		err = Error.Illegal_Character;
+	case utf8.RUNE_EOF, '\x00':
+		err = Error.EOF;
+
+	case 'A'..'Z', 'a'..'z', '_':
+		// Identifier; recognised keywords are re-tagged below.
+		token.kind = Kind.Ident;
+
+		for t.offset < len(t.data) {
+			switch next_rune(t) {
+			case 'A'..'Z', 'a'..'z', '0'..'9', '_':
+				continue;
+			}
+
+			break;
+		}
+
+		switch str := t.data[token.offset:t.offset]; str {
+		case "null":  token.kind = Kind.Null;
+		case "false": token.kind = Kind.False;
+		case "true":  token.kind = Kind.True;
+		}
+
+	case '-':
+		switch t.r {
+		case '0'..'9':
+			// Okay
+		case:
+			// Illegal use of +/-
+			err = Error.Illegal_Character;
+			break;
+		}
+		// NOTE(review): this fallthrough runs even when err was just set
+		// above (the inner `break` only exits the inner switch); the
+		// number is still scanned and returned alongside the error.
+		fallthrough;
+
+	case '0'..'9':
+		token.kind = Kind.Integer;
+
+		skip_digits(t);
+		if t.r == '.' {
+			// A fractional part makes it a float
+			token.kind = Kind.Float;
+			next_rune(t);
+			skip_digits(t);
+		}
+		if t.r == 'e' || t.r == 'E' {
+			// Exponent part, optionally signed
+			switch r := next_rune(t); r {
+			case '+', '-':
+				next_rune(t);
+			}
+			skip_digits(t);
+		}
+
+		// Re-validate the scanned spelling against the JSON number grammar
+		str := t.data[token.offset:t.offset];
+		if !is_valid_number(str) {
+			err = Error.Invalid_Number;
+		}
+
+
+	case '"':
+		token.kind = Kind.String;
+		quote := curr_rune;
+		for t.offset < len(t.data) {
+			r := t.r;
+			if r == '\n' || r < 0 {
+				err = Error.String_Not_Terminated;
+				break;
+			}
+			next_rune(t);
+			if r == quote {
+				break;
+			}
+			if r == '\\' {
+				// Consume the escape so an escaped quote does not end the string
+				scan_espace(t);
+			}
+		}
+
+		if !is_valid_string_literal(t.data[token.offset : t.offset]) {
+			err = Error.Invalid_String;
+		}
+
+	case ',': token.kind = Kind.Comma;
+	case ':': token.kind = Kind.Colon;
+	case '{': token.kind = Kind.Open_Brace;
+	case '}': token.kind = Kind.Close_Brace;
+	case '[': token.kind = Kind.Open_Bracket;
+	case ']': token.kind = Kind.Close_Bracket;
+
+	case: err = Error.Illegal_Character;
+	}
+
+	token.text = t.data[token.offset : t.offset];
+
+	return;
+}
+
+
+
+// is_valid_number reports whether `s` exactly matches the JSON number
+// grammar (RFC 8259): optional '-', an integer part without leading
+// zeros, an optional '.digits' fraction, and an optional exponent.
+// It consumes `s` piece by piece; the whole string must be matched.
+is_valid_number :: proc(s: string) -> bool {
+	if s == "" {
+		return false;
+	}
+
+	// Optional leading minus (a leading '+' is not allowed)
+	if s[0] == '-' {
+		s = s[1:];
+		if s == "" {
+			return false;
+		}
+	}
+
+	// Integer part: a single '0', or a non-zero digit followed by digits
+	switch s[0] {
+	case '0':
+		s = s[1:];
+	case '1'..'9':
+		s = s[1:];
+		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' do s = s[1:];
+	case:
+		return false;
+	}
+
+
+	// Fraction: '.' must be followed by at least one digit
+	if len(s) >= 2 && s[0] == '.' && '0' <= s[1] && s[1] <= '9' {
+		s = s[2:];
+		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' do s = s[1:];
+	}
+
+	// Exponent: 'e' or 'E', an optional sign, then digits
+	if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') {
+		s = s[1:];
+		switch s[0] {
+		case '+', '-':
+			s = s[1:];
+			if s == "" {
+				return false;
+			}
+		}
+		for len(s) > 0 && '0' <= s[0] && s[0] <= '9' do s = s[1:];
+	}
+
+	// The string should be empty now to be valid
+	return s == "";
+}
+
+// is_valid_string_literal reports whether `s` is a well-formed quoted
+// string: surrounding double quotes, valid escape sequences, no raw
+// control bytes or quotes, and valid UTF-8 throughout.  (\' is accepted
+// alongside the standard JSON escapes, matching unquote_string.)
+is_valid_string_literal :: proc(s: string) -> bool {
+	if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
+		return false;
+	}
+	s = s[1 : len(s)-1];
+
+	i := 0;
+	for i < len(s) {
+		c := s[i];
+		switch {
+		case c == '\\':
+			i += 1;
+			if i >= len(s) {
+				return false; // dangling backslash
+			}
+			switch s[i] {
+			case '"', '\'', '\\', '/', 'b', 'n', 'r', 't', 'f':
+				i += 1;
+			case 'u':
+				hex := s[i+1:];
+				if len(hex) < 4 {
+					return false;
+				}
+				hex = hex[:4];
+				i += 5; // skip the 'u' and the four hex digits
+
+				for j := 0; j < 4; j += 1 {
+					c := hex[j];
+					switch c {
+					// BUG FIX: only hexadecimal digits are valid here;
+					// previously 'a'..'z'/'A'..'Z' accepted any ASCII
+					// letter, so e.g. "\u00zz" validated.
+					case '0'..'9', 'a'..'f', 'A'..'F':
+						// Okay
+					case:
+						return false;
+					}
+				}
+
+			case: return false;
+			}
+
+		case c == '"', c < ' ':
+			return false;
+
+		case c < utf8.RUNE_SELF:
+			i += 1;
+
+		case:
+			r, width := utf8.decode_rune_in_string(s[i:]);
+			if r == utf8.RUNE_ERROR && width == 1 {
+				return false;
+			}
+			i += width;
+		}
+	}
+	// The loop consumes every byte or returns false, so reaching here
+	// means the literal is valid.
+	return true;
+}
diff --git a/core/encoding/json/types.odin b/core/encoding/json/types.odin
new file mode 100644
index 000000000..d8a10b801
--- /dev/null
+++ b/core/encoding/json/types.odin
@@ -0,0 +1,70 @@
+package json
+
+import "core:strconv"
+
+// Odin representations of the JSON value kinds, used as the variants of
+// the Value union below.  Array and Object are `distinct` dynamic
+// containers so the parser can set their allocator explicitly.
+Null    :: distinct rawptr;
+Integer :: i64;
+Float   :: f64;
+Boolean :: bool;
+String  :: string;
+Array   :: distinct [dynamic]Value;
+Object  :: distinct map[string]Value;
+
+// Value is a parsed JSON value tagged with the source position of its
+// first token.  The union is nil for a zero-initialised Value (e.g. the
+// value returned alongside a parse error).
+Value :: struct {
+	pos: Pos,
+	value: union {
+		Null,
+		Integer,
+		Float,
+		Boolean,
+		String,
+		Array,
+		Object,
+	}
+}
+
+// Pos is a position within the input document.
+Pos :: struct {
+	offset: int, // byte offset from the start of the input
+	line:   int, // line number, starting at 1
+	column: int, // column within the line, measured in bytes
+}
+
+
+// Error describes any failure from tokenizing or parsing.
+// Error.None means success; Error.EOF marks the end of input and is not
+// necessarily a failure.
+Error :: enum {
+	None,
+
+	EOF, // Not necessarily an error
+
+	// Tokenizing Errors
+	Illegal_Character,
+	Invalid_Number,
+	String_Not_Terminated,
+	Invalid_String,
+
+
+	// Parsing Errors
+	Unexpected_Token,
+	Expected_String_For_Object_Key,
+	Duplicate_Object_Key,
+	Expected_Colon_After_Key,
+}
+
+
+
+
+// destroy_value recursively frees all memory owned by `value`.
+// Null, Integer, Float, and Boolean hold no allocations, so they (and a
+// nil union) are simply ignored.
+destroy_value :: proc(value: Value) {
+	switch v in value.value {
+	case String:
+		delete(v);
+	case Array:
+		for elem in v {
+			destroy_value(elem);
+		}
+		delete(v);
+	case Object:
+		for key, elem in v {
+			delete(key);
+			destroy_value(elem);
+		}
+		delete(v);
+	}
+}
+