diff options
| author | gingerBill <bill@gingerbill.org> | 2019-01-06 20:37:12 +0000 |
|---|---|---|
| committer | gingerBill <bill@gingerbill.org> | 2019-01-06 20:37:12 +0000 |
| commit | c5def602241aba33a4c111edc22577d9490c8ef2 (patch) | |
| tree | 829b9fd3882da7d9f2b90cbe853fd97da2a2508d /core/encoding/json/parser.odin | |
| parent | 6e6a05382318658bbab8040f6c4ae66bc9f1044c (diff) | |
Begin work on `package json`
Diffstat (limited to 'core/encoding/json/parser.odin')
| -rw-r--r-- | core/encoding/json/parser.odin | 323 |
1 files changed, 323 insertions, 0 deletions
diff --git a/core/encoding/json/parser.odin b/core/encoding/json/parser.odin new file mode 100644 index 000000000..2b66a1558 --- /dev/null +++ b/core/encoding/json/parser.odin @@ -0,0 +1,323 @@ +package json + +import "core:mem" +import "core:unicode/utf8" +import "core:strconv" +import "core:strings" + +Parser :: struct { + tok: Tokenizer, + curr_token: Token, + allocator: mem.Allocator, +} + +make_parser :: proc(data: string, allocator := context.allocator) -> Parser { + p: Parser; + p.tok = make_tokenizer(data); + p.allocator = allocator; + assert(p.allocator.procedure != nil); + advance_token(&p); + return p; +} + +parse :: proc(data: string, allocator := context.allocator) -> (Value, Error) { + p := make_parser(data, allocator); + return parse_object(&p); +} + +advance_token :: proc(p: ^Parser) -> (Token, Error) { + err: Error; + prev := p.curr_token; + p.curr_token, err = get_token(&p.tok); + return prev, err; +} + + +allow_token :: proc(p: ^Parser, kind: Kind) -> bool { + if p.curr_token.kind == kind { + advance_token(p); + return true; + } + return false; +} + +expect_token :: proc(p: ^Parser, kind: Kind) -> Error { + prev := p.curr_token; + advance_token(p); + if prev.kind == kind { + return Error.None; + } + return Error.Unexpected_Token; +} + + + +parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) { + value.pos = p.curr_token.pos; + token := p.curr_token; + switch token.kind { + case Kind.Null: + value.value = Null{}; + advance_token(p); + return; + case Kind.False: + value.value = Boolean(false); + advance_token(p); + return; + case Kind.True: + value.value = Boolean(true); + advance_token(p); + return; + + case Kind.Integer: + value.value = Integer(strconv.parse_i64(token.text)); + advance_token(p); + return; + case Kind.Float: + value.value = Float(strconv.parse_f64(token.text)); + advance_token(p); + return; + case Kind.String: + value.value = String(unquote_string(token, p.allocator)); + advance_token(p); + return; + + case Kind.Open_Brace: + return parse_object(p); + + case Kind.Open_Bracket: + return parse_array(p); + } + + err = Error.Unexpected_Token; + advance_token(p); + return; +} + +parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) { + value.pos = p.curr_token.pos; + if err = expect_token(p, Kind.Open_Bracket); err != Error.None { + return; + } + + array: Array; + array.allocator = p.allocator; + defer if err != Error.None { + for elem in array { + destroy_value(elem); + } + delete(array); + } + + for p.curr_token.kind != Kind.Close_Bracket { + elem, elem_err := parse_value(p); + if elem_err != Error.None { + err = elem_err; + return; + } + append(&array, elem); + + // Disallow trailing commas for the time being + if allow_token(p, Kind.Comma) { + continue; + } else { + break; + } + } + + if err = expect_token(p, Kind.Close_Bracket); err != Error.None { + return; + } + + value.value = array; + return; +} + + +parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) { + value.pos = p.curr_token.pos; + if err = expect_token(p, Kind.Open_Brace); err != Error.None { + value.pos = p.curr_token.pos; + return; + } + + obj: Object; + obj.allocator = p.allocator; + defer if err != Error.None { + for key, elem in obj { + delete(key); + destroy_value(elem); + } + delete(obj); + } + + for p.curr_token.kind != Kind.Close_Brace { + tok := p.curr_token; + if tok_err := expect_token(p, Kind.String); tok_err != Error.None { + err = Error.Expected_String_For_Object_Key; + value.pos = p.curr_token.pos; + return; + } + key := unquote_string(tok, p.allocator); + + if colon_err := expect_token(p, Kind.Colon); colon_err != Error.None { + err = Error.Expected_Colon_After_Key; + value.pos = p.curr_token.pos; + return; + } + + elem, elem_err := parse_value(p); + if elem_err != Error.None { + err = elem_err; + value.pos = p.curr_token.pos; + return; + } + + if key in obj { + err = Error.Duplicate_Object_Key; + value.pos = p.curr_token.pos; + delete(key); + return; + } + + obj[key] = elem; + + // Disallow trailing commas for the time being + if allow_token(p, Kind.Comma) { + continue; + } else { + break; + } + } + + if err = expect_token(p, Kind.Close_Brace); err != Error.None { + value.pos = p.curr_token.pos; + return; + } + + value.value = obj; + return; +} + + +// IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string +unquote_string :: proc(token: Token, allocator := context.allocator) -> string { + get_u4_rune :: proc(s: string) -> rune { + if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { + return -1; + } + + r: rune; + for c in s[2:6] { + x: rune; + switch c { + case '0'..'9': x = c - '0'; + case 'a'..'f': x = c - 'a' + 10; + case 'A'..'F': x = c - 'A' + 10; + case: return -1; + } + r = r*16 + x; + } + return r; + } + + if token.kind != Kind.String { + return ""; + } + s := token.text; + if len(s) <= 2 { + return ""; + } + s = s[1:len(s)-1]; + + i := 0; + for i < len(s) { + c := s[i]; + if c == '\\' || c == '"' || c < ' ' { + break; + } + if c < utf8.RUNE_SELF { + i += 1; + continue; + } + r, w := utf8.decode_rune_in_string(s); + if r == utf8.RUNE_ERROR && w == 1 { + break; + } + i += w; + } + if i == len(s) { + return strings.new_string(s, allocator); + } + + b := make([]byte, len(s) + 2*utf8.UTF_MAX, allocator); + w := copy(b, cast([]byte)s[0:i]); + loop: for i < len(s) { + c := s[i]; + switch { + case c == '\\': + i += 1; + if i >= len(s) { + break loop; + } + switch s[i] { + case: break loop; + case '"', '\'', '\\', '/': + b[w] = s[i]; + i += 1; + w += 1; + + case 'b': + b[w] = '\b'; + i += 1; + w += 1; + case 'f': + b[w] = '\f'; + i += 1; + w += 1; + case 'r': + b[w] = '\r'; + i += 1; + w += 1; + case 't': + b[w] = '\t'; + i += 1; + w += 1; + case 'n': + b[w] = '\n'; + i += 1; + w += 1; + case 'u': + i -= 1; // Include the \u in the check for sanity sake + r := get_u4_rune(s[i:]); + if r < 0 { + break loop; + } + i += 6; + + buf, buf_width := utf8.encode_rune(r); + copy(b[w:], buf[:buf_width]); + w += buf_width; + } + + case c == '"', c < ' ': + break loop; + + case c < utf8.RUNE_SELF: + b[w] = c; + i += 1; + w += 1; + + case: + r, width := utf8.decode_rune_in_string(s[i:]); + i += width; + + buf, buf_width := utf8.encode_rune(r); + assert(buf_width <= width); + copy(b[w:], buf[:buf_width]); + w += buf_width; + } + } + + return string(b[:w]); +} |