diff options
| author | gingerBill <bill@gingerbill.org> | 2018-12-03 20:26:10 +0000 |
|---|---|---|
| committer | gingerBill <bill@gingerbill.org> | 2018-12-03 20:26:10 +0000 |
| commit | 50c3f4d74e7a332cea27edee3020b54bd3d3bc7d (patch) | |
| tree | d5340cd15809578f5357f67cda4afcb7cb1abedd | |
| parent | 304c7594cd2d904072917bb79d0aa340090d8296 (diff) | |
Add package encoding/cel
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | core/encoding/cel/cel.odin | 840 | ||||
| -rw-r--r-- | core/encoding/cel/doc.odin | 51 | ||||
| -rw-r--r-- | core/encoding/cel/token.odin | 520 | ||||
| -rw-r--r-- | src/build_settings.cpp | 2 |
5 files changed, 1413 insertions, 1 deletions
diff --git a/.gitignore b/.gitignore index 38b611044..0c3e8b65a 100644 --- a/.gitignore +++ b/.gitignore @@ -271,3 +271,4 @@ shared/ # temp files * .ll *.bc +*.ll diff --git a/core/encoding/cel/cel.odin b/core/encoding/cel/cel.odin new file mode 100644 index 000000000..7d59cd386 --- /dev/null +++ b/core/encoding/cel/cel.odin @@ -0,0 +1,840 @@ +package cel; + +import "core:fmt" +import "core:strconv" +import "core:os" +import "core:mem" +import "core:unicode/utf8" +import "core:strings" + +Array :: []Value; +Dict :: map[string]Value; +Nil_Value :: struct{}; + +Value :: union { + Nil_Value, + bool, i64, f64, string, + Array, Dict, +} + +Parser :: struct { + tokens: [dynamic]Token, + prev_token: Token, + curr_token: Token, + curr_token_index: int, + + allocated_strings: [dynamic]string, + + error_count: int, + + root: Dict, + dict_stack: [dynamic]^Dict, // NOTE: Pointers may be stored on the stack +} + + /* print_value writes `value` to stdout through fmt. When `pretty` is set, Array and Dict members are printed one per line, tab-indented to indent+1; otherwise they are comma-separated on one line. A nil union and Nil_Value both print as "nil". */ +print_value :: proc(value: Value, pretty := true, indent := 0) { + print_indent :: proc(indent: int) { + for i in 0..indent-1 do fmt.print("\t"); + } + + switch v in value { + case bool: fmt.print(v); + case i64: fmt.print(v); + case f64: fmt.print(v); + case string: fmt.print(v); + case Array: + fmt.print("["); + if pretty do fmt.println(); + for e, i in v { + if pretty { + print_indent(indent+1); + print_value(e, pretty, indent+1); + fmt.println(","); + } else { + if i > 0 do fmt.print(", "); + print_value(e, pretty, indent+1); /* forward `pretty`: the default (true) would pretty-print nested containers inside compact output */ + } + } + if pretty do print_indent(indent); + fmt.print("]"); + case Dict: + fmt.print("{"); + if pretty do fmt.println(); + + i := 0; + for name, value in v { + if pretty { + print_indent(indent+1); + fmt.printf("%s = ", name); + print_value(value, pretty, indent+1); + fmt.println(","); + } else { + if i > 0 do fmt.print(", "); + fmt.printf("%s = ", name); + print_value(value, pretty, indent+1); + i += 1; + } + } + + if pretty do print_indent(indent); + fmt.print("}"); + case: + fmt.print("nil"); + case Nil_Value: + fmt.print("nil"); + } +} +print :: 
proc(p: ^Parser, pretty := false) { + for name, val in p.root { + fmt.printf("%s = ", name); + print_value(val, pretty); + fmt.println(";"); + } +} + +create_from_string :: proc(src: string) -> (^Parser, bool) { + return init(cast([]byte)src); +} + + +init :: proc(src: []byte) -> (^Parser, bool) { + t: Tokenizer; + tokenizer_init(&t, src); + return create_from_tokenizer(&t); +} + + /* create_from_tokenizer scans every token up front, then parses top-level assignments into p.root. It returns false when an Illegal token is scanned or the tokenizer reported errors; parse errors are only counted in p.error_count and still return true. The Parser is returned allocated in every case. */ +create_from_tokenizer :: proc(t: ^Tokenizer) -> (^Parser, bool) { + p := new(Parser); + for { + tok := scan(t); + if tok.kind == Kind.Illegal { + return p, false; + } + append(&p.tokens, tok); + if tok.kind == Kind.EOF { + break; + } + } + + if t.error_count > 0 { + return p, false; + } + + if len(p.tokens) == 0 { + tok := Token{kind = Kind.EOF}; + tok.line, tok.column = 1, 1; + append(&p.tokens, tok); + return p, true; + } + + p.curr_token_index = 0; + p.prev_token = p.tokens[p.curr_token_index]; + p.curr_token = p.tokens[p.curr_token_index]; + + p.root = Dict{}; + p.dict_stack = make([dynamic]^Dict, 0, 4); + append(&p.dict_stack, &p.root); /* the root dictionary is the outermost lookup scope */ + + for p.curr_token.kind != Kind.EOF && + p.curr_token.kind != Kind.Illegal && + p.curr_token_index < len(p.tokens) { + if !parse_assignment(p) { + break; + } + } + + return p, true; +} + /* destroy releases the token list, every string recorded in allocated_strings, the dict stack, the value tree under p.root (Arrays and Dicts recursively), and finally the Parser itself. */ +destroy :: proc(p: ^Parser) { + destroy_value :: proc(value: Value) { + switch v in value { + case Array: + for elem in v do destroy_value(elem); + delete(v); + + case Dict: + for key, value in v do destroy_value(value); + delete(v); + } + } + + delete(p.tokens); + for s in p.allocated_strings do delete(s); + delete(p.allocated_strings); + delete(p.dict_stack); + + destroy_value(p.root); + free(p); +} + /* error prints "file(line:column) Error: <msg>" through the fmt *_err procedures and increments p.error_count. */ +error :: proc(p: ^Parser, pos: Pos, msg: string, args: ..any) { + fmt.printf_err("%s(%d:%d) Error: ", pos.file, pos.line, pos.column); + fmt.printf_err(msg, ..args); + fmt.println_err(); + + p.error_count += 1; +} + /* next_token advances to the following token and returns the token that was current before the call. */ +next_token :: proc(p: ^Parser) -> Token { + p.prev_token = p.curr_token; + prev := p.prev_token; + + if p.curr_token_index+1 < len(p.tokens) { + 
p.curr_token_index += 1; + p.curr_token = p.tokens[p.curr_token_index]; + return prev; + } + p.curr_token_index = len(p.tokens); + p.curr_token = p.tokens[p.curr_token_index-1]; + error(p, prev.pos, "Token is EOF"); + return prev; +} + /* unquote_char decodes the first character or escape sequence of `s` (which must be non-empty). It returns the decoded rune, whether it must be written as multi-byte UTF-8, the remainder of `s`, and a success flag; on failure all results are zero values. */ +unquote_char :: proc(s: string, quote: byte) -> (r: rune, multiple_bytes: bool, tail_string: string, success: bool) { + hex_to_int :: proc(c: byte) -> int { + switch c { + case '0'..'9': return int(c-'0'); + case 'a'..'f': return int(c-'a')+10; + case 'A'..'F': return int(c-'A')+10; + } + return -1; + } + + if s[0] == quote && quote == '"' { + return; + } else if s[0] >= 0x80 { + r, w := utf8.decode_rune_from_string(s); + return r, true, s[w:], true; + } else if s[0] != '\\' { + return rune(s[0]), false, s[1:], true; + } + + if len(s) <= 1 { + return; + } + c := s[1]; + s = s[2:]; + + switch c { + case: + return; + + case 'a': r = '\a'; + case 'b': r = '\b'; + case 'f': r = '\f'; + case 'n': r = '\n'; + case 'r': r = '\r'; + case 't': r = '\t'; + case 'v': r = '\v'; + case '\\': r = '\\'; + + case '"': r = '"'; + case '\'': r = '\''; + + case '0'..'7': + v := int(c-'0'); + if len(s) < 2 { + return; + } + for i in 0..1 { /* an octal escape is exactly three digits: one already consumed plus these two; do not scan the rest of the string */ + d := int(s[i]-'0'); + if d < 0 || d > 7 { + return; + } + v = (v<<3) | d; + } + s = s[2:]; + if v > 0xff { + return; + } + r = rune(v); + + case 'x', 'u', 'U': + count: int; + switch c { + case 'x': count = 2; + case 'u': count = 4; + case 'U': count = 8; + } + + if len(s) < count { + return; + } + + for i in 0..count-1 { + d := hex_to_int(s[i]); + if d < 0 { + return; + } + r = (r<<4) | rune(d); + } + s = s[count:]; + if c == 'x' { + break; + } + if r > utf8.MAX_RUNE { + return; + } + multiple_bytes = true; + } + + success = true; + tail_string = s; + return; +} + + +unquote_string :: proc(p: ^Parser, t: Token) -> (string, bool) { + if t.kind != Kind.String { + return t.lit, true; + } + s := t.lit; + n := len(s); + quote := '"'; + + if s == `""` { + return "", true; + } + + if strings.contains_rune(s, '\n') 
>= 0 { + return s, false; + } + + if strings.contains_rune(s, '\\') < 0 && strings.contains_rune(s, quote) < 0 { + if quote == '"' { + return s, true; + } + } + + + buf_len := 3*len(s) / 2; + buf := make([]byte, buf_len); + offset := 0; + for len(s) > 0 { + r, multiple_bytes, tail_string, ok := unquote_char(s, byte(quote)); + if !ok { + delete(buf); + return s, false; + } + s = tail_string; + if r < 0x80 || !multiple_bytes { + buf[offset] = byte(r); + offset += 1; + } else { + b, w := utf8.encode_rune(r); + copy(buf[offset:], b[:w]); + offset += w; + } + } + + new_string := string(buf[:offset]); + + append(&p.allocated_strings, new_string); /* recorded so destroy can free it */ + + return new_string, true; +} + + /* allow_token consumes the current token and returns true only when it has the given kind. */ +allow_token :: proc(p: ^Parser, kind: Kind) -> bool { + if p.curr_token.kind == kind { + next_token(p); + return true; + } + return false; +} + /* expect_token reports an error when the current token is not of `kind`; it always advances and returns the token that was current. */ +expect_token :: proc(p: ^Parser, kind: Kind) -> Token { + prev := p.curr_token; + if prev.kind != kind { + got := prev.lit; + if got == "\n" do got = ";"; + error(p, prev.pos, "Expected %s, got %s", kind_to_string[kind], got); + } + next_token(p); + return prev; +} + +expect_operator :: proc(p: ^Parser) -> Token { + prev := p.curr_token; + if !is_operator(prev.kind) { + error(p, prev.pos, "Expected an operator, got %s", prev.lit); + } + + + next_token(p); + return prev; +} + /* fix_advance skips tokens until the current token is EOF or a Semicolon; it is called after a reported error. */ +fix_advance :: proc(p: ^Parser) { + for { + switch t := p.curr_token; t.kind { + case Kind.EOF, Kind.Semicolon: + return; + } + next_token(p); + } +} + /* copy_value returns a recursive copy of an Array or Dict; every other variant is returned unchanged. */ +copy_value :: proc(value: Value) -> Value { + switch v in value { + case Array: + a := make(Array, len(v)); + for elem, idx in v { + a[idx] = copy_value(elem); + } + return a; + case Dict: + d := make(Dict, cap(v)); + for key, val in v { + d[key] = copy_value(val); + } + return d; + } + return value; +} + /* lookup_value searches the dict stack from the innermost scope outwards and returns a copy of the first value bound to `name`. */ +lookup_value :: proc(p: ^Parser, name: string) -> (Value, bool) { + for i := len(p.dict_stack)-1; i >= 0; i -= 1 { + d := p.dict_stack[i]; + if val, ok := d[name]; ok { + return copy_value(val), true; + } + } + + return nil, 
false; +} + /* parse_operand parses one primary expression: an identifier (resolved through lookup_value), a true/false/nil/integer/float/string literal, a parenthesised expression, an array literal or a dictionary literal. It returns the value and the position of its first token; an unrecognised token yields nil without being consumed. */ +parse_operand :: proc(p: ^Parser) -> (Value, Pos) { + tok := p.curr_token; + switch p.curr_token.kind { + case Kind.Ident: + next_token(p); + v, ok := lookup_value(p, tok.lit); + if !ok do error(p, tok.pos, "Undeclared identifier %s", tok.lit); + return v, tok.pos; + + case Kind.True: + next_token(p); + return true, tok.pos; + case Kind.False: + next_token(p); + return false, tok.pos; + + case Kind.Nil: + next_token(p); + return Nil_Value{}, tok.pos; + + case Kind.Integer: + next_token(p); + return strconv.parse_i64(tok.lit), tok.pos; + + case Kind.Float: + next_token(p); + return strconv.parse_f64(tok.lit), tok.pos; + + case Kind.String: + next_token(p); + str, ok := unquote_string(p, tok); + if !ok do error(p, tok.pos, "Unable to unquote string"); + return string(str), tok.pos; + + case Kind.Open_Paren: + expect_token(p, Kind.Open_Paren); + expr, pos := parse_expr(p); + expect_token(p, Kind.Close_Paren); + return expr, tok.pos; + + case Kind.Open_Bracket: + expect_token(p, Kind.Open_Bracket); + elems := make([dynamic]Value, 0, 4); + for p.curr_token.kind != Kind.Close_Bracket && + p.curr_token.kind != Kind.EOF { + elem, pos := parse_expr(p); + append(&elems, elem); + + if p.curr_token.kind == Kind.Semicolon && p.curr_token.lit == "\n" { /* a newline-inserted semicolon also separates elements */ + next_token(p); + } else if !allow_token(p, Kind.Comma) { + break; + } + + } + expect_token(p, Kind.Close_Bracket); + return Array(elems[:]), tok.pos; + + case Kind.Open_Brace: + expect_token(p, Kind.Open_Brace); + + dict := Dict{}; + append(&p.dict_stack, &dict); /* the literal being built is the innermost scope while its members are parsed */ + defer pop(&p.dict_stack); + + for p.curr_token.kind != Kind.Close_Brace && + p.curr_token.kind != Kind.EOF { + name_tok := p.curr_token; + if !allow_token(p, Kind.Ident) && !allow_token(p, Kind.String) { + name_tok = expect_token(p, Kind.Ident); + } + + name, ok := unquote_string(p, name_tok); + if !ok do error(p, tok.pos, "Unable to unquote string"); + expect_token(p, Kind.Assign); + elem, pos := parse_expr(p); + + if _, ok := dict[name]; ok { + error(p, 
name_tok.pos, "Previous declaration of %s in this scope", name); + } else { + dict[name] = elem; + } + + if p.curr_token.kind == Kind.Semicolon && p.curr_token.lit == "\n" { + next_token(p); + } else if !allow_token(p, Kind.Comma) { + break; + } + } + expect_token(p, Kind.Close_Brace); + return dict, tok.pos; + + } + return nil, tok.pos; +} + /* parse_atom_expr applies any chain of `.field` selectors and `[index]` expressions to `operand`. Selectors require a Dict; indexing takes an i64 on an Array or a string on a Dict. Most failures report an error and set operand to nil. */ +parse_atom_expr :: proc(p: ^Parser, operand: Value, pos: Pos) -> (Value, Pos) { + loop := true; + for loop { + switch p.curr_token.kind { + case Kind.Period: + next_token(p); + tok := next_token(p); /* next_token returns the token it moved past, i.e. the one following the '.' */ + + switch tok.kind { + case Kind.Ident: + d, ok := operand.(Dict); + if !ok || d == nil { + error(p, tok.pos, "Expected a dictionary"); + operand = nil; + continue; + } + name, usok := unquote_string(p, tok); + if !usok do error(p, tok.pos, "Unable to unquote string"); + val, found := d[name]; + if !found { + error(p, tok.pos, "Field %s not found in dictionary", name); + operand = nil; + continue; + } + operand = val; + case: + error(p, tok.pos, "Expected a selector, got %s", tok.kind); + operand = nil; + } + + case Kind.Open_Bracket: + open := expect_token(p, Kind.Open_Bracket); + index, index_pos := parse_expr(p); + close := expect_token(p, Kind.Close_Bracket); + + + switch a in operand { + case Array: + i, ok := index.(i64); + if !ok { + error(p, index_pos, "Index must be an integer for an array"); + operand = nil; + continue; + } + + if 0 <= i && i < i64(len(a)) { + operand = a[i]; + } else { + error(p, index_pos, "Index %d out of bounds range 0..%d", i, len(a)); + operand = nil; + continue; + } + + case Dict: + key, ok := index.(string); + if !ok { + error(p, index_pos, "Index must be a string for a dictionary"); + operand = nil; + continue; + } + + val, found := a[key]; + if found { + operand = val; + } else { + error(p, index_pos, "`%s` was not found in the dictionary", key); + operand = nil; + continue; + } + + + + case: + error(p, index_pos, "Indexing is only allowed on an array or dictionary"); + } + + case: + loop = 
false; + } + } + + return operand, pos; +} + /* parse_unary_expr handles the prefix forms: `@"name"` (looks up a string-named value, then applies selectors/indexing), unary + and - on i64/f64, and ! on bool. Anything else is parsed as an operand followed by selectors/indexing. */ +parse_unary_expr :: proc(p: ^Parser) -> (Value, Pos) { + op := p.curr_token; + switch p.curr_token.kind { + case Kind.At: + next_token(p); + tok := expect_token(p, Kind.String); + v, ok := lookup_value(p, tok.lit); + if !ok do error(p, tok.pos, "Undeclared identifier %s", tok.lit); + return parse_atom_expr(p, v, tok.pos); + + case Kind.Add, Kind.Sub: + next_token(p); + // TODO(bill): Calculate values as you go! + expr, pos := parse_unary_expr(p); + + switch e in expr { + case i64: if op.kind == Kind.Sub do return -e, pos; + case f64: if op.kind == Kind.Sub do return -e, pos; + case: + error(p, op.pos, "Unary operator %s can only be used on integers or floats", op.lit); + return nil, op.pos; + } + + return expr, op.pos; + + case Kind.Not: + next_token(p); + expr, pos := parse_unary_expr(p); + if v, ok := expr.(bool); ok { + return !v, op.pos; + } + error(p, op.pos, "Unary operator %s can only be used on booleans", op.lit); + return nil, op.pos; + } + + return parse_atom_expr(p, parse_operand(p)); +} + + /* value_order ranks variants for match_values: bool and string 1, i64 2, f64 3, everything else 0. */ +value_order :: proc(v: Value) -> int { + switch _ in v { + case bool, string: + return 1; + case i64: + return 2; + case f64: + return 3; + } + return 0; +} + /* match_values prepares two operands for a binary operation. The operands are reordered so the lower-ranked one is examined first; an i64 paired with an f64 is converted to f64 in place. Returns true for bool/string on the lower-ranked side, i64 with i64 or f64, and f64 with f64; false otherwise. */ +match_values :: proc(left, right: ^Value) -> bool { + if value_order(right^) < value_order(left^) { + return match_values(right, left); + } + + switch x in left^ { + case: + right^ = left^; + case bool, string: + return true; + case i64: + switch y in right^ { + case i64: + return true; + case f64: + left^ = f64(x); + return true; + } + + case f64: + switch y in right { + case f64: + return true; + } + } + + return false; +} + +calculate_binary_value :: proc(p: ^Parser, op: Kind, x, y: Value) -> (Value, bool) { + // TODO(bill): Calculate value as you go! 
+ match_values(&x, &y); /* promotes an i64 operand to f64 when the other operand is f64 */ + + + switch a in x { + case: return x, true; + + case bool: + b, ok := y.(bool); + if !ok do return nil, false; + switch op { + case Kind.Eq: return a == b, true; + case Kind.NotEq: return a != b, true; + case Kind.And: return a && b, true; + case Kind.Or: return a || b, true; + } + + case i64: + b, ok := y.(i64); + if !ok do return nil, false; if b == 0 && (op == Kind.Quo || op == Kind.Rem) do return nil, false; /* integer division or remainder by zero would trap at runtime; report it as an invalid operation instead */ + switch op { + case Kind.Add: return a + b, true; + case Kind.Sub: return a - b, true; + case Kind.Mul: return a * b, true; + case Kind.Quo: return a / b, true; + case Kind.Rem: return a % b, true; + case Kind.Eq: return a == b, true; + case Kind.NotEq: return a != b, true; + case Kind.Lt: return a < b, true; + case Kind.Gt: return a > b, true; + case Kind.LtEq: return a <= b, true; + case Kind.GtEq: return a >= b, true; + } + + case f64: + b, ok := y.(f64); + if !ok do return nil, false; + + switch op { + case Kind.Add: return a + b, true; + case Kind.Sub: return a - b, true; + case Kind.Mul: return a * b, true; + case Kind.Quo: return a / b, true; + case Kind.Eq: return a == b, true; + case Kind.NotEq: return a != b, true; + case Kind.Lt: return a < b, true; + case Kind.Gt: return a > b, true; + case Kind.LtEq: return a <= b, true; + case Kind.GtEq: return a >= b, true; + } + + case string: + b, ok := y.(string); + if !ok do return nil, false; + + switch op { + case Kind.Add: + n := len(a) + len(b); + data := make([]byte, n); + copy(data[:], cast([]byte)a); + copy(data[len(a):], cast([]byte)b); + s := string(data); + append(&p.allocated_strings, s); /* the concatenation is owned by the parser and freed in destroy */ + return s, true; + + case Kind.Eq: return a == b, true; + case Kind.NotEq: return a != b, true; + case Kind.Lt: return a < b, true; + case Kind.Gt: return a > b, true; + case Kind.LtEq: return a <= b, true; + case Kind.GtEq: return a >= b, true; + } + } + + return nil, false; /* operand types do not match, or the operator is not defined for them */ +} + +parse_binary_expr :: proc(p: ^Parser, prec_in: int) -> (Value, Pos) { + expr, pos := parse_unary_expr(p); + for prec := precedence(p.curr_token.kind); prec >= prec_in; prec 
-= 1 { + for { + op := p.curr_token; + op_prec := precedence(op.kind); + if op_prec != prec { + break; + } + expect_operator(p); + + if op.kind == Kind.Question { /* ternary: both branches are always parsed, then one is selected */ + cond := expr; + x, x_pos := parse_expr(p); + expect_token(p, Kind.Colon); + y, y_pos := parse_expr(p); + + if t, ok := cond.(bool); ok { + expr = t ? x : y; + } else { + error(p, pos, "Condition must be a boolean"); + } + + } else { + right, right_pos := parse_binary_expr(p, prec+1); + if right == nil { + error(p, right_pos, "Expected expression on the right-hand side of the binary operator %s", op.lit); + } + left := expr; + ok: bool; + expr, ok = calculate_binary_value(p, op.kind, left, right); + if !ok { + error(p, pos, "Invalid binary operation"); + } + } + } + } + return expr, pos; +} + /* parse_expr parses a full expression, starting at the lowest binary precedence (1). */ +parse_expr :: proc(p: ^Parser) -> (Value, Pos) { + return parse_binary_expr(p, 1); +} + /* expect_semicolon consumes a statement terminator. A comma is reported and skipped, EOF is accepted without consuming, and any other token is reported and followed by fix_advance. */ +expect_semicolon :: proc(p: ^Parser) { + kind := p.curr_token.kind; + + switch kind { + case Kind.Comma: + error(p, p.curr_token.pos, "Expected ';', got ','"); + next_token(p); + case Kind.Semicolon: + next_token(p); + case Kind.EOF: + // okay + case: + error(p, p.curr_token.pos, "Expected ';', got %s", p.curr_token.lit); + fix_advance(p); + } +} + /* parse_assignment parses one `name = expr` statement into the dictionary on top of the dict stack. It returns true after an empty statement or an assignment, and false at EOF or when the statement does not start with an identifier or string. */ +parse_assignment :: proc(p: ^Parser) -> bool { + top_dict :: proc(p: ^Parser) -> ^Dict { + assert(len(p.dict_stack) > 0); + return p.dict_stack[len(p.dict_stack)-1]; + } + + if p.curr_token.kind == Kind.Semicolon { + next_token(p); + return true; + } + if p.curr_token.kind == Kind.EOF { + return false; + } + + tok := p.curr_token; + if allow_token(p, Kind.Ident) || allow_token(p, Kind.String) { + expect_token(p, Kind.Assign); + name, ok := unquote_string(p, tok); + if !ok do error(p, tok.pos, "Unable to unquote string"); + expr, pos := parse_expr(p); + d := top_dict(p); + if _, ok := d[name]; ok { + error(p, tok.pos, "Previous declaration of %s", name); + } else { + d[name] = expr; + } + expect_semicolon(p); + return true; + } + error(p, tok.pos, "Expected an assignment, got %s", 
kind_to_string[tok.kind]); + fix_advance(p); + return false; +} diff --git a/core/encoding/cel/doc.odin b/core/encoding/cel/doc.odin new file mode 100644 index 000000000..2d224d292 --- /dev/null +++ b/core/encoding/cel/doc.odin @@ -0,0 +1,51 @@ +/* + package cel + + sample := ` + x = 123; + y = 321.456; + z = x * (y - 1) / 2; + w = "foo" + "bar"; + + # This is a comment + + asd = "Semicolons are optional" + + a = {id = {b = 123}} # Dict + b = a.id.b + + f = [1, 4, 9] # Array + g = f[2] + + h = x < y and w == "foobar" + i = h ? 123 : "google" + + j = nil + + "127.0.0.1" = "value" # Keys can be strings + + "foo" = { + "bar" = { + "baz" = 123, # optional commas if newline is present + "zab" = 456, + "abz" = 789, + }, + }; + + bar = @"foo"["bar"].baz + `; + + + main :: proc() { + p, ok := create_from_string(sample); + if !ok { + return; + } + defer destroy(p); + + if p.error_count == 0 { + print(p); + } + } +*/ +package cel diff --git a/core/encoding/cel/token.odin b/core/encoding/cel/token.odin new file mode 100644 index 000000000..46c8d61be --- /dev/null +++ b/core/encoding/cel/token.odin @@ -0,0 +1,520 @@ +package cel + +import "core:fmt" +import "core:unicode/utf8" + /* Kind enumerates the token kinds. The _literal_*, _keyword_*, _operator_* and _punc_* members are range sentinels: is_literal, is_keyword and is_operator test whether a kind lies strictly between a start/end pair, so member order matters. */ +using Kind :: enum { + Illegal, + EOF, + Comment, + + _literal_start, + Ident, + Integer, + Float, + Char, + String, + _literal_end, + + _keyword_start, + True, // true + False, // false + Nil, // nil + _keyword_end, + + + _operator_start, + Question, // ? + + And, // and + Or, // or + + Add, // + + Sub, // - + Mul, // * + Quo, // / + Rem, // % + + Not, // ! + + Eq, // == + NotEq, // != + Lt, // < + Gt, // > + LtEq, // <= + GtEq, // >= + + At, // @ + _operator_end, + + _punc_start, + Assign, // = + + Open_Paren, // ( + Close_Paren, // ) + Open_Bracket, // [ + Close_Bracket, // ] + Open_Brace, // { + Close_Brace, // } + + Colon, // : + Semicolon, // ; + Comma, // , + Period, // . 
+ _punc_end, +} + + /* Pos is a source location; get_pos fills line and column as 1-based values. */ +Pos :: struct { + file: string, + line: int, + column: int, +} + +Token :: struct { + kind: Kind, + using pos: Pos, + lit: string, +} + +Tokenizer :: struct { + src: []byte, + + file: string, // May not be used + + curr_rune: rune, + offset: int, + read_offset: int, + line_offset: int, + line_count: int, + + insert_semi: bool, + + error_count: int, +} + + /* Reserved words; token_lookup maps any other identifier to Ident. */ +keywords := map[string]Kind{ + "true" = True, + "false" = False, + "nil" = Nil, + "and" = And, + "or" = Or, +}; + /* Display names indexed by Kind (see expect_token). The entries must stay in the same order as the Kind enum; the sentinel members map to "". */ +kind_to_string := [len(Kind)]string{ + "illegal", + "EOF", + "comment", + + "", + "identifier", + "integer", + "float", + "character", + "string", + "", + + "", + "true", "false", "nil", + "", + + "", + "?", "and", "or", + "+", "-", "*", "/", "%", + "!", + "==", "!=", "<", ">", "<=", ">=", + "@", + "", + + "", + "=", + "(", ")", + "[", "]", + "{", "}", + ":", ";", ",", ".", + "", +}; + /* precedence returns the binary precedence of `op` (1 = `?`, 6 = `* / %`); 0 means `op` is not a binary operator. */ +precedence :: proc(op: Kind) -> int { + switch op { + case Question: + return 1; + case Or: + return 2; + case And: + return 3; + case Eq, NotEq, Lt, Gt, LtEq, GtEq: + return 4; + case Add, Sub: + return 5; + case Mul, Quo, Rem: + return 6; + } + return 0; +} + + +token_lookup :: proc(ident: string) -> Kind { + if tok, is_keyword := keywords[ident]; is_keyword { + return tok; + } + return Ident; +} + +is_literal :: proc(tok: Kind) -> bool do return _literal_start < tok && tok < _literal_end; +is_operator :: proc(tok: Kind) -> bool do return _operator_start < tok && tok < _operator_end; +is_keyword :: proc(tok: Kind) -> bool do return _keyword_start < tok && tok < _keyword_end; + + /* tokenizer_init resets the tokenizer over `src`, loads the first rune and skips a leading byte order mark. */ +tokenizer_init :: proc(t: ^Tokenizer, src: []byte, file := "") { + t.src = src; + t.file = file; + t.curr_rune = ' '; + t.offset = 0; + t.read_offset = 0; + t.line_offset = 0; + t.line_count = 1; + + advance_to_next_rune(t); + if t.curr_rune == utf8.RUNE_BOM { + advance_to_next_rune(t); + } +} + +token_error :: proc(t: ^Tokenizer, msg: string, args: ..any) { + fmt.printf_err("%s(%d:%d) Error: ", t.file, t.line_count, 
t.read_offset-t.line_offset+1); + fmt.printf_err(msg, ..args); + fmt.println_err(); + t.error_count += 1; +} + /* advance_to_next_rune decodes the rune at read_offset into curr_rune and updates offset, line_offset and line_count. NUL bytes, invalid UTF-8 and a byte order mark after offset 0 are reported through token_error. At the end of input curr_rune becomes utf8.RUNE_EOF. */ +advance_to_next_rune :: proc(t: ^Tokenizer) { + if t.read_offset < len(t.src) { + t.offset = t.read_offset; + if t.curr_rune == '\n' { + t.line_offset = t.offset; + t.line_count += 1; + } + r, w := rune(t.src[t.read_offset]), 1; + switch { + case r == 0: + token_error(t, "Illegal character NUL"); + case r >= utf8.RUNE_SELF: + r, w = utf8.decode_rune(t.src[t.read_offset:]); + if r == utf8.RUNE_ERROR && w == 1 { + token_error(t, "Illegal utf-8 encoding"); + } else if r == utf8.RUNE_BOM && t.offset > 0 { + token_error(t, "Illegal byte order mark"); + } + } + + t.read_offset += w; + t.curr_rune = r; + } else { + t.offset = len(t.src); + if t.curr_rune == '\n' { + t.line_offset = t.offset; + t.line_count += 1; + } + t.curr_rune = utf8.RUNE_EOF; + } +} + + /* get_pos returns the 1-based line and column of the current rune. */ +get_pos :: proc(t: ^Tokenizer) -> Pos { + return Pos { + file = t.file, + line = t.line_count, + column = t.offset - t.line_offset + 1, + }; +} + /* is_letter accepts ASCII letters and '_' only. */ +is_letter :: proc(r: rune) -> bool { + switch r { + case 'a'..'z', 'A'..'Z', '_': + return true; + } + return false; +} + +is_digit :: proc(r: rune) -> bool { + switch r { + case '0'..'9': + return true; + } + return false; +} + /* skip_whitespace skips spaces, tabs, \r, \v and \f. A newline is skipped too unless insert_semi is set, in which case it is left for scan to turn into a Semicolon token. */ +skip_whitespace :: proc(t: ^Tokenizer) { + loop: for { + switch t.curr_rune { + case '\n': + if t.insert_semi { + break loop; + } + fallthrough; + case ' ', '\t', '\r', '\v', '\f': + advance_to_next_rune(t); + + case: + break loop; + } + } +} + +scan_identifier :: proc(t: ^Tokenizer) -> string { + offset := t.offset; + for is_letter(t.curr_rune) || is_digit(t.curr_rune) { + advance_to_next_rune(t); + } + return string(t.src[offset : t.offset]); +} + /* digit_value returns the value of a hexadecimal digit, or 16 for any other rune so that `digit_value(r) < base` fails for every base used here. */ +digit_value :: proc(r: rune) -> int { + switch r { + case '0'..'9': return int(r - '0'); + case 'a'..'f': return int(r - 'a' + 10); + case 'A'..'F': return int(r - 'A' + 10); + } + return 16; +} + +scan_number :: proc(t: ^Tokenizer, seen_decimal_point: bool) -> (Kind, string) { + 
scan_manitissa :: proc(t: ^Tokenizer, base: int) { /* consumes digits valid in `base`, plus '_' separators */ + for digit_value(t.curr_rune) < base || t.curr_rune == '_' { + advance_to_next_rune(t); + } + } + scan_exponent :: proc(t: ^Tokenizer, tok: Kind, offset: int) -> (Kind, string) { + if t.curr_rune == 'e' || t.curr_rune == 'E' { + tok = Float; + advance_to_next_rune(t); + if t.curr_rune == '-' || t.curr_rune == '+' { + advance_to_next_rune(t); + } + if digit_value(t.curr_rune) < 10 { + scan_manitissa(t, 10); + } else { + token_error(t, "Illegal floating point exponent"); + } + } + return tok, string(t.src[offset : t.offset]); + } + scan_fraction :: proc(t: ^Tokenizer, tok: Kind, offset: int) -> (Kind, string) { + if t.curr_rune == '.' { + tok = Float; + advance_to_next_rune(t); + scan_manitissa(t, 10); + } + + return scan_exponent(t, tok, offset); + } + + offset := t.offset; + tok := Integer; + + if seen_decimal_point { + offset -= 1; /* include the '.' that the caller already consumed */ + tok = Float; + scan_manitissa(t, 10); + return scan_exponent(t, tok, offset); + } + + if t.curr_rune == '0' { + offset := t.offset; + advance_to_next_rune(t); + switch t.curr_rune { + case 'b', 'B': + advance_to_next_rune(t); + scan_manitissa(t, 2); + if t.offset - offset <= 2 { /* only the two-character prefix was consumed: no digits followed */ + token_error(t, "Illegal binary number"); + } + case 'o', 'O': + advance_to_next_rune(t); + scan_manitissa(t, 8); + if t.offset - offset <= 2 { + token_error(t, "Illegal octal number"); + } + case 'x', 'X': + advance_to_next_rune(t); + scan_manitissa(t, 16); + if t.offset - offset <= 2 { + token_error(t, "Illegal hexadecimal number"); + } + case: + scan_manitissa(t, 10); + switch t.curr_rune { + case '.', 'e', 'E': + return scan_fraction(t, tok, offset); + } + } + + return tok, string(t.src[offset:t.offset]); + } + + scan_manitissa(t, 10); + + return scan_fraction(t, tok, offset); +} + /* scan returns the next token. It records in t.insert_semi whether the token just produced may end a statement, so that a following newline, comment or end of input is reported as a Semicolon token with lit "\n". */ +scan :: proc(t: ^Tokenizer) -> Token { + skip_whitespace(t); + + offset := t.offset; + + tok: Kind; + pos := get_pos(t); + lit: string; + + insert_semi := false; + + + switch r := t.curr_rune; { + case is_letter(r): + 
insert_semi = true; + lit = scan_identifier(t); + tok = Ident; + if len(lit) > 1 { /* every keyword is at least two characters long */ + tok = token_lookup(lit); + } + + case '0' <= r && r <= '9': + insert_semi = true; + tok, lit = scan_number(t, false); + + case: + advance_to_next_rune(t); + switch r { + case -1: + if t.insert_semi { + t.insert_semi = false; + return Token{Semicolon, pos, "\n"}; + } + return Token{EOF, pos, "\n"}; + + case '\n': + t.insert_semi = false; + return Token{Semicolon, pos, "\n"}; + + case '"': + insert_semi = true; + quote := r; + tok = String; terminated := false; /* set once the closing quote has been consumed */ + for { + r := t.curr_rune; + if r == '\n' || r < 0 { + token_error(t, "String literal not terminated"); + break; + } + advance_to_next_rune(t); + if r == quote { + terminated = true; break; + } + // TODO(bill); Handle properly + if r == '\\' && t.curr_rune == quote { + advance_to_next_rune(t); + } + } + + lit_end := t.offset; if terminated do lit_end -= 1; lit = string(t.src[offset+1:lit_end]); /* drop the closing quote only when there is one: an unterminated `"` at end of line or input used to slice [offset+1:offset], which is out of range */ + + + case '#': + for t.curr_rune != '\n' && t.curr_rune >= 0 { + advance_to_next_rune(t); + } + if t.insert_semi { + t.insert_semi = false; + return Token{Semicolon, pos, "\n"}; + } + // Recursive! 
+ return scan(t); + + case '?': tok = Question; + case ':': tok = Colon; + case '@': tok = At; + + case ';': + tok = Semicolon; + lit = ";"; + case ',': tok = Comma; + + case '(': + tok = Open_Paren; + case ')': + insert_semi = true; + tok = Close_Paren; + + case '[': + tok = Open_Bracket; + case ']': + insert_semi = true; + tok = Close_Bracket; + + case '{': + tok = Open_Brace; + case '}': + insert_semi = true; + tok = Close_Brace; + + case '+': tok = Add; + case '-': tok = Sub; + case '*': tok = Mul; + case '/': tok = Quo; + case '%': tok = Rem; + + case '!': + tok = Not; + if t.curr_rune == '=' { + advance_to_next_rune(t); + tok = NotEq; + } + + case '=': + tok = Assign; + if t.curr_rune == '=' { + advance_to_next_rune(t); + tok = Eq; + } + + case '<': + tok = Lt; + if t.curr_rune == '=' { + advance_to_next_rune(t); + tok = LtEq; + } + + case '>': + tok = Gt; + if t.curr_rune == '=' { + advance_to_next_rune(t); + tok = GtEq; + } + + case '.': + if '0' <= t.curr_rune && t.curr_rune <= '9' { /* ".5"-style float literal */ + insert_semi = true; + tok, lit = scan_number(t, true); + } else { + tok = Period; + } + + case: + if r != utf8.RUNE_BOM { + token_error(t, "Illegal character '%r'", r); + } + insert_semi = t.insert_semi; /* an illegal character leaves the pending-semicolon state unchanged */ + tok = Illegal; + } + } + + t.insert_semi = insert_semi; + + if lit == "" { /* no literal text was set above: use the raw source text of the token */ + lit = string(t.src[offset:t.offset]); + } + + return Token{tok, pos, lit}; +} diff --git a/src/build_settings.cpp b/src/build_settings.cpp index b30ad7527..8a08bd461 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -55,7 +55,7 @@ TargetEndianKind target_endians[TargetArch_COUNT] = { -String const ODIN_VERSION = str_lit("0.9.0"); +String const ODIN_VERSION = str_lit("0.9.1"); String cross_compile_target = str_lit(""); String cross_compile_lib_dir = str_lit(""); |