aboutsummaryrefslogtreecommitdiff
path: root/core/encoding
diff options
context:
space:
mode:
authorgingerBill <bill@gingerbill.org>2018-12-03 20:26:10 +0000
committergingerBill <bill@gingerbill.org>2018-12-03 20:26:10 +0000
commit50c3f4d74e7a332cea27edee3020b54bd3d3bc7d (patch)
treed5340cd15809578f5357f67cda4afcb7cb1abedd /core/encoding
parent304c7594cd2d904072917bb79d0aa340090d8296 (diff)
Add package encoding/cel
Diffstat (limited to 'core/encoding')
-rw-r--r--core/encoding/cel/cel.odin840
-rw-r--r--core/encoding/cel/doc.odin51
-rw-r--r--core/encoding/cel/token.odin520
3 files changed, 1411 insertions, 0 deletions
diff --git a/core/encoding/cel/cel.odin b/core/encoding/cel/cel.odin
new file mode 100644
index 000000000..7d59cd386
--- /dev/null
+++ b/core/encoding/cel/cel.odin
@@ -0,0 +1,840 @@
+package cel;
+
+import "core:fmt"
+import "core:strconv"
+import "core:os"
+import "core:mem"
+import "core:unicode/utf8"
+import "core:strings"
+
+// A CEL value is nil, a scalar (bool/i64/f64/string), an Array of
+// values, or a string-keyed Dict of values.
+Array :: []Value;
+Dict :: map[string]Value;
+Nil_Value :: struct{}; // explicit `nil` literal value, distinct from the union's own nil state
+
+Value :: union {
+	Nil_Value,
+	bool, i64, f64, string,
+	Array, Dict,
+}
+
+// Parser holds the scanned token stream and the evaluation state
+// built while parsing a CEL document into the `root` dictionary.
+Parser :: struct {
+	tokens: [dynamic]Token,
+	prev_token: Token,
+	curr_token: Token,
+	curr_token_index: int,
+
+	allocated_strings: [dynamic]string, // strings created during parsing (unquoting, concatenation); freed by destroy()
+
+	error_count: int,
+
+	root: Dict,
+	dict_stack: [dynamic]^Dict, // NOTE: Pointers may be stored on the stack
+}
+
+
+// print_value writes `value` to stdout. `pretty` adds newlines and
+// tab indentation for Array/Dict; `indent` is the current nesting depth.
+print_value :: proc(value: Value, pretty := true, indent := 0) {
+	print_indent :: proc(indent: int) {
+		for i in 0..indent-1 do fmt.print("\t");
+	}
+
+	switch v in value {
+	case bool: fmt.print(v);
+	case i64: fmt.print(v);
+	case f64: fmt.print(v);
+	case string: fmt.print(v);
+	case Array:
+		fmt.print("[");
+		if pretty do fmt.println();
+		for e, i in v {
+			if pretty {
+				print_indent(indent+1);
+				print_value(e, pretty, indent+1);
+				fmt.println(",");
+			} else {
+				if i > 0 do fmt.print(", ");
+				print_value(e);
+			}
+		}
+		if pretty do print_indent(indent);
+		fmt.print("]");
+	case Dict:
+		fmt.print("{");
+		if pretty do fmt.println();
+
+		i := 0;
+		for name, value in v {
+			if pretty {
+				print_indent(indent+1);
+				fmt.printf("%s = ", name);
+				print_value(value, pretty, indent+1);
+				fmt.println(",");
+			} else {
+				if i > 0 do fmt.print(", ");
+				fmt.printf("%s = ", name);
+				print_value(value, pretty, indent+1);
+				i += 1;
+			}
+		}
+
+		if pretty do print_indent(indent);
+		fmt.print("}");
+	case:
+		// Union nil state (no value assigned)
+		fmt.print("nil");
+	case Nil_Value:
+		// Explicit `nil` literal
+		fmt.print("nil");
+	}
+}
+// print writes every top-level assignment in the parser's root dict
+// as a `name = value;` line.
+print :: proc(p: ^Parser, pretty := false) {
+	for name, val in p.root {
+		fmt.printf("%s = ", name);
+		print_value(val, pretty);
+		fmt.println(";");
+	}
+}
+
+// create_from_string parses CEL source given as a string.
+create_from_string :: proc(src: string) -> (^Parser, bool) {
+	return init(cast([]byte)src);
+}
+
+
+// init tokenizes and parses CEL source given as a byte slice.
+init :: proc(src: []byte) -> (^Parser, bool) {
+	t: Tokenizer;
+	tokenizer_init(&t, src);
+	return create_from_tokenizer(&t);
+}
+
+
+// create_from_tokenizer scans every token up front, then parses the
+// whole document into p.root. Returns false on a scan failure; the
+// caller should still call destroy() on the returned parser.
+create_from_tokenizer :: proc(t: ^Tokenizer) -> (^Parser, bool) {
+	p := new(Parser);
+	for {
+		tok := scan(t);
+		if tok.kind == Kind.Illegal {
+			return p, false;
+		}
+		append(&p.tokens, tok);
+		if tok.kind == Kind.EOF {
+			break;
+		}
+	}
+
+	if t.error_count > 0 {
+		return p, false;
+	}
+
+	if len(p.tokens) == 0 {
+		// Degenerate case: synthesize an EOF token for an empty stream
+		tok := Token{kind = Kind.EOF};
+		tok.line, tok.column = 1, 1;
+		append(&p.tokens, tok);
+		return p, true;
+	}
+
+	p.curr_token_index = 0;
+	p.prev_token = p.tokens[p.curr_token_index];
+	p.curr_token = p.tokens[p.curr_token_index];
+
+	p.root = Dict{};
+	p.dict_stack = make([dynamic]^Dict, 0, 4);
+	append(&p.dict_stack, &p.root);
+
+	// Parse top-level `name = expr;` assignments until EOF
+	for p.curr_token.kind != Kind.EOF &&
+	    p.curr_token.kind != Kind.Illegal &&
+	    p.curr_token_index < len(p.tokens) {
+		if !parse_assignment(p) {
+			break;
+		}
+	}
+
+	return p, true;
+}
+
+// destroy frees everything owned by the parser: the token array, the
+// strings it allocated, the dict stack, and the value tree in p.root.
+destroy :: proc(p: ^Parser) {
+	// Recursively free container values; scalar strings are freed
+	// separately via p.allocated_strings
+	destroy_value :: proc(value: Value) {
+		switch v in value {
+		case Array:
+			for elem in v do destroy_value(elem);
+			delete(v);
+
+		case Dict:
+			for key, value in v do destroy_value(value);
+			delete(v);
+		}
+	}
+
+	delete(p.tokens);
+	for s in p.allocated_strings do delete(s);
+	delete(p.allocated_strings);
+	delete(p.dict_stack);
+
+	destroy_value(p.root);
+	free(p);
+}
+
+// error reports a parse error at `pos` and bumps p.error_count.
+error :: proc(p: ^Parser, pos: Pos, msg: string, args: ..any) {
+	fmt.printf_err("%s(%d:%d) Error: ", pos.file, pos.line, pos.column);
+	fmt.printf_err(msg, ..args);
+	fmt.println_err();
+
+	p.error_count += 1;
+}
+
+// next_token advances the parser and returns the token that was
+// current before the call. Advancing past the end reports an error
+// and pins curr_token at the final token.
+next_token :: proc(p: ^Parser) -> Token {
+	p.prev_token = p.curr_token;
+	prev := p.prev_token;
+
+	if p.curr_token_index+1 < len(p.tokens) {
+		p.curr_token_index += 1;
+		p.curr_token = p.tokens[p.curr_token_index];
+		return prev;
+	}
+	p.curr_token_index = len(p.tokens);
+	p.curr_token = p.tokens[p.curr_token_index-1];
+	error(p, prev.pos, "Token is EOF");
+	return prev;
+}
+
+// unquote_char decodes the first character or escape sequence of s,
+// where s is the interior of a string literal quoted with `quote`.
+//
+// Returns the decoded rune, whether it must be re-encoded as multiple
+// UTF-8 bytes, the remaining tail of s, and a success flag.
+// Supported escapes: \a \b \f \n \r \t \v \\ \" \' plus octal \ooo,
+// \xhh, \uhhhh and \Uhhhhhhhh (mirrors Go's strconv.UnquoteChar).
+unquote_char :: proc(s: string, quote: byte) -> (r: rune, multiple_bytes: bool, tail_string: string, success: bool) {
+	hex_to_int :: proc(c: byte) -> int {
+		switch c {
+		case '0'..'9': return int(c-'0');
+		case 'a'..'f': return int(c-'a')+10;
+		case 'A'..'F': return int(c-'A')+10;
+		}
+		return -1; // not a hex digit
+	}
+
+	if s[0] == quote && quote == '"' {
+		// A bare closing quote cannot start a character
+		return;
+	} else if s[0] >= 0x80 {
+		// Non-ASCII byte: pass the UTF-8 encoded rune through unchanged
+		r, w := utf8.decode_rune_from_string(s);
+		return r, true, s[w:], true;
+	} else if s[0] != '\\' {
+		// Plain ASCII character
+		return rune(s[0]), false, s[1:], true;
+	}
+
+	// s[0] is a backslash: decode the escape sequence after it
+	if len(s) <= 1 {
+		return;
+	}
+	c := s[1];
+	s = s[2:];
+
+	switch c {
+	case:
+		return; // unknown escape
+
+	case 'a': r = '\a';
+	case 'b': r = '\b';
+	case 'f': r = '\f';
+	case 'n': r = '\n';
+	case 'r': r = '\r';
+	case 't': r = '\t';
+	case 'v': r = '\v';
+	case '\\': r = '\\';
+
+	case '"': r = '"';
+	case '\'': r = '\'';
+
+	case '0'..'7':
+		// Octal escape \ooo: `c` plus exactly two more octal digits.
+		v := int(c-'0');
+		if len(s) < 2 {
+			return;
+		}
+		// BUG FIX: only the next two bytes belong to the escape. The
+		// previous code looped over the entire remaining string, so any
+		// escape followed by more characters (e.g. `\101A`) always failed,
+		// even though `s = s[2:]` below shows two digits were intended.
+		for i in 0..1 {
+			d := int(s[i]-'0');
+			if d < 0 || d > 7 {
+				return;
+			}
+			v = (v<<3) | d;
+		}
+		s = s[2:];
+		if v > 0xff {
+			return;
+		}
+		r = rune(v);
+
+	case 'x', 'u', 'U':
+		// Hex escapes: \x takes 2 digits, \u takes 4, \U takes 8
+		count: int;
+		switch c {
+		case 'x': count = 2;
+		case 'u': count = 4;
+		case 'U': count = 8;
+		}
+
+		if len(s) < count {
+			return;
+		}
+
+		for i in 0..count-1 {
+			d := hex_to_int(s[i]);
+			if d < 0 {
+				return;
+			}
+			r = (r<<4) | rune(d);
+		}
+		s = s[count:];
+		if c == 'x' {
+			// \xhh stays a single byte
+			break;
+		}
+		if r > utf8.MAX_RUNE {
+			return;
+		}
+		multiple_bytes = true;
+	}
+
+	success = true;
+	tail_string = s;
+	return;
+}
+
+
+// unquote_string resolves escape sequences in string token `t` (whose
+// lit excludes the surrounding quotes). Non-string tokens pass
+// through unchanged. A newly allocated result is recorded in
+// p.allocated_strings so destroy() can free it.
+unquote_string :: proc(p: ^Parser, t: Token) -> (string, bool) {
+	if t.kind != Kind.String {
+		return t.lit, true;
+	}
+	s := t.lit;
+	n := len(s);
+	quote := '"';
+
+	if s == `""` {
+		return "", true;
+	}
+
+	// NOTE(review): strings.contains_rune appears to return a byte
+	// index (negative when absent), hence the >= 0 / < 0 comparisons —
+	// confirm against this era's core:strings.
+	if strings.contains_rune(s, '\n') >= 0 {
+		// Raw newlines are not allowed inside a string literal
+		return s, false;
+	}
+
+	// Fast path: nothing to unescape
+	if strings.contains_rune(s, '\\') < 0 && strings.contains_rune(s, quote) < 0 {
+		if quote == '"' {
+			return s, true;
+		}
+	}
+
+
+	// Decoded output never grows beyond 3/2 of the escaped source
+	buf_len := 3*len(s) / 2;
+	buf := make([]byte, buf_len);
+	offset := 0;
+	for len(s) > 0 {
+		r, multiple_bytes, tail_string, ok := unquote_char(s, byte(quote));
+		if !ok {
+			delete(buf);
+			return s, false;
+		}
+		s = tail_string;
+		if r < 0x80 || !multiple_bytes {
+			buf[offset] = byte(r);
+			offset += 1;
+		} else {
+			// Multi-byte rune: re-encode as UTF-8
+			b, w := utf8.encode_rune(r);
+			copy(buf[offset:], b[:w]);
+			offset += w;
+		}
+	}
+
+	new_string := string(buf[:offset]);
+
+	append(&p.allocated_strings, new_string);
+
+	return new_string, true;
+}
+
+
+// allow_token consumes the current token iff it has the given kind.
+allow_token :: proc(p: ^Parser, kind: Kind) -> bool {
+	if p.curr_token.kind == kind {
+		next_token(p);
+		return true;
+	}
+	return false;
+}
+
+// expect_token consumes and returns the current token, reporting an
+// error when its kind differs from `kind`.
+expect_token :: proc(p: ^Parser, kind: Kind) -> Token {
+	prev := p.curr_token;
+	if prev.kind != kind {
+		got := prev.lit;
+		// An automatically inserted semicolon carries "\n" as its literal
+		if got == "\n" do got = ";";
+		error(p, prev.pos, "Expected %s, got %s", kind_to_string[kind], got);
+	}
+	next_token(p);
+	return prev;
+}
+
+// expect_operator consumes and returns the current token, reporting
+// an error when it is not an operator.
+expect_operator :: proc(p: ^Parser) -> Token {
+	prev := p.curr_token;
+	if !is_operator(prev.kind) {
+		error(p, prev.pos, "Expected an operator, got %s", prev.lit);
+	}
+
+
+	next_token(p);
+	return prev;
+}
+
+// fix_advance skips tokens until a safe synchronization point (EOF or
+// a semicolon) so parsing can resume after an error.
+fix_advance :: proc(p: ^Parser) {
+	for {
+		switch t := p.curr_token; t.kind {
+		case Kind.EOF, Kind.Semicolon:
+			return;
+		}
+		next_token(p);
+	}
+}
+
+// copy_value deep-copies Array and Dict values; scalars are returned
+// unchanged (string values share their backing memory).
+copy_value :: proc(value: Value) -> Value {
+	switch v in value {
+	case Array:
+		a := make(Array, len(v));
+		for elem, idx in v {
+			a[idx] = copy_value(elem);
+		}
+		return a;
+	case Dict:
+		d := make(Dict, cap(v));
+		for key, val in v {
+			d[key] = copy_value(val);
+		}
+		return d;
+	}
+	return value;
+}
+
+// lookup_value resolves `name` against the dict stack from innermost
+// to outermost scope, returning a deep copy of the found value.
+lookup_value :: proc(p: ^Parser, name: string) -> (Value, bool) {
+	for i := len(p.dict_stack)-1; i >= 0; i -= 1 {
+		d := p.dict_stack[i];
+		if val, ok := d[name]; ok {
+			return copy_value(val), true;
+		}
+	}
+
+	return nil, false;
+}
+
+// parse_operand parses a primary expression: an identifier lookup, a
+// literal (true/false/nil/integer/float/string), a parenthesized
+// expression, an array literal `[...]`, or a dict literal `{...}`.
+// Returns nil for anything else.
+parse_operand :: proc(p: ^Parser) -> (Value, Pos) {
+	tok := p.curr_token;
+	switch p.curr_token.kind {
+	case Kind.Ident:
+		next_token(p);
+		v, ok := lookup_value(p, tok.lit);
+		if !ok do error(p, tok.pos, "Undeclared identifier %s", tok.lit);
+		return v, tok.pos;
+
+	case Kind.True:
+		next_token(p);
+		return true, tok.pos;
+	case Kind.False:
+		next_token(p);
+		return false, tok.pos;
+
+	case Kind.Nil:
+		next_token(p);
+		return Nil_Value{}, tok.pos;
+
+	case Kind.Integer:
+		next_token(p);
+		return strconv.parse_i64(tok.lit), tok.pos;
+
+	case Kind.Float:
+		next_token(p);
+		return strconv.parse_f64(tok.lit), tok.pos;
+
+	case Kind.String:
+		next_token(p);
+		str, ok := unquote_string(p, tok);
+		if !ok do error(p, tok.pos, "Unable to unquote string");
+		return string(str), tok.pos;
+
+	case Kind.Open_Paren:
+		expect_token(p, Kind.Open_Paren);
+		expr, pos := parse_expr(p);
+		expect_token(p, Kind.Close_Paren);
+		return expr, tok.pos;
+
+	case Kind.Open_Bracket:
+		// Array literal: elements separated by `,` or by a newline
+		// (which scan() turns into a Semicolon token with lit "\n")
+		expect_token(p, Kind.Open_Bracket);
+		elems := make([dynamic]Value, 0, 4);
+		for p.curr_token.kind != Kind.Close_Bracket &&
+		    p.curr_token.kind != Kind.EOF {
+			elem, pos := parse_expr(p);
+			append(&elems, elem);
+
+			if p.curr_token.kind == Kind.Semicolon && p.curr_token.lit == "\n" {
+				next_token(p);
+			} else if !allow_token(p, Kind.Comma) {
+				break;
+			}
+
+		}
+		expect_token(p, Kind.Close_Bracket);
+		return Array(elems[:]), tok.pos;
+
+	case Kind.Open_Brace:
+		// Dict literal; pushed onto dict_stack so entries can refer to
+		// sibling keys while the literal is being parsed
+		expect_token(p, Kind.Open_Brace);
+
+		dict := Dict{};
+		append(&p.dict_stack, &dict);
+		defer pop(&p.dict_stack);
+
+		for p.curr_token.kind != Kind.Close_Brace &&
+		    p.curr_token.kind != Kind.EOF {
+			name_tok := p.curr_token;
+			// Keys may be identifiers or string literals
+			if !allow_token(p, Kind.Ident) && !allow_token(p, Kind.String) {
+				name_tok = expect_token(p, Kind.Ident);
+			}
+
+			name, ok := unquote_string(p, name_tok);
+			if !ok do error(p, tok.pos, "Unable to unquote string");
+			expect_token(p, Kind.Assign);
+			elem, pos := parse_expr(p);
+
+			if _, ok := dict[name]; ok {
+				error(p, name_tok.pos, "Previous declaration of %s in this scope", name);
+			} else {
+				dict[name] = elem;
+			}
+
+			if p.curr_token.kind == Kind.Semicolon && p.curr_token.lit == "\n" {
+				next_token(p);
+			} else if !allow_token(p, Kind.Comma) {
+				break;
+			}
+		}
+		expect_token(p, Kind.Close_Brace);
+		return dict, tok.pos;
+
+	}
+	return nil, tok.pos;
+}
+
+// parse_atom_expr applies postfix operations to `operand` until none
+// remain: `.name` selection on dicts, and `[index]` indexing (i64
+// index for arrays, string key for dicts). On any error the operand
+// becomes nil and postfix parsing continues for recovery.
+parse_atom_expr :: proc(p: ^Parser, operand: Value, pos: Pos) -> (Value, Pos) {
+	loop := true;
+	for loop {
+		switch p.curr_token.kind {
+		case Kind.Period:
+			next_token(p);
+			tok := next_token(p);
+
+			switch tok.kind {
+			case Kind.Ident:
+				d, ok := operand.(Dict);
+				if !ok || d == nil {
+					error(p, tok.pos, "Expected a dictionary");
+					operand = nil;
+					continue;
+				}
+				name, usok := unquote_string(p, tok);
+				if !usok do error(p, tok.pos, "Unable to unquote string");
+				val, found := d[name];
+				if !found {
+					error(p, tok.pos, "Field %s not found in dictionary", name);
+					operand = nil;
+					continue;
+				}
+				operand = val;
+			case:
+				error(p, tok.pos, "Expected a selector, got %s", tok.kind);
+				operand = nil;
+			}
+
+		case Kind.Open_Bracket:
+			open := expect_token(p, Kind.Open_Bracket);
+			index, index_pos := parse_expr(p);
+			close := expect_token(p, Kind.Close_Bracket);
+
+
+			switch a in operand {
+			case Array:
+				i, ok := index.(i64);
+				if !ok {
+					error(p, index_pos, "Index must be an integer for an array");
+					operand = nil;
+					continue;
+				}
+
+				// Bounds-checked element access
+				if 0 <= i && i < i64(len(a)) {
+					operand = a[i];
+				} else {
+					error(p, index_pos, "Index %d out of bounds range 0..%d", i, len(a));
+					operand = nil;
+					continue;
+				}
+
+			case Dict:
+				key, ok := index.(string);
+				if !ok {
+					error(p, index_pos, "Index must be a string for a dictionary");
+					operand = nil;
+					continue;
+				}
+
+				val, found := a[key];
+				if found {
+					operand = val;
+				} else {
+					error(p, index_pos, "`%s` was not found in the dictionary", key);
+					operand = nil;
+					continue;
+				}
+
+
+
+			case:
+				error(p, index_pos, "Indexing is only allowed on an array or dictionary");
+			}
+
+		case:
+			// No more postfix operators
+			loop = false;
+		}
+	}
+
+	return operand, pos;
+}
+
+// parse_unary_expr parses prefix operators: `@"name"` (explicit
+// lookup by string key), unary +/- on integers and floats, and ! on
+// booleans; otherwise it parses an operand plus its postfix selectors.
+parse_unary_expr :: proc(p: ^Parser) -> (Value, Pos) {
+	op := p.curr_token;
+	switch p.curr_token.kind {
+	case Kind.At:
+		next_token(p);
+		tok := expect_token(p, Kind.String);
+		v, ok := lookup_value(p, tok.lit);
+		if !ok do error(p, tok.pos, "Undeclared identifier %s", tok.lit);
+		return parse_atom_expr(p, v, tok.pos);
+
+	case Kind.Add, Kind.Sub:
+		next_token(p);
+		// TODO(bill): Calcuate values as you go!
+		expr, pos := parse_unary_expr(p);
+
+		switch e in expr {
+		case i64: if op.kind == Kind.Sub do return -e, pos;
+		case f64: if op.kind == Kind.Sub do return -e, pos;
+		case:
+			error(p, op.pos, "Unary operator %s can only be used on integers or floats", op.lit);
+			return nil, op.pos;
+		}
+
+		// Unary `+`: value is unchanged
+		return expr, op.pos;
+
+	case Kind.Not:
+		next_token(p);
+		expr, pos := parse_unary_expr(p);
+		if v, ok := expr.(bool); ok {
+			return !v, op.pos;
+		}
+		error(p, op.pos, "Unary operator %s can only be used on booleans", op.lit);
+		return nil, op.pos;
+	}
+
+	return parse_atom_expr(p, parse_operand(p));
+}
+
+
+// value_order ranks value types for numeric promotion in
+// match_values: bool/string = 1, i64 = 2, f64 = 3; everything else
+// (nil, Array, Dict) = 0.
+value_order :: proc(v: Value) -> int {
+	switch _ in v {
+	case bool, string:
+		return 1;
+	case i64:
+		return 2;
+	case f64:
+		return 3;
+	}
+	return 0;
+}
+
+// match_values coerces a pair of Values to a common type so a binary
+// operator can be applied: an i64 paired with an f64 is promoted to
+// f64 in place. Returns true when the pair is usable together
+// (matching scalar types); false otherwise. Arguments may be swapped
+// internally so that value_order(left) <= value_order(right).
+match_values :: proc(left, right: ^Value) -> bool {
+	if value_order(right^) < value_order(left^) {
+		return match_values(right, left);
+	}
+
+	switch x in left^ {
+	case:
+		// nil/Array/Dict: mirror left into right and report no match
+		right^ = left^;
+	case bool, string:
+		return true;
+	case i64:
+		switch y in right^ {
+		case i64:
+			return true;
+		case f64:
+			left^ = f64(x); // promote i64 -> f64
+			return true;
+		}
+
+	case f64:
+		// FIX: switch on the dereferenced value, matching the i64 arm
+		// above (previously this switched on the pointer `right` itself).
+		switch y in right^ {
+		case f64:
+			return true;
+		}
+	}
+
+	return false;
+}
+
+// calculate_binary_value applies binary operator `op` to x and y
+// after coercing them to a common type via match_values. Returns the
+// result and whether the operator is valid for the operand types.
+// String `+` allocates a concatenation recorded in p.allocated_strings.
+calculate_binary_value :: proc(p: ^Parser, op: Kind, x, y: Value) -> (Value, bool) {
+	// TODO(bill): Calculate value as you go!
+	match_values(&x, &y);
+
+
+	switch a in x {
+	case: return x, true; // nil/Array/Dict: no operators apply, pass x through
+
+	case bool:
+		b, ok := y.(bool);
+		if !ok do return nil, false;
+		switch op {
+		case Kind.Eq: return a == b, true;
+		case Kind.NotEq: return a != b, true;
+		case Kind.And: return a && b, true;
+		case Kind.Or: return a || b, true;
+		}
+
+	case i64:
+		b, ok := y.(i64);
+		if !ok do return nil, false;
+		switch op {
+		// NOTE(review): Quo/Rem are not guarded against b == 0 — confirm
+		// that a divide-by-zero in user input is meant to trap at runtime
+		case Kind.Add: return a + b, true;
+		case Kind.Sub: return a - b, true;
+		case Kind.Mul: return a * b, true;
+		case Kind.Quo: return a / b, true;
+		case Kind.Rem: return a % b, true;
+		case Kind.Eq: return a == b, true;
+		case Kind.NotEq: return a != b, true;
+		case Kind.Lt: return a < b, true;
+		case Kind.Gt: return a > b, true;
+		case Kind.LtEq: return a <= b, true;
+		case Kind.GtEq: return a >= b, true;
+		}
+
+	case f64:
+		b, ok := y.(f64);
+		if !ok do return nil, false;
+
+		switch op {
+		case Kind.Add: return a + b, true;
+		case Kind.Sub: return a - b, true;
+		case Kind.Mul: return a * b, true;
+		case Kind.Quo: return a / b, true;
+		case Kind.Eq: return a == b, true;
+		case Kind.NotEq: return a != b, true;
+		case Kind.Lt: return a < b, true;
+		case Kind.Gt: return a > b, true;
+		case Kind.LtEq: return a <= b, true;
+		case Kind.GtEq: return a >= b, true;
+		}
+
+	case string:
+		b, ok := y.(string);
+		if !ok do return nil, false;
+
+		switch op {
+		case Kind.Add:
+			// Concatenate into a fresh buffer owned by the parser
+			n := len(a) + len(b);
+			data := make([]byte, n);
+			copy(data[:], cast([]byte)a);
+			copy(data[len(a):], cast([]byte)b);
+			s := string(data);
+			append(&p.allocated_strings, s);
+			return s, true;
+
+		case Kind.Eq: return a == b, true;
+		case Kind.NotEq: return a != b, true;
+		case Kind.Lt: return a < b, true;
+		case Kind.Gt: return a > b, true;
+		case Kind.LtEq: return a <= b, true;
+		case Kind.GtEq: return a >= b, true;
+		}
+	}
+
+	return nil, false;
+}
+
+// parse_binary_expr parses and eagerly evaluates binary operators
+// using precedence climbing; `prec_in` is the minimum precedence
+// accepted. The ternary `cond ? x : y` is handled at the precedence
+// of `?`, with both arms parsed (and evaluated) unconditionally.
+parse_binary_expr :: proc(p: ^Parser, prec_in: int) -> (Value, Pos) {
+	expr, pos := parse_unary_expr(p);
+	for prec := precedence(p.curr_token.kind); prec >= prec_in; prec -= 1 {
+		for {
+			op := p.curr_token;
+			op_prec := precedence(op.kind);
+			if op_prec != prec {
+				break;
+			}
+			expect_operator(p);
+
+			if op.kind == Kind.Question {
+				cond := expr;
+				x, x_pos := parse_expr(p);
+				expect_token(p, Kind.Colon);
+				y, y_pos := parse_expr(p);
+
+				if t, ok := cond.(bool); ok {
+					expr = t ? x : y;
+				} else {
+					error(p, pos, "Condition must be a boolean");
+				}
+
+			} else {
+				// Right side binds tighter, hence prec+1
+				right, right_pos := parse_binary_expr(p, prec+1);
+				if right == nil {
+					error(p, right_pos, "Expected expression on the right-hand side of the binary operator %s", op.lit);
+				}
+				left := expr;
+				ok: bool;
+				expr, ok = calculate_binary_value(p, op.kind, left, right);
+				if !ok {
+					error(p, pos, "Invalid binary operation");
+				}
+			}
+		}
+	}
+	return expr, pos;
+}
+
+// parse_expr parses a full expression (lowest precedence = 1).
+parse_expr :: proc(p: ^Parser) -> (Value, Pos) {
+	return parse_binary_expr(p, 1);
+}
+
+// expect_semicolon consumes the statement terminator after an
+// assignment; EOF is accepted, a comma is diagnosed specially, and
+// anything else triggers error recovery via fix_advance.
+expect_semicolon :: proc(p: ^Parser) {
+	kind := p.curr_token.kind;
+
+	switch kind {
+	case Kind.Comma:
+		error(p, p.curr_token.pos, "Expected ';', got ','");
+		next_token(p);
+	case Kind.Semicolon:
+		next_token(p);
+	case Kind.EOF:
+		// okay
+	case:
+		error(p, p.curr_token.pos, "Expected ';', got %s", p.curr_token.lit);
+		fix_advance(p);
+	}
+}
+
+// parse_assignment parses one top-level `name = expr;` statement into
+// the innermost dict, diagnosing duplicate names. Returns false at
+// EOF or when no assignment could be parsed (after error recovery).
+parse_assignment :: proc(p: ^Parser) -> bool {
+	top_dict :: proc(p: ^Parser) -> ^Dict {
+		assert(len(p.dict_stack) > 0);
+		return p.dict_stack[len(p.dict_stack)-1];
+	}
+
+	// Skip empty statements
+	if p.curr_token.kind == Kind.Semicolon {
+		next_token(p);
+		return true;
+	}
+	if p.curr_token.kind == Kind.EOF {
+		return false;
+	}
+
+	tok := p.curr_token;
+	if allow_token(p, Kind.Ident) || allow_token(p, Kind.String) {
+		expect_token(p, Kind.Assign);
+		name, ok := unquote_string(p, tok);
+		if !ok do error(p, tok.pos, "Unable to unquote string");
+		expr, pos := parse_expr(p);
+		d := top_dict(p);
+		if _, ok := d[name]; ok {
+			error(p, tok.pos, "Previous declaration of %s", name);
+		} else {
+			d[name] = expr;
+		}
+		expect_semicolon(p);
+		return true;
+	}
+	error(p, tok.pos, "Expected an assignment, got %s", kind_to_string[tok.kind]);
+	fix_advance(p);
+	return false;
+}
diff --git a/core/encoding/cel/doc.odin b/core/encoding/cel/doc.odin
new file mode 100644
index 000000000..2d224d292
--- /dev/null
+++ b/core/encoding/cel/doc.odin
@@ -0,0 +1,51 @@
+/*
+ package cel
+
+ sample := `
+ x = 123;
+ y = 321.456;
+ z = x * (y - 1) / 2;
+ w = "foo" + "bar";
+
+ # This is a comment
+
+ asd = "Semicolons are optional"
+
+ a = {id = {b = 123}} # Dict
+ b = a.id.b
+
+ f = [1, 4, 9] # Array
+ g = f[2]
+
+ h = x < y and w == "foobar"
+ i = h ? 123 : "google"
+
+ j = nil
+
+ "127.0.0.1" = "value" # Keys can be strings
+
+ "foo" = {
+ "bar" = {
+ "baz" = 123, # optional commas if newline is present
+ "zab" = 456,
+ "abz" = 789,
+ },
+ };
+
+ bar = @"foo"["bar"].baz
+ `;
+
+
+ main :: proc() {
+ p, ok := create_from_string(sample);
+ if !ok {
+ return;
+ }
+ defer destroy(p);
+
+ if p.error_count == 0 {
+ print(p);
+ }
+ }
+*/
+package cel
diff --git a/core/encoding/cel/token.odin b/core/encoding/cel/token.odin
new file mode 100644
index 000000000..46c8d61be
--- /dev/null
+++ b/core/encoding/cel/token.odin
@@ -0,0 +1,520 @@
+package cel
+
+import "core:fmt"
+import "core:unicode/utf8"
+
+// Token kinds. `using` exports every member into package scope, so
+// members can be referred to without the `Kind.` prefix. The
+// _start/_end members delimit the ranges used by the is_literal /
+// is_operator / is_keyword predicates.
+using Kind :: enum {
+	Illegal,
+	EOF,
+	Comment,
+
+	_literal_start,
+	Ident,
+	Integer,
+	Float,
+	Char,
+	String,
+	_literal_end,
+
+	_keyword_start,
+	True, // true
+	False, // false
+	Nil, // nil
+	_keyword_end,
+
+
+	_operator_start,
+	Question, // ?
+
+	And, // and
+	Or, // or
+
+	Add, // +
+	Sub, // -
+	Mul, // *
+	Quo, // /
+	Rem, // %
+
+	Not, // !
+
+	Eq, // ==
+	NotEq, // !=
+	Lt, // <
+	Gt, // >
+	LtEq, // <=
+	GtEq, // >=
+
+	At, // @
+	_operator_end,
+
+	_punc_start,
+	Assign, // =
+
+	Open_Paren, // (
+	Close_Paren, // )
+	Open_Bracket, // [
+	Close_Bracket, // ]
+	Open_Brace, // {
+	Close_Brace, // }
+
+	Colon, // :
+	Semicolon, // ;
+	Comma, // ,
+	Period, // .
+	_punc_end,
+}
+
+
+// Pos is a file/line/column source location (1-based line and column).
+Pos :: struct {
+	file: string,
+	line: int,
+	column: int,
+}
+
+// Token is a single lexeme: kind, source position, and literal text.
+Token :: struct {
+	kind: Kind,
+	using pos: Pos,
+	lit: string,
+}
+
+// Tokenizer scans CEL source bytes into tokens.
+Tokenizer :: struct {
+	src: []byte,
+
+	file: string, // May not be used
+
+	curr_rune: rune, // current decoded rune
+	offset: int, // byte offset of curr_rune
+	read_offset: int, // byte offset just after curr_rune
+	line_offset: int, // byte offset of the current line's start
+	line_count: int,
+
+	insert_semi: bool, // should a newline produce a Semicolon token?
+
+	error_count: int,
+}
+
+
+// Keyword spellings mapped to their token kinds.
+keywords := map[string]Kind{
+	"true" = True,
+	"false" = False,
+	"nil" = Nil,
+	"and" = And,
+	"or" = Or,
+};
+
+// Human-readable name for each Kind, indexed by enum value.
+// NOTE: entries must stay in the exact declaration order of Kind.
+kind_to_string := [len(Kind)]string{
+	"illegal",
+	"EOF",
+	"comment",
+
+	"",
+	"identifier",
+	"integer",
+	"float",
+	"character",
+	"string",
+	"",
+
+	"",
+	"true", "false", "nil",
+	"",
+
+	"",
+	"?", "and", "or",
+	"+", "-", "*", "/", "%",
+	"!",
+	"==", "!=", "<", ">", "<=", ">=",
+	"@",
+	"",
+
+	"",
+	"=",
+	"(", ")",
+	"[", "]",
+	"{", "}",
+	":", ";", ",", ".",
+	"",
+};
+
+// precedence returns the binding power of binary operator `op`
+// (higher binds tighter); non-operators yield 0.
+precedence :: proc(op: Kind) -> int {
+	switch op {
+	case Question:
+		return 1;
+	case Or:
+		return 2;
+	case And:
+		return 3;
+	case Eq, NotEq, Lt, Gt, LtEq, GtEq:
+		return 4;
+	case Add, Sub:
+		return 5;
+	case Mul, Quo, Rem:
+		return 6;
+	}
+	return 0;
+}
+
+
+// token_lookup maps an identifier to its keyword kind, or Ident.
+token_lookup :: proc(ident: string) -> Kind {
+	if tok, is_keyword := keywords[ident]; is_keyword {
+		return tok;
+	}
+	return Ident;
+}
+
+// Classification predicates over the Kind enum (see _start/_end markers).
+is_literal :: proc(tok: Kind) -> bool do return _literal_start < tok && tok < _literal_end;
+is_operator :: proc(tok: Kind) -> bool do return _operator_start < tok && tok < _operator_end;
+is_keyword :: proc(tok: Kind) -> bool do return _keyword_start < tok && tok < _keyword_end;
+
+
+// tokenizer_init prepares `t` to scan `src`, priming the first rune
+// and skipping a leading byte order mark.
+tokenizer_init :: proc(t: ^Tokenizer, src: []byte, file := "") {
+	t.src = src;
+	t.file = file;
+	t.curr_rune = ' ';
+	t.offset = 0;
+	t.read_offset = 0;
+	t.line_offset = 0;
+	t.line_count = 1;
+
+	advance_to_next_rune(t);
+	if t.curr_rune == utf8.RUNE_BOM {
+		advance_to_next_rune(t);
+	}
+}
+
+// token_error reports a scan error at the current position and bumps
+// t.error_count.
+token_error :: proc(t: ^Tokenizer, msg: string, args: ..any) {
+	fmt.printf_err("%s(%d:%d) Error: ", t.file, t.line_count, t.read_offset-t.line_offset+1);
+	fmt.printf_err(msg, ..args);
+	fmt.println_err();
+	t.error_count += 1;
+}
+
+// advance_to_next_rune decodes the next UTF-8 rune into t.curr_rune,
+// updating offsets and line accounting; at end of input curr_rune
+// becomes utf8.RUNE_EOF. NUL bytes, invalid UTF-8, and an interior
+// BOM are diagnosed.
+advance_to_next_rune :: proc(t: ^Tokenizer) {
+	if t.read_offset < len(t.src) {
+		t.offset = t.read_offset;
+		if t.curr_rune == '\n' {
+			t.line_offset = t.offset;
+			t.line_count += 1;
+		}
+		r, w := rune(t.src[t.read_offset]), 1;
+		switch {
+		case r == 0:
+			token_error(t, "Illegal character NUL");
+		case r >= utf8.RUNE_SELF:
+			// Multi-byte sequence: decode it properly
+			r, w = utf8.decode_rune(t.src[t.read_offset:]);
+			if r == utf8.RUNE_ERROR && w == 1 {
+				token_error(t, "Illegal utf-8 encoding");
+			} else if r == utf8.RUNE_BOM && t.offset > 0 {
+				token_error(t, "Illegal byte order mark");
+			}
+		}
+
+		t.read_offset += w;
+		t.curr_rune = r;
+	} else {
+		t.offset = len(t.src);
+		if t.curr_rune == '\n' {
+			t.line_offset = t.offset;
+			t.line_count += 1;
+		}
+		t.curr_rune = utf8.RUNE_EOF;
+	}
+}
+
+
+// get_pos returns the source position of the current rune.
+get_pos :: proc(t: ^Tokenizer) -> Pos {
+	return Pos {
+		file = t.file,
+		line = t.line_count,
+		column = t.offset - t.line_offset + 1,
+	};
+}
+
+// is_letter reports whether r can start an identifier (ASCII letter
+// or underscore only).
+is_letter :: proc(r: rune) -> bool {
+	switch r {
+	case 'a'..'z', 'A'..'Z', '_':
+		return true;
+	}
+	return false;
+}
+
+// is_digit reports whether r is an ASCII decimal digit.
+is_digit :: proc(r: rune) -> bool {
+	switch r {
+	case '0'..'9':
+		return true;
+	}
+	return false;
+}
+
+// skip_whitespace advances past blanks. A newline is deliberately NOT
+// skipped while insert_semi is set, so scan() can turn it into a
+// Semicolon token.
+skip_whitespace :: proc(t: ^Tokenizer) {
+	loop: for {
+		switch t.curr_rune {
+		case '\n':
+			if t.insert_semi {
+				break loop;
+			}
+			fallthrough;
+		case ' ', '\t', '\r', '\v', '\f':
+			advance_to_next_rune(t);
+
+		case:
+			break loop;
+		}
+	}
+}
+
+// scan_identifier consumes letters and digits and returns the lexeme.
+scan_identifier :: proc(t: ^Tokenizer) -> string {
+	offset := t.offset;
+	for is_letter(t.curr_rune) || is_digit(t.curr_rune) {
+		advance_to_next_rune(t);
+	}
+	return string(t.src[offset : t.offset]);
+}
+
+// digit_value returns the numeric value of a hex digit, or 16 — an
+// out-of-range sentinel for every supported base — for other runes.
+digit_value :: proc(r: rune) -> int {
+	switch r {
+	case '0'..'9': return int(r - '0');
+	case 'a'..'f': return int(r - 'a' + 10);
+	case 'A'..'F': return int(r - 'A' + 10);
+	}
+	return 16;
+}
+
+// scan_number scans an integer or float literal; `seen_decimal_point`
+// is true when the caller already consumed a leading '.'. Handles
+// 0b/0o/0x prefixes, '_' digit separators, fractions, and exponents.
+// ("manitissa" is a historical typo for mantissa, kept as-is.)
+scan_number :: proc(t: ^Tokenizer, seen_decimal_point: bool) -> (Kind, string) {
+	// Consume digits of the given base (plus '_' separators)
+	scan_manitissa :: proc(t: ^Tokenizer, base: int) {
+		for digit_value(t.curr_rune) < base || t.curr_rune == '_' {
+			advance_to_next_rune(t);
+		}
+	}
+	// Optional e/E exponent with optional sign; promotes to Float
+	scan_exponent :: proc(t: ^Tokenizer, tok: Kind, offset: int) -> (Kind, string) {
+		if t.curr_rune == 'e' || t.curr_rune == 'E' {
+			tok = Float;
+			advance_to_next_rune(t);
+			if t.curr_rune == '-' || t.curr_rune == '+' {
+				advance_to_next_rune(t);
+			}
+			if digit_value(t.curr_rune) < 10 {
+				scan_manitissa(t, 10);
+			} else {
+				token_error(t, "Illegal floating point exponent");
+			}
+		}
+		return tok, string(t.src[offset : t.offset]);
+	}
+	// Optional .ddd fraction, then exponent; promotes to Float
+	scan_fraction :: proc(t: ^Tokenizer, tok: Kind, offset: int) -> (Kind, string) {
+		if t.curr_rune == '.' {
+			tok = Float;
+			advance_to_next_rune(t);
+			scan_manitissa(t, 10);
+		}
+
+		return scan_exponent(t, tok, offset);
+	}
+
+	offset := t.offset;
+	tok := Integer;
+
+	if seen_decimal_point {
+		offset -= 1; // include the already-consumed '.'
+		tok = Float;
+		scan_manitissa(t, 10);
+		return scan_exponent(t, tok, offset);
+	}
+
+	if t.curr_rune == '0' {
+		// Leading zero: check for a base prefix
+		offset := t.offset;
+		advance_to_next_rune(t);
+		switch t.curr_rune {
+		case 'b', 'B':
+			advance_to_next_rune(t);
+			scan_manitissa(t, 2);
+			if t.offset - offset <= 2 {
+				// Prefix with no digits after it
+				token_error(t, "Illegal binary number");
+			}
+		case 'o', 'O':
+			advance_to_next_rune(t);
+			scan_manitissa(t, 8);
+			if t.offset - offset <= 2 {
+				token_error(t, "Illegal octal number");
+			}
+		case 'x', 'X':
+			advance_to_next_rune(t);
+			scan_manitissa(t, 16);
+			if t.offset - offset <= 2 {
+				token_error(t, "Illegal hexadecimal number");
+			}
+		case:
+			scan_manitissa(t, 10);
+			switch t.curr_rune {
+			case '.', 'e', 'E':
+				return scan_fraction(t, tok, offset);
+			}
+		}
+
+		return tok, string(t.src[offset:t.offset]);
+	}
+
+	scan_manitissa(t, 10);
+
+	return scan_fraction(t, tok, offset);
+}
+
+// scan returns the next token, applying Go-style automatic semicolon
+// insertion: after an identifier, literal, or closing bracket, a
+// newline (or EOF, or a trailing line comment) yields a Semicolon
+// token whose literal is "\n".
+scan :: proc(t: ^Tokenizer) -> Token {
+	skip_whitespace(t);
+
+	offset := t.offset;
+
+	tok: Kind;
+	pos := get_pos(t);
+	lit: string;
+
+	insert_semi := false;
+
+
+	switch r := t.curr_rune; {
+	case is_letter(r):
+		insert_semi = true;
+		lit = scan_identifier(t);
+		tok = Ident;
+		if len(lit) > 1 {
+			// Keywords are at least two characters long
+			tok = token_lookup(lit);
+		}
+
+	case '0' <= r && r <= '9':
+		insert_semi = true;
+		tok, lit = scan_number(t, false);
+
+	case:
+		advance_to_next_rune(t);
+		switch r {
+		case -1:
+			// End of input (utf8.RUNE_EOF): emit a final inserted
+			// semicolon first if one is pending
+			if t.insert_semi {
+				t.insert_semi = false;
+				return Token{Semicolon, pos, "\n"};
+			}
+			return Token{EOF, pos, "\n"};
+
+		case '\n':
+			// Only reachable when insert_semi was set (see skip_whitespace)
+			t.insert_semi = false;
+			return Token{Semicolon, pos, "\n"};
+
+		case '"':
+			insert_semi = true;
+			quote := r;
+			tok = String;
+			for {
+				r := t.curr_rune;
+				if r == '\n' || r < 0 {
+					token_error(t, "String literal not terminated");
+					break;
+				}
+				advance_to_next_rune(t);
+				if r == quote {
+					break;
+				}
+				// TODO(bill); Handle properly
+				if r == '\\' && t.curr_rune == quote {
+					advance_to_next_rune(t);
+				}
+			}
+
+			// The literal excludes the surrounding quotes
+			lit = string(t.src[offset+1:t.offset-1]);
+
+
+		case '#':
+			// Line comment: skip to end of line, then either emit a
+			// pending semicolon or scan the next real token
+			for t.curr_rune != '\n' && t.curr_rune >= 0 {
+				advance_to_next_rune(t);
+			}
+			if t.insert_semi {
+				t.insert_semi = false;
+				return Token{Semicolon, pos, "\n"};
+			}
+			// Recursive!
+			return scan(t);
+
+		case '?': tok = Question;
+		case ':': tok = Colon;
+		case '@': tok = At;
+
+		case ';':
+			tok = Semicolon;
+			lit = ";";
+		case ',': tok = Comma;
+
+		case '(':
+			tok = Open_Paren;
+		case ')':
+			insert_semi = true;
+			tok = Close_Paren;
+
+		case '[':
+			tok = Open_Bracket;
+		case ']':
+			insert_semi = true;
+			tok = Close_Bracket;
+
+		case '{':
+			tok = Open_Brace;
+		case '}':
+			insert_semi = true;
+			tok = Close_Brace;
+
+		case '+': tok = Add;
+		case '-': tok = Sub;
+		case '*': tok = Mul;
+		case '/': tok = Quo;
+		case '%': tok = Rem;
+
+		case '!':
+			// ! or !=
+			tok = Not;
+			if t.curr_rune == '=' {
+				advance_to_next_rune(t);
+				tok = NotEq;
+			}
+
+		case '=':
+			// = or ==
+			tok = Assign;
+			if t.curr_rune == '=' {
+				advance_to_next_rune(t);
+				tok = Eq;
+			}
+
+		case '<':
+			tok = Lt;
+			if t.curr_rune == '=' {
+				advance_to_next_rune(t);
+				tok = LtEq;
+			}
+
+		case '>':
+			tok = Gt;
+			if t.curr_rune == '=' {
+				advance_to_next_rune(t);
+				tok = GtEq;
+			}
+
+		case '.':
+			// Either the start of a float (".5") or a selector period
+			if '0' <= t.curr_rune && t.curr_rune <= '9' {
+				insert_semi = true;
+				tok, lit = scan_number(t, true);
+			} else {
+				tok = Period;
+			}
+
+		case:
+			if r != utf8.RUNE_BOM {
+				token_error(t, "Illegal character '%r'", r);
+			}
+			insert_semi = t.insert_semi; // preserve pending state
+			tok = Illegal;
+		}
+	}
+
+	t.insert_semi = insert_semi;
+
+	// Default literal: the raw source consumed for this token
+	if lit == "" {
+		lit = string(t.src[offset:t.offset]);
+	}
+
+	return Token{tok, pos, lit};
+}