about summary refs log tree commit diff
diff options
context:
space:
mode:
authorgingerBill <bill@gingerbill.org>2020-12-06 00:47:58 +0000
committergingerBill <bill@gingerbill.org>2020-12-06 00:47:58 +0000
commitca4657fd31b9efc7ab52f7e1b6f4145d5ed28fb7 (patch)
tree61ec40b602a2d5aef1dd79a42572c414f368b4e8
parentd94414b0f4eba7d6e42da1912a43731fcbbd94c1 (diff)
Add custom semicolon insertion to odin/tokenizer and odin/parser
-rw-r--r--core/odin/parser/parser.odin91
-rw-r--r--core/odin/tokenizer/token.odin13
-rw-r--r--core/odin/tokenizer/tokenizer.odin79
3 files changed, 157 insertions(+), 26 deletions(-)
diff --git a/core/odin/parser/parser.odin b/core/odin/parser/parser.odin
index 4d94ed479..583a4cc03 100644
--- a/core/odin/parser/parser.odin
+++ b/core/odin/parser/parser.odin
@@ -190,6 +190,50 @@ peek_token_kind :: proc(p: ^Parser, kind: tokenizer.Token_Kind, lookahead := 0)
return;
}
+peek_token :: proc(p: ^Parser, lookahead := 0) -> (tok: tokenizer.Token) {
+ prev_parser := p^;
+ defer p^ = prev_parser;
+
+ p.tok.err = nil;
+ for i := 0; i <= lookahead; i += 1 {
+ advance_token(p);
+ }
+ tok = p.curr_tok;
+ return;
+}
+skip_possible_newline :: proc(p: ^Parser) -> bool {
+ if .Insert_Semicolon not_in p.tok.flags {
+ return false;
+ }
+
+ prev := p.curr_tok;
+ if tokenizer.is_newline(prev) {
+ advance_token(p);
+ return true;
+ }
+ return false;
+}
+
+skip_possible_newline_for_literal :: proc(p: ^Parser) -> bool {
+ if .Insert_Semicolon not_in p.tok.flags {
+ return false;
+ }
+
+ curr_pos := p.curr_tok.pos;
+ if tokenizer.is_newline(p.curr_tok) {
+ next := peek_token(p);
+ if curr_pos.line+1 >= next.pos.line {
+ #partial switch next.kind {
+ case .Open_Brace, .Else, .Where:
+ advance_token(p);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
next_token0 :: proc(p: ^Parser) -> bool {
p.curr_tok = tokenizer.scan(&p.tok);
@@ -280,7 +324,7 @@ expect_token :: proc(p: ^Parser, kind: tokenizer.Token_Kind) -> tokenizer.Token
prev := p.curr_tok;
if prev.kind != kind {
e := tokenizer.to_string(kind);
- g := tokenizer.to_string(prev.kind);
+ g := tokenizer.token_to_string(prev);
error(p, prev.pos, "expected '%s', got '%s'", e, g);
}
advance_token(p);
@@ -291,7 +335,7 @@ expect_token_after :: proc(p: ^Parser, kind: tokenizer.Token_Kind, msg: string)
prev := p.curr_tok;
if prev.kind != kind {
e := tokenizer.to_string(kind);
- g := tokenizer.to_string(prev.kind);
+ g := tokenizer.token_to_string(prev);
error(p, prev.pos, "expected '%s' after %s, got '%s'", e, msg, g);
}
advance_token(p);
@@ -303,7 +347,7 @@ expect_operator :: proc(p: ^Parser) -> tokenizer.Token {
if prev.kind == .If || prev.kind == .When {
// okay
} else if !tokenizer.is_operator(prev.kind) {
- g := tokenizer.to_string(prev.kind);
+ g := tokenizer.token_to_string(prev);
error(p, prev.pos, "expected an operator, got '%s'", g);
}
advance_token(p);
@@ -400,7 +444,16 @@ expect_semicolon :: proc(p: ^Parser, node: ^ast.Node) -> bool {
}
if node != nil {
- if prev.pos.line != p.curr_tok.pos.line {
+ if .Insert_Semicolon in p.tok.flags {
+ #partial switch p.curr_tok.kind {
+ case .Close_Brace, .Close_Paren, .Else, .EOF:
+ return true;
+ }
+
+ if is_semicolon_optional_for_node(p, node) {
+ return true;
+ }
+ } else if prev.pos.line != p.curr_tok.pos.line {
if is_semicolon_optional_for_node(p, node) {
return true;
}
@@ -420,7 +473,7 @@ expect_semicolon :: proc(p: ^Parser, node: ^ast.Node) -> bool {
}
}
- error(p, prev.pos, "expected ';', got %s", tokenizer.to_string(p.curr_tok.kind));
+ error(p, prev.pos, "expected ';', got %s", tokenizer.token_to_string(p.curr_tok));
return false;
}
@@ -493,6 +546,7 @@ parse_when_stmt :: proc(p: ^Parser) -> ^ast.When_Stmt {
body = convert_stmt_to_body(p, parse_stmt(p));
} else {
body = parse_block_stmt(p, true);
+ skip_possible_newline_for_literal(p);
}
if allow_token(p, .Else) {
@@ -568,6 +622,7 @@ parse_if_stmt :: proc(p: ^Parser) -> ^ast.If_Stmt {
body = convert_stmt_to_body(p, parse_stmt(p));
} else {
body = parse_block_stmt(p, false);
+ skip_possible_newline_for_literal(p);
}
if allow_token(p, .Else) {
@@ -629,6 +684,7 @@ parse_for_stmt :: proc(p: ^Parser) -> ^ast.Stmt {
body = convert_stmt_to_body(p, parse_stmt(p));
} else {
body = parse_body(p);
+ skip_possible_newline_for_literal(p);
}
range_stmt := ast.new(ast.Range_Stmt, tok.pos, body.end);
@@ -663,6 +719,7 @@ parse_for_stmt :: proc(p: ^Parser) -> ^ast.Stmt {
body = convert_stmt_to_body(p, parse_stmt(p));
} else {
body = parse_body(p);
+ skip_possible_newline_for_literal(p);
}
@@ -840,6 +897,8 @@ parse_attribute :: proc(p: ^Parser, tok: tokenizer.Token, open_kind, close_kind:
attribute.elems = elems[:];
attribute.close = close.pos;
+ skip_possible_newline(p);
+
decl := parse_stmt(p);
switch d in &decl.derived {
case ast.Value_Decl:
@@ -1028,10 +1087,11 @@ parse_stmt :: proc(p: ^Parser) -> ^ast.Stmt {
body = convert_stmt_to_body(p, parse_stmt(p));
} else {
body = parse_block_stmt(p, false);
+ skip_possible_newline_for_literal(p);
}
if bad_stmt {
- return ast.new(ast.Bad_Stmt, inline_tok.pos, end_pos(p.prev_tok));
+ return ast.new(ast.Bad_Stmt, inline_tok.pos, end_pos(p.prev_tok));
}
range_stmt := ast.new(ast.Inline_Range_Stmt, inline_tok.pos, body.end);
@@ -1206,7 +1266,7 @@ parse_stmt :: proc(p: ^Parser) -> ^ast.Stmt {
}
tok := advance_token(p);
- error(p, tok.pos, "expected a statement, got %s", tokenizer.to_string(tok.kind));
+ error(p, tok.pos, "expected a statement, got %s", tokenizer.token_to_string(tok));
s := ast.new(ast.Bad_Stmt, tok.pos, end_pos(tok));
return s;
}
@@ -2158,7 +2218,10 @@ parse_operand :: proc(p: ^Parser, lhs: bool) -> ^ast.Expr {
where_token: tokenizer.Token;
where_clauses: []^ast.Expr;
- if (p.curr_tok.kind == .Where) {
+
+ skip_possible_newline_for_literal(p);
+
+ if p.curr_tok.kind == .Where {
where_token = expect_token(p, .Where);
prev_level := p.expr_level;
p.expr_level = -1;
@@ -2334,7 +2397,10 @@ parse_operand :: proc(p: ^Parser, lhs: bool) -> ^ast.Expr {
where_token: tokenizer.Token;
where_clauses: []^ast.Expr;
- if (p.curr_tok.kind == .Where) {
+
+ skip_possible_newline_for_literal(p);
+
+ if p.curr_tok.kind == .Where {
where_token = expect_token(p, .Where);
where_prev_level := p.expr_level;
p.expr_level = -1;
@@ -2397,7 +2463,10 @@ parse_operand :: proc(p: ^Parser, lhs: bool) -> ^ast.Expr {
where_token: tokenizer.Token;
where_clauses: []^ast.Expr;
- if (p.curr_tok.kind == .Where) {
+
+ skip_possible_newline_for_literal(p);
+
+ if p.curr_tok.kind == .Where {
where_token = expect_token(p, .Where);
where_prev_level := p.expr_level;
p.expr_level = -1;
@@ -2730,7 +2799,7 @@ parse_atom_expr :: proc(p: ^Parser, value: ^ast.Expr, lhs: bool) -> (operand: ^a
case .Colon:
interval = advance_token(p);
is_slice_op = true;
- if (p.curr_tok.kind != .Close_Bracket && p.curr_tok.kind != .EOF) {
+ if p.curr_tok.kind != .Close_Bracket && p.curr_tok.kind != .EOF {
indicies[1] = parse_expr(p, false);
}
}
diff --git a/core/odin/tokenizer/token.odin b/core/odin/tokenizer/token.odin
index 54110cf02..997b4967d 100644
--- a/core/odin/tokenizer/token.odin
+++ b/core/odin/tokenizer/token.odin
@@ -283,6 +283,19 @@ tokens := [Token_Kind.COUNT]string {
custom_keyword_tokens: []string;
+
+is_newline :: proc(tok: Token) -> bool {
+ return tok.kind == .Semicolon && tok.text == "\n";
+}
+
+
+token_to_string :: proc(tok: Token) -> string {
+ if is_newline(tok) {
+ return "newline";
+ }
+ return to_string(tok.kind);
+}
+
to_string :: proc(kind: Token_Kind) -> string {
if Token_Kind.Invalid <= kind && kind < Token_Kind.COUNT {
return tokens[kind];
diff --git a/core/odin/tokenizer/tokenizer.odin b/core/odin/tokenizer/tokenizer.odin
index 132b63572..3df65e49b 100644
--- a/core/odin/tokenizer/tokenizer.odin
+++ b/core/odin/tokenizer/tokenizer.odin
@@ -1,22 +1,31 @@
package odin_tokenizer
import "core:fmt"
+import "core:unicode"
import "core:unicode/utf8"
Error_Handler :: #type proc(pos: Pos, fmt: string, args: ..any);
+Flag :: enum {
+ Insert_Semicolon,
+}
+Flags :: distinct bit_set[Flag; u32];
+
Tokenizer :: struct {
// Immutable data
path: string,
src: []byte,
err: Error_Handler,
+ flags: Flags,
+
// Tokenizing state
ch: rune,
offset: int,
read_offset: int,
line_offset: int,
line_count: int,
+ insert_semicolon: bool,
// Mutable data
error_count: int,
@@ -105,11 +114,18 @@ peek_byte :: proc(t: ^Tokenizer, offset := 0) -> byte {
}
skip_whitespace :: proc(t: ^Tokenizer) {
- for t.ch == ' ' ||
- t.ch == '\t' ||
- t.ch == '\n' ||
- t.ch == '\r' {
- advance_rune(t);
+ for {
+ switch t.ch {
+ case ' ', '\t', '\r':
+ advance_rune(t);
+ case '\n':
+ if t.insert_semicolon {
+ return;
+ }
+ advance_rune(t);
+ case:
+ return;
+ }
}
}
@@ -122,12 +138,13 @@ is_letter :: proc(r: rune) -> bool {
return true;
}
}
- // TODO(bill): Add unicode lookup tables
- return false;
+ return unicode.is_letter(r);
}
is_digit :: proc(r: rune) -> bool {
- // TODO(bill): Add unicode lookup tables
- return '0' <= r && r <= '9';
+ if '0' <= r && r <= '9' {
+ return true;
+ }
+ return unicode.is_digit(r);
}
@@ -491,6 +508,8 @@ scan :: proc(t: ^Tokenizer) -> Token {
lit: string;
pos := offset_to_pos(t, offset);
+ insert_semicolon := false;
+
switch ch := t.ch; true {
case is_letter(ch):
lit = scan_identifier(t);
@@ -509,24 +528,39 @@ scan :: proc(t: ^Tokenizer) -> Token {
break check_keyword;
}
}
- if kind == .Ident && lit == "notin" {
- kind = .Not_In;
+
+ #partial switch kind {
+ case .Ident, .Context, .Typeid, .Break, .Continue, .Fallthrough, .Return:
+ insert_semicolon = true;
}
}
case '0' <= ch && ch <= '9':
+ insert_semicolon = true;
kind, lit = scan_number(t, false);
case:
advance_rune(t);
switch ch {
case -1:
kind = .EOF;
+ if t.insert_semicolon {
+ t.insert_semicolon = false;
+ kind = .Semicolon;
+ lit = "\n";
+ }
+ case '\n':
+ t.insert_semicolon = false;
+ kind = .Semicolon;
+ lit = "\n";
case '"':
+ insert_semicolon = true;
kind = .String;
lit = scan_string(t);
case '\'':
+ insert_semicolon = true;
kind = .Rune;
lit = scan_rune(t);
case '`':
+ insert_semicolon = true;
kind = .String;
lit = scan_raw_string(t);
case '=':
@@ -540,10 +574,13 @@ scan :: proc(t: ^Tokenizer) -> Token {
case '#':
kind = .Hash;
if t.ch == '!' {
+ insert_semicolon = t.insert_semicolon;
kind = .Comment;
lit = scan_comment(t);
}
- case '?': kind = .Question;
+ case '?':
+ insert_semicolon = true;
+ kind = .Question;
case '@': kind = .At;
case '$': kind = .Dollar;
case '^': kind = .Pointer;
@@ -562,6 +599,7 @@ scan :: proc(t: ^Tokenizer) -> Token {
case '*': kind = switch2(t, .Mul, .Mul_Eq);
case '/':
if t.ch == '/' || t.ch == '*' {
+ insert_semicolon = t.insert_semicolon;
kind = .Comment;
lit = scan_comment(t);
} else {
@@ -604,11 +642,17 @@ scan :: proc(t: ^Tokenizer) -> Token {
case ',': kind = .Comma;
case ';': kind = .Semicolon;
case '(': kind = .Open_Paren;
- case ')': kind = .Close_Paren;
+ case ')':
+ insert_semicolon = true;
+ kind = .Close_Paren;
case '[': kind = .Open_Bracket;
- case ']': kind = .Close_Bracket;
+ case ']':
+ insert_semicolon = true;
+ kind = .Close_Bracket;
case '{': kind = .Open_Brace;
- case '}': kind = .Close_Brace;
+ case '}':
+ insert_semicolon = true;
+ kind = .Close_Brace;
case '\\': kind = .Back_Slash;
@@ -616,10 +660,15 @@ scan :: proc(t: ^Tokenizer) -> Token {
if ch != utf8.RUNE_BOM {
error(t, t.offset, "illegal character '%r': %d", ch, ch);
}
+ insert_semicolon = t.insert_semicolon; // preserve insert_semicolon info
kind = .Invalid;
}
}
+ if .Insert_Semicolon in t.flags {
+ t.insert_semicolon = insert_semicolon;
+ }
+
if lit == "" {
lit = string(t.src[offset : t.offset]);
}