| author | gingerBill <bill@gingerbill.org> | 2020-12-06 00:47:58 +0000 |
|---|---|---|
| committer | gingerBill <bill@gingerbill.org> | 2020-12-06 00:47:58 +0000 |
| commit | ca4657fd31b9efc7ab52f7e1b6f4145d5ed28fb7 | |
| tree | 61ec40b602a2d5aef1dd79a42572c414f368b4e8 | |
| parent | d94414b0f4eba7d6e42da1912a43731fcbbd94c1 | |
Add custom semicolon insertion to odin/tokenizer and odin/parser
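In short: with the new `Insert_Semicolon` tokenizer flag set, a newline is tokenized as an implicit `;` whenever the previous token can end a statement (an identifier, a numeric/string/rune literal, `)`, `]`, `}`, `?`, or a terminating keyword such as `return`, `break`, `continue`, or `fallthrough`). A minimal sketch of the source style this permits (illustrative only, not part of the commit):

```odin
package demo

import "core:fmt"

// Every statement below ends in an identifier, a number literal, ')',
// '}', or 'return', so the tokenizer inserts the ';' tokens itself at
// the newlines.
square :: proc(x: int) -> int {
	y := x * x
	return y
}

main :: proc() {
	fmt.println(square(4))
}
```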
| -rw-r--r-- | core/odin/parser/parser.odin | 91 |
| -rw-r--r-- | core/odin/tokenizer/token.odin | 13 |
| -rw-r--r-- | core/odin/tokenizer/tokenizer.odin | 79 |

3 files changed, 157 insertions, 26 deletions
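The parser keeps one deliberate piece of newline freedom: the new `skip_possible_newline_for_literal` procedure (added below) discards an inserted newline-semicolon when the next token is `{`, `else`, or `where` and begins at most one line later. A hypothetical snippet that stays valid under insertion because of this rule:

```odin
package demo

// 'cond' is a placeholder parameter. The '}' before 'else' makes the
// tokenizer insert a ';' at the newline, but the parser skips that
// token because 'else' starts on the very next line.
choose :: proc(cond: bool) -> int {
	if cond {
		return 1
	}
	else {
		return 2
	}
}
```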
diff --git a/core/odin/parser/parser.odin b/core/odin/parser/parser.odin
index 4d94ed479..583a4cc03 100644
--- a/core/odin/parser/parser.odin
+++ b/core/odin/parser/parser.odin
@@ -190,6 +190,50 @@ peek_token_kind :: proc(p: ^Parser, kind: tokenizer.Token_Kind, lookahead := 0)
 	return;
 }
 
+peek_token :: proc(p: ^Parser, lookahead := 0) -> (tok: tokenizer.Token) {
+	prev_parser := p^;
+	defer p^ = prev_parser;
+
+	p.tok.err = nil;
+	for i := 0; i <= lookahead; i += 1 {
+		advance_token(p);
+	}
+	tok = p.curr_tok;
+	return;
+}
+skip_possible_newline :: proc(p: ^Parser) -> bool {
+	if .Insert_Semicolon not_in p.tok.flags {
+		return false;
+	}
+
+	prev := p.curr_tok;
+	if tokenizer.is_newline(prev) {
+		advance_token(p);
+		return true;
+	}
+	return false;
+}
+
+skip_possible_newline_for_literal :: proc(p: ^Parser) -> bool {
+	if .Insert_Semicolon not_in p.tok.flags {
+		return false;
+	}
+
+	curr_pos := p.curr_tok.pos;
+	if tokenizer.is_newline(p.curr_tok) {
+		next := peek_token(p);
+		if curr_pos.line+1 >= next.pos.line {
+			#partial switch next.kind {
+			case .Open_Brace, .Else, .Where:
+				advance_token(p);
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
 next_token0 :: proc(p: ^Parser) -> bool {
 	p.curr_tok = tokenizer.scan(&p.tok);
@@ -280,7 +324,7 @@ expect_token :: proc(p: ^Parser, kind: tokenizer.Token_Kind) -> tokenizer.Token
 	prev := p.curr_tok;
 	if prev.kind != kind {
 		e := tokenizer.to_string(kind);
-		g := tokenizer.to_string(prev.kind);
+		g := tokenizer.token_to_string(prev);
 		error(p, prev.pos, "expected '%s', got '%s'", e, g);
 	}
 	advance_token(p);
@@ -291,7 +335,7 @@ expect_token_after :: proc(p: ^Parser, kind: tokenizer.Token_Kind, msg: string)
 	prev := p.curr_tok;
 	if prev.kind != kind {
 		e := tokenizer.to_string(kind);
-		g := tokenizer.to_string(prev.kind);
+		g := tokenizer.token_to_string(prev);
 		error(p, prev.pos, "expected '%s' after %s, got '%s'", e, msg, g);
 	}
 	advance_token(p);
@@ -303,7 +347,7 @@ expect_operator :: proc(p: ^Parser) -> tokenizer.Token {
 	if prev.kind == .If || prev.kind == .When {
 		// okay
 	} else if !tokenizer.is_operator(prev.kind) {
-		g := tokenizer.to_string(prev.kind);
+		g := tokenizer.token_to_string(prev);
 		error(p, prev.pos, "expected an operator, got '%s'", g);
 	}
 	advance_token(p);
@@ -400,7 +444,16 @@ expect_semicolon :: proc(p: ^Parser, node: ^ast.Node) -> bool {
 	}
 
 	if node != nil {
-		if prev.pos.line != p.curr_tok.pos.line {
+		if .Insert_Semicolon in p.tok.flags {
+			#partial switch p.curr_tok.kind {
+			case .Close_Brace, .Close_Paren, .Else, .EOF:
+				return true;
+			}
+
+			if is_semicolon_optional_for_node(p, node) {
+				return true;
+			}
+		} else if prev.pos.line != p.curr_tok.pos.line {
 			if is_semicolon_optional_for_node(p, node) {
 				return true;
 			}
@@ -420,7 +473,7 @@ expect_semicolon :: proc(p: ^Parser, node: ^ast.Node) -> bool {
 		}
 	}
 
-	error(p, prev.pos, "expected ';', got %s", tokenizer.to_string(p.curr_tok.kind));
+	error(p, prev.pos, "expected ';', got %s", tokenizer.token_to_string(p.curr_tok));
 	return false;
 }
@@ -493,6 +546,7 @@ parse_when_stmt :: proc(p: ^Parser) -> ^ast.When_Stmt {
 		body = convert_stmt_to_body(p, parse_stmt(p));
 	} else {
 		body = parse_block_stmt(p, true);
+		skip_possible_newline_for_literal(p);
 	}
 
 	if allow_token(p, .Else) {
@@ -568,6 +622,7 @@ parse_if_stmt :: proc(p: ^Parser) -> ^ast.If_Stmt {
 		body = convert_stmt_to_body(p, parse_stmt(p));
 	} else {
 		body = parse_block_stmt(p, false);
+		skip_possible_newline_for_literal(p);
 	}
 
 	if allow_token(p, .Else) {
@@ -629,6 +684,7 @@ parse_for_stmt :: proc(p: ^Parser) -> ^ast.Stmt {
 			body = convert_stmt_to_body(p, parse_stmt(p));
 		} else {
 			body = parse_body(p);
+			skip_possible_newline_for_literal(p);
 		}
 
 		range_stmt := ast.new(ast.Range_Stmt, tok.pos, body.end);
@@ -663,6 +719,7 @@ parse_for_stmt :: proc(p: ^Parser) -> ^ast.Stmt {
 		body = convert_stmt_to_body(p, parse_stmt(p));
 	} else {
 		body = parse_body(p);
+		skip_possible_newline_for_literal(p);
 	}
 
@@ -840,6 +897,8 @@ parse_attribute :: proc(p: ^Parser, tok: tokenizer.Token, open_kind, close_kind:
 	attribute.elems = elems[:];
 	attribute.close = close.pos;
 
+	skip_possible_newline(p);
+
 	decl := parse_stmt(p);
 	switch d in &decl.derived {
 	case ast.Value_Decl:
@@ -1028,10 +1087,11 @@ parse_stmt :: proc(p: ^Parser) -> ^ast.Stmt {
 			body = convert_stmt_to_body(p, parse_stmt(p));
 		} else {
 			body = parse_block_stmt(p, false);
+			skip_possible_newline_for_literal(p);
 		}
 
 		if bad_stmt {
-			return ast.new(ast.Bad_Stmt, inline_tok.pos, end_pos(p.prev_tok));
+			return ast.new(ast.Bad_Stmt, inline_tok.pos, end_pos(p.prev_tok));
 		}
 
 		range_stmt := ast.new(ast.Inline_Range_Stmt, inline_tok.pos, body.end);
@@ -1206,7 +1266,7 @@ parse_stmt :: proc(p: ^Parser) -> ^ast.Stmt {
 	}
 
 	tok := advance_token(p);
-	error(p, tok.pos, "expected a statement, got %s", tokenizer.to_string(tok.kind));
+	error(p, tok.pos, "expected a statement, got %s", tokenizer.token_to_string(tok));
 	s := ast.new(ast.Bad_Stmt, tok.pos, end_pos(tok));
 	return s;
 }
@@ -2158,7 +2218,10 @@ parse_operand :: proc(p: ^Parser, lhs: bool) -> ^ast.Expr {
 		where_token: tokenizer.Token;
 		where_clauses: []^ast.Expr;
 
-		if (p.curr_tok.kind == .Where) {
+
+		skip_possible_newline_for_literal(p);
+
+		if p.curr_tok.kind == .Where {
 			where_token = expect_token(p, .Where);
 			prev_level := p.expr_level;
 			p.expr_level = -1;
@@ -2334,7 +2397,10 @@ parse_operand :: proc(p: ^Parser, lhs: bool) -> ^ast.Expr {
 		where_token: tokenizer.Token;
 		where_clauses: []^ast.Expr;
 
-		if (p.curr_tok.kind == .Where) {
+
+		skip_possible_newline_for_literal(p);
+
+		if p.curr_tok.kind == .Where {
 			where_token = expect_token(p, .Where);
 			where_prev_level := p.expr_level;
 			p.expr_level = -1;
@@ -2397,7 +2463,10 @@ parse_operand :: proc(p: ^Parser, lhs: bool) -> ^ast.Expr {
 		where_token: tokenizer.Token;
 		where_clauses: []^ast.Expr;
 
-		if (p.curr_tok.kind == .Where) {
+
+		skip_possible_newline_for_literal(p);
+
+		if p.curr_tok.kind == .Where {
 			where_token = expect_token(p, .Where);
 			where_prev_level := p.expr_level;
 			p.expr_level = -1;
@@ -2730,7 +2799,7 @@ parse_atom_expr :: proc(p: ^Parser, value: ^ast.Expr, lhs: bool) -> (operand: ^a
 		case .Colon:
 			interval = advance_token(p);
 			is_slice_op = true;
-			if (p.curr_tok.kind != .Close_Bracket && p.curr_tok.kind != .EOF) {
+			if p.curr_tok.kind != .Close_Bracket && p.curr_tok.kind != .EOF {
 				indicies[1] = parse_expr(p, false);
 			}
 		}
diff --git a/core/odin/tokenizer/token.odin b/core/odin/tokenizer/token.odin
index 54110cf02..997b4967d 100644
--- a/core/odin/tokenizer/token.odin
+++ b/core/odin/tokenizer/token.odin
@@ -283,6 +283,19 @@ tokens := [Token_Kind.COUNT]string {
 
 custom_keyword_tokens: []string;
 
+
+is_newline :: proc(tok: Token) -> bool {
+	return tok.kind == .Semicolon && tok.text == "\n";
+}
+
+
+token_to_string :: proc(tok: Token) -> string {
+	if is_newline(tok) {
+		return "newline";
+	}
+	return to_string(tok.kind);
+}
+
 to_string :: proc(kind: Token_Kind) -> string {
 	if Token_Kind.Invalid <= kind && kind < Token_Kind.COUNT {
 		return tokens[kind];
diff --git a/core/odin/tokenizer/tokenizer.odin b/core/odin/tokenizer/tokenizer.odin
index 132b63572..3df65e49b 100644
--- a/core/odin/tokenizer/tokenizer.odin
+++ b/core/odin/tokenizer/tokenizer.odin
@@ -1,22 +1,31 @@
 package odin_tokenizer
 
 import "core:fmt"
+import "core:unicode"
 import "core:unicode/utf8"
 
 Error_Handler :: #type proc(pos: Pos, fmt: string, args: ..any);
 
+Flag :: enum {
+	Insert_Semicolon,
+}
+Flags :: distinct bit_set[Flag; u32];
+
 Tokenizer :: struct {
 	// Immutable data
 	path: string,
 	src:  []byte,
 	err:  Error_Handler,
+	flags: Flags,
+
 	// Tokenizing state
 	ch:          rune,
 	offset:      int,
 	read_offset: int,
 	line_offset: int,
 	line_count:  int,
+	insert_semicolon: bool,
 
 	// Mutable data
 	error_count: int,
@@ -105,11 +114,18 @@ peek_byte :: proc(t: ^Tokenizer, offset := 0) -> byte {
 }
 
 skip_whitespace :: proc(t: ^Tokenizer) {
-	for t.ch == ' ' ||
-	    t.ch == '\t' ||
-	    t.ch == '\n' ||
-	    t.ch == '\r' {
-		advance_rune(t);
+	for {
+		switch t.ch {
+		case ' ', '\t', '\r':
+			advance_rune(t);
+		case '\n':
+			if t.insert_semicolon {
+				return;
+			}
+			advance_rune(t);
+		case:
+			return;
+		}
 	}
 }
 
@@ -122,12 +138,13 @@ is_letter :: proc(r: rune) -> bool {
 			return true;
 		}
 	}
-	// TODO(bill): Add unicode lookup tables
-	return false;
+	return unicode.is_letter(r);
 }
 is_digit :: proc(r: rune) -> bool {
-	// TODO(bill): Add unicode lookup tables
-	return '0' <= r && r <= '9';
+	if '0' <= r && r <= '9' {
+		return true;
+	}
+	return unicode.is_digit(r);
 }
 
@@ -491,6 +508,8 @@ scan :: proc(t: ^Tokenizer) -> Token {
 	lit: string;
 	pos := offset_to_pos(t, offset);
 
+	insert_semicolon := false;
+
 	switch ch := t.ch; true {
 	case is_letter(ch):
 		lit = scan_identifier(t);
@@ -509,24 +528,39 @@ scan :: proc(t: ^Tokenizer) -> Token {
 				break check_keyword;
 			}
 		}
-		if kind == .Ident && lit == "notin" {
-			kind = .Not_In;
+
+		#partial switch kind {
+		case .Ident, .Context, .Typeid, .Break, .Continue, .Fallthrough, .Return:
+			insert_semicolon = true;
 		}
 	case '0' <= ch && ch <= '9':
+		insert_semicolon = true;
 		kind, lit = scan_number(t, false);
 	case:
 		advance_rune(t);
 		switch ch {
 		case -1:
 			kind = .EOF;
+			if t.insert_semicolon {
+				t.insert_semicolon = false;
+				kind = .Semicolon;
+				lit = "\n";
+			}
+		case '\n':
+			t.insert_semicolon = false;
+			kind = .Semicolon;
+			lit = "\n";
 		case '"':
+			insert_semicolon = true;
 			kind = .String;
 			lit = scan_string(t);
 		case '\'':
+			insert_semicolon = true;
 			kind = .Rune;
 			lit = scan_rune(t);
 		case '`':
+			insert_semicolon = true;
 			kind = .String;
 			lit = scan_raw_string(t);
 		case '=':
@@ -540,10 +574,13 @@ scan :: proc(t: ^Tokenizer) -> Token {
 		case '#':
 			kind = .Hash;
 			if t.ch == '!' {
+				insert_semicolon = t.insert_semicolon;
 				kind = .Comment;
 				lit = scan_comment(t);
 			}
-		case '?': kind = .Question;
+		case '?':
+			insert_semicolon = true;
+			kind = .Question;
 		case '@': kind = .At;
 		case '$': kind = .Dollar;
 		case '^': kind = .Pointer;
@@ -562,6 +599,7 @@ scan :: proc(t: ^Tokenizer) -> Token {
 		case '*': kind = switch2(t, .Mul, .Mul_Eq);
 		case '/':
 			if t.ch == '/' || t.ch == '*' {
+				insert_semicolon = t.insert_semicolon;
 				kind = .Comment;
 				lit = scan_comment(t);
 			} else {
@@ -604,11 +642,17 @@ scan :: proc(t: ^Tokenizer) -> Token {
 		case ',': kind = .Comma;
 		case ';': kind = .Semicolon;
 		case '(': kind = .Open_Paren;
-		case ')': kind = .Close_Paren;
+		case ')':
+			insert_semicolon = true;
+			kind = .Close_Paren;
 		case '[': kind = .Open_Bracket;
-		case ']': kind = .Close_Bracket;
+		case ']':
+			insert_semicolon = true;
+			kind = .Close_Bracket;
 		case '{': kind = .Open_Brace;
-		case '}': kind = .Close_Brace;
+		case '}':
+			insert_semicolon = true;
+			kind = .Close_Brace;
 
 		case '\\': kind = .Back_Slash;
@@ -616,10 +660,15 @@ scan :: proc(t: ^Tokenizer) -> Token {
 			if ch != utf8.RUNE_BOM {
 				error(t, t.offset, "illegal character '%r': %d", ch, ch);
 			}
+			insert_semicolon = t.insert_semicolon; // preserve insert_semicolon info
 			kind = .Invalid;
 		}
 	}
 
+	if .Insert_Semicolon in t.flags {
+		t.insert_semicolon = insert_semicolon;
+	}
+
 	if lit == "" {
 		lit = string(t.src[offset : t.offset]);
 	}
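To exercise the new path end to end, something along these lines should work. The `scan` procedure, the `Flags` bit_set, and `token_to_string` come straight from this diff; the `tokenizer.init` call and its argument types are an assumption, so check `core/odin/tokenizer` for the exact setup signature:

```odin
package scan_demo

import "core:fmt"
import "core:odin/tokenizer"

main :: proc() {
	src := "x := 1\ny := foo(x)\n";

	t: tokenizer.Tokenizer;
	// Assumed setup call; the real init signature may differ.
	tokenizer.init(&t, transmute([]byte)src, "demo.odin");

	// Opt in: without this flag, '\n' is skipped as whitespace and no
	// .Semicolon tokens are ever synthesized.
	t.flags = {.Insert_Semicolon};

	for {
		tok := tokenizer.scan(&t);
		if tok.kind == .EOF {
			break;
		}
		// Inserted semicolons carry the text "\n" and print as "newline".
		fmt.println(tok.kind, tokenizer.token_to_string(tok));
	}
}
```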