aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author gingerBill <bill@gingerbill.org> 2021-08-03 23:27:26 +0100
committer gingerBill <bill@gingerbill.org> 2021-08-03 23:27:26 +0100
commit afff9478c8a699050dd2899c4e929f56717c9243 (patch)
tree 4fb63fdebf6742c4512f245c4fe4a68eda6ed8a5
parent b352b42afc01edcf01ad339b67d6a01c73e8dca0 (diff)
Make core:odin/tokenizer be consistent with the compiler's version
-rw-r--r-- core/odin/tokenizer/token.odin 6
-rw-r--r-- core/odin/tokenizer/tokenizer.odin 325
2 files changed, 203 insertions, 128 deletions
diff --git a/core/odin/tokenizer/token.odin b/core/odin/tokenizer/token.odin
index 88908d7f8..61aa351dd 100644
--- a/core/odin/tokenizer/token.odin
+++ b/core/odin/tokenizer/token.odin
@@ -83,6 +83,8 @@ Token_Kind :: enum u32 {
Cmp_Or_Eq, // ||=
B_Assign_Op_End,
+ Increment, // ++
+ Decrement, // --
Arrow_Right, // ->
Undef, // ---
@@ -108,7 +110,6 @@ Token_Kind :: enum u32 {
Ellipsis, // ..
Range_Half, // ..<
Range_Full, // ..=
- Back_Slash, // \
B_Operator_End,
B_Keyword_Begin,
@@ -210,6 +211,8 @@ tokens := [Token_Kind.COUNT]string {
"||=",
"",
+ "++",
+ "--",
"->",
"---",
@@ -235,7 +238,6 @@ tokens := [Token_Kind.COUNT]string {
"..",
"..<",
"..=",
- "\\",
"",
"",
diff --git a/core/odin/tokenizer/tokenizer.odin b/core/odin/tokenizer/tokenizer.odin
index e0cc6dcd3..e16264032 100644
--- a/core/odin/tokenizer/tokenizer.odin
+++ b/core/odin/tokenizer/tokenizer.odin
@@ -114,17 +114,23 @@ peek_byte :: proc(t: ^Tokenizer, offset := 0) -> byte {
}
skip_whitespace :: proc(t: ^Tokenizer) {
- for {
- switch t.ch {
- case ' ', '\t', '\r':
- advance_rune(t);
- case '\n':
- if t.insert_semicolon {
+ if t.insert_semicolon {
+ for {
+ switch t.ch {
+ case ' ', '\t', '\r':
+ advance_rune(t);
+ case:
+ return;
+ }
+ }
+ } else {
+ for {
+ switch t.ch {
+ case ' ', '\t', '\r', '\n':
+ advance_rune(t);
+ case:
return;
}
- advance_rune(t);
- case:
- return;
}
}
}
@@ -465,51 +471,14 @@ scan_number :: proc(t: ^Tokenizer, seen_decimal_point: bool) -> (Token_Kind, str
scan :: proc(t: ^Tokenizer) -> Token {
- switch2 :: proc(t: ^Tokenizer, tok0, tok1: Token_Kind) -> Token_Kind {
- if t.ch == '=' {
- advance_rune(t);
- return tok1;
- }
- return tok0;
- }
- switch3 :: proc(t: ^Tokenizer, tok0, tok1: Token_Kind, ch2: rune, tok2: Token_Kind) -> Token_Kind {
- if t.ch == '=' {
- advance_rune(t);
- return tok1;
- }
- if t.ch == ch2 {
- advance_rune(t);
- return tok2;
- }
- return tok0;
- }
- switch4 :: proc(t: ^Tokenizer, tok0, tok1: Token_Kind, ch2: rune, tok2, tok3: Token_Kind) -> Token_Kind {
- if t.ch == '=' {
- advance_rune(t);
- return tok1;
- }
- if t.ch == ch2 {
- advance_rune(t);
- if t.ch == '=' {
- advance_rune(t);
- return tok3;
- }
- return tok2;
- }
- return tok0;
- }
-
-
skip_whitespace(t);
offset := t.offset;
kind: Token_Kind;
- lit: string;
+ lit: string;
pos := offset_to_pos(t, offset);
- insert_semicolon := false;
-
switch ch := t.ch; true {
case is_letter(ch):
lit = scan_identifier(t);
@@ -528,14 +497,9 @@ scan :: proc(t: ^Tokenizer) -> Token {
break check_keyword;
}
}
-
- #partial switch kind {
- case .Ident, .Context, .Typeid, .Break, .Continue, .Fallthrough, .Return:
- insert_semicolon = true;
- }
+ break check_keyword;
}
case '0' <= ch && ch <= '9':
- insert_semicolon = true;
kind, lit = scan_number(t, false);
case:
advance_rune(t);
@@ -546,118 +510,227 @@ scan :: proc(t: ^Tokenizer) -> Token {
t.insert_semicolon = false;
kind = .Semicolon;
lit = "\n";
+ return Token{kind, lit, pos};
}
case '\n':
t.insert_semicolon = false;
kind = .Semicolon;
lit = "\n";
- case '"':
- insert_semicolon = true;
- kind = .String;
- lit = scan_string(t);
+ case '\\':
+ if .Insert_Semicolon in t.flags {
+ t.insert_semicolon = false;
+ }
+ token := scan(t);
+ if token.pos.line == pos.line {
+ error(t, token.pos.offset, "expected a newline after \\");
+ }
+ return token;
+
case '\'':
- insert_semicolon = true;
kind = .Rune;
lit = scan_rune(t);
+ case '"':
+ kind = .String;
+ lit = scan_string(t);
case '`':
- insert_semicolon = true;
kind = .String;
lit = scan_raw_string(t);
- case '=': kind = switch2(t, .Eq, .Cmp_Eq);
- case '!': kind = switch2(t, .Not, .Not_Eq);
- case '#':
- kind = .Hash;
- if t.ch == '!' {
- insert_semicolon = t.insert_semicolon;
- kind = .Comment;
- lit = scan_comment(t);
+ case '.':
+ kind = .Period;
+ switch t.ch {
+ case '0'..='9':
+ kind, lit = scan_number(t, true);
+ case '.':
+ advance_rune(t);
+ kind = .Ellipsis;
+ switch t.ch {
+ case '<':
+ advance_rune(t);
+ kind = .Range_Half;
+ case '=':
+ advance_rune(t);
+ kind = .Range_Full;
+ }
}
- case '?':
- insert_semicolon = true;
- kind = .Question;
case '@': kind = .At;
case '$': kind = .Dollar;
+ case '?': kind = .Question;
case '^': kind = .Pointer;
- case '+': kind = switch2(t, .Add, .Add_Eq);
+ case ';': kind = .Semicolon;
+ case ',': kind = .Comma;
+ case ':': kind = .Colon;
+ case '(': kind = .Open_Paren;
+ case ')': kind = .Close_Paren;
+ case '[': kind = .Open_Bracket;
+ case ']': kind = .Close_Bracket;
+ case '{': kind = .Open_Brace;
+ case '}': kind = .Close_Brace;
+ case '%':
+ kind = .Mod;
+ switch t.ch {
+ case '=':
+ advance_rune(t);
+ kind = .Mod_Eq;
+ case '%':
+ advance_rune(t);
+ kind = .Mod_Mod;
+ if t.ch == '=' {
+ advance_rune(t);
+ kind = .Mod_Mod_Eq;
+ }
+ }
+ case '*':
+ kind = .Mul;
+ if t.ch == '=' {
+ advance_rune(t);
+ kind = .Mul_Eq;
+ }
+ case '=':
+ kind = .Eq;
+ if t.ch == '=' {
+ advance_rune(t);
+ kind = .Cmp_Eq;
+ }
+ case '~':
+ kind = .Xor;
+ if t.ch == '=' {
+ advance_rune(t);
+ kind = .Xor_Eq;
+ }
+ case '!':
+ kind = .Not;
+ if t.ch == '=' {
+ advance_rune(t);
+ kind = .Not_Eq;
+ }
+ case '+':
+ kind = .Add;
+ switch t.ch {
+ case '=':
+ advance_rune(t);
+ kind = .Add_Eq;
+ case '+':
+ advance_rune(t);
+ kind = .Increment;
+ }
case '-':
- if t.ch == '>' {
+ kind = .Sub;
+ switch t.ch {
+ case '-':
advance_rune(t);
- kind = .Arrow_Right;
- } else if t.ch == '-' && peek_byte(t) == '-' {
+ kind = .Decrement;
+ if t.ch == '-' {
+ advance_rune(t);
+ kind = .Undef;
+ }
+ case '>':
advance_rune(t);
+ kind = .Arrow_Right;
+ case '=':
advance_rune(t);
- kind = .Undef;
- } else {
- kind = switch2(t, .Sub, .Sub_Eq);
+ kind = .Sub_Eq;
+ }
+ case '#':
+ kind = .Hash;
+ if t.ch == '!' {
+ kind = .Comment;
+ lit = scan_comment(t);
}
- case '*': kind = switch2(t, .Mul, .Mul_Eq);
case '/':
- if t.ch == '/' || t.ch == '*' {
- insert_semicolon = t.insert_semicolon;
+ kind = .Quo;
+ switch t.ch {
+ case '/', '*':
kind = .Comment;
lit = scan_comment(t);
- } else {
- kind = switch2(t, .Quo, .Quo_Eq);
+ case '=':
+ advance_rune(t);
+ kind = .Quo_Eq;
+ }
+ case '<':
+ kind = .Lt;
+ switch t.ch {
+ case '=':
+ advance_rune(t);
+ kind = .Lt_Eq;
+ case '<':
+ advance_rune(t);
+ kind = .Shl;
+ if t.ch == '=' {
+ advance_rune(t);
+ kind = .Shl_Eq;
+ }
+ }
+ case '>':
+ kind = .Gt;
+ switch t.ch {
+ case '=':
+ advance_rune(t);
+ kind = .Gt_Eq;
+ case '>':
+ advance_rune(t);
+ kind = .Shr;
+ if t.ch == '=' {
+ advance_rune(t);
+ kind = .Shr_Eq;
+ }
}
- case '%': kind = switch4(t, .Mod, .Mod_Eq, '%', .Mod_Mod, .Mod_Mod_Eq);
case '&':
- if t.ch == '~' {
+ kind = .And;
+ switch t.ch {
+ case '~':
advance_rune(t);
- kind = switch2(t, .And_Not, .And_Not_Eq);
- } else {
- kind = switch3(t, .And, .And_Eq, '&', .Cmp_And);
+ kind = .And_Not;
+ if t.ch == '=' {
+ advance_rune(t);
+ kind = .And_Not_Eq;
+ }
+ case '=':
+ advance_rune(t);
+ kind = .And_Eq;
+ case '&':
+ advance_rune(t);
+ kind = .Cmp_And;
+ if t.ch == '=' {
+ advance_rune(t);
+ kind = .Cmp_And_Eq;
+ }
}
- case '|': kind = switch3(t, .Or, .Or_Eq, '|', .Cmp_Or);
- case '~': kind = switch2(t, .Xor, .Xor_Eq);
- case '<': kind = switch4(t, .Lt, .Lt_Eq, '<', .Shl, .Shl_Eq);
- case '>': kind = switch4(t, .Gt, .Gt_Eq, '>', .Shr,.Shr_Eq);
-
- case '.':
- if '0' <= t.ch && t.ch <= '9' {
- kind, lit = scan_number(t, true);
- } else {
- kind = .Period;
- if t.ch == '.' {
+ case '|':
+ kind = .Or;
+ switch t.ch {
+ case '=':
+ advance_rune(t);
+ kind = .Or_Eq;
+ case '|':
+ advance_rune(t);
+ kind = .Cmp_Or;
+ if t.ch == '=' {
advance_rune(t);
- kind = .Ellipsis;
- if t.ch == '<' {
- advance_rune(t);
- kind = .Range_Half;
- } else if t.ch == '=' {
- advance_rune(t);
- kind = .Range_Full;
- }
+ kind = .Cmp_Or_Eq;
}
}
- case ':': kind = .Colon;
- case ',': kind = .Comma;
- case ';': kind = .Semicolon;
- case '(': kind = .Open_Paren;
- case ')':
- insert_semicolon = true;
- kind = .Close_Paren;
- case '[': kind = .Open_Bracket;
- case ']':
- insert_semicolon = true;
- kind = .Close_Bracket;
- case '{': kind = .Open_Brace;
- case '}':
- insert_semicolon = true;
- kind = .Close_Brace;
-
- case '\\': kind = .Back_Slash;
-
case:
if ch != utf8.RUNE_BOM {
error(t, t.offset, "illegal character '%r': %d", ch, ch);
}
- insert_semicolon = t.insert_semicolon; // preserve insert_semicolon info
kind = .Invalid;
}
}
if .Insert_Semicolon in t.flags {
- t.insert_semicolon = insert_semicolon;
+ #partial switch kind {
+ case .Invalid, .Comment:
+ // Preserve insert_semicolon info
+ case .Ident, .Context, .Typeid, .Break, .Continue, .Fallthrough, .Return,
+ .Integer, .Float, .Imag, .Rune, .String, .Undef,
+ .Question, .Pointer, .Close_Paren, .Close_Bracket, .Close_Brace,
+ .Increment, .Decrement:
+ /*fallthrough*/
+ t.insert_semicolon = true;
+ case:
+ t.insert_semicolon = false;
+ break;
+ }
}
if lit == "" {