author     gingerBill <bill@gingerbill.org>  2020-11-01 15:10:06 +0000
committer  gingerBill <bill@gingerbill.org>  2020-11-01 15:10:06 +0000
commit     54fbdabc380905a925ab5e922749fa2b1ccb2621
tree       4830e6ad4c519a6fe20750f64913f2df943aef4f  /src/tokenizer.cpp
parent     75e8e5e06f0b2739cef7a76a4b59d4d95ff397bd
Add experimental `-insert-semicolon` functionality to tokenizer and parser
Diffstat (limited to 'src/tokenizer.cpp')
-rw-r--r--  src/tokenizer.cpp  115
1 file changed, 101 insertions(+), 14 deletions(-)
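
In short, the change teaches the tokenizer Go-style automatic semicolon insertion: it tracks whether the token it just emitted may end a statement (identifiers, literals, `return`, `break`, closing brackets, `^`, `?`, and so on), stops treating a newline as plain whitespace while that is the case, and returns a synthetic `Token_Semicolon` for the newline instead. Below is a minimal, self-contained sketch of that rule, assuming nothing from the Odin sources; the lexer and its names (`lex`, `prev_can_end_stmt`, `Tok`) are illustrative only, not part of this commit.

    // Sketch only: insert ';' at a newline when the previous token could end a statement.
    #include <cctype>
    #include <string>
    #include <vector>

    struct Tok { std::string text; };

    std::vector<Tok> lex(const std::string &src) {
        std::vector<Tok> out;
        bool prev_can_end_stmt = false;            // plays the role of t->insert_semicolon
        size_t i = 0;
        while (i < src.size()) {
            char c = src[i];
            if (c == '\n') {
                if (prev_can_end_stmt) out.push_back({";"});  // synthetic semicolon
                prev_can_end_stmt = false;
                i++;
            } else if (std::isspace(static_cast<unsigned char>(c))) {
                i++;                               // other whitespace is still skipped
            } else if (std::isalnum(static_cast<unsigned char>(c))) {
                size_t j = i;
                while (j < src.size() && std::isalnum(static_cast<unsigned char>(src[j]))) j++;
                out.push_back({src.substr(i, j - i)});
                prev_can_end_stmt = true;          // identifiers and literals may end a statement
                i = j;
            } else {
                out.push_back({std::string(1, c)});
                prev_can_end_stmt = (c == ')' || c == ']' || c == '}');  // closers may end one too
                i++;
            }
        }
        return out;
    }

The diff below threads the same idea through `tokenizer_get_token`: `insert_semicolon` is computed per token and copied into the tokenizer only when `TokenizerFlag_InsertSemicolon` is set, so the behaviour stays opt-in.
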
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index d89ec43b5..72448b869 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -527,6 +527,12 @@ struct TokenizerState {
u8 * read_curr; // pos from start
u8 * line; // current line pos
isize line_count;
+ bool insert_semicolon;
+};
+
+enum TokenizerFlags {
+ TokenizerFlag_None = 0,
+ TokenizerFlag_InsertSemicolon = 1<<0,
};
struct Tokenizer {
@@ -542,6 +548,9 @@ struct Tokenizer {
isize error_count;
Array<String> allocated_strings;
+
+ TokenizerFlags flags;
+ bool insert_semicolon;
};
@@ -552,15 +561,17 @@ TokenizerState save_tokenizer_state(Tokenizer *t) {
state.read_curr = t->read_curr;
state.line = t->line;
state.line_count = t->line_count;
+ state.insert_semicolon = t->insert_semicolon;
return state;
}
void restore_tokenizer_state(Tokenizer *t, TokenizerState *state) {
- t->curr_rune = state->curr_rune;
- t->curr = state->curr;
- t->read_curr = state->read_curr;
- t->line = state->line;
- t->line_count = state->line_count;
+ t->curr_rune = state->curr_rune;
+ t->curr = state->curr;
+ t->read_curr = state->read_curr;
+ t->line = state->line;
+ t->line_count = state->line_count;
+ t->insert_semicolon = state->insert_semicolon;
}
@@ -615,7 +626,7 @@ void advance_to_next_rune(Tokenizer *t) {
}
}
-TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath) {
+TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath, TokenizerFlags flags = TokenizerFlag_None) {
TokenizerInitError err = TokenizerInit_None;
char *c_str = alloc_cstring(heap_allocator(), fullpath);
@@ -625,6 +636,7 @@ TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath) {
gbFileContents fc = gb_file_read_contents(heap_allocator(), true, c_str);
gb_zero_item(t);
+ t->flags = flags;
t->fullpath = fullpath;
t->line_count = 1;
@@ -888,9 +900,13 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
// Skip whitespace
for (;;) {
switch (t->curr_rune) {
+ case '\n':
+ if (t->insert_semicolon) {
+ break;
+ }
+ /*fallthrough*/
case ' ':
case '\t':
- case '\n':
case '\r':
advance_to_next_rune(t);
continue;
@@ -907,6 +923,8 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
token->pos.offset = t->curr - t->start;
token->pos.column = t->curr - t->line + 1;
+ bool insert_semicolon = false;
+
Rune curr_rune = t->curr_rune;
if (rune_is_letter(curr_rune)) {
token->kind = Token_Ident;
@@ -930,19 +948,51 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
}
}
}
+
+ switch (token->kind) {
+ case Token_Ident:
+ case Token_context:
+ case Token_typeid: // Dunno?
+ case Token_break:
+ case Token_continue:
+ case Token_fallthrough:
+ case Token_return:
+ insert_semicolon = true;
+ break;
+ }
+
+
+ if (t->flags & TokenizerFlag_InsertSemicolon) {
+ t->insert_semicolon = insert_semicolon;
+ }
return;
} else if (gb_is_between(curr_rune, '0', '9')) {
+ insert_semicolon = true;
scan_number_to_token(t, token, false);
} else {
advance_to_next_rune(t);
switch (curr_rune) {
case GB_RUNE_EOF:
token->kind = Token_EOF;
+ if (t->insert_semicolon) {
+ t->insert_semicolon = false; // EOF consumed
+ token->string = str_lit("\n");
+ token->kind = Token_Semicolon;
+ return;
+ }
break;
+ case '\n':
+ t->insert_semicolon = false;
+ token->string = str_lit("\n");
+ token->kind = Token_Semicolon;
+ return;
+
case '\'': // Rune Literal
{
+ insert_semicolon = true;
+
token->kind = Token_Rune;
Rune quote = curr_rune;
bool valid = true;
@@ -978,12 +1028,19 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
} else {
tokenizer_err(t, "Invalid rune literal");
}
+
+ if (t->flags & TokenizerFlag_InsertSemicolon) {
+ t->insert_semicolon = insert_semicolon;
+ }
+
return;
} break;
case '`': // Raw String Literal
case '"': // String Literal
{
+ insert_semicolon = true;
+
bool has_carriage_return = false;
i32 success;
Rune quote = curr_rune;
@@ -1028,6 +1085,11 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
} else {
tokenizer_err(t, "Invalid string literal");
}
+
+ if (t->flags & TokenizerFlag_InsertSemicolon) {
+ t->insert_semicolon = insert_semicolon;
+ }
+
return;
} break;
@@ -1048,17 +1110,32 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
case '@': token->kind = Token_At; break;
case '$': token->kind = Token_Dollar; break;
- case '?': token->kind = Token_Question; break;
- case '^': token->kind = Token_Pointer; break;
+ case '?':
+ insert_semicolon = true;
+ token->kind = Token_Question;
+ break;
+ case '^':
+ insert_semicolon = true;
+ token->kind = Token_Pointer;
+ break;
case ';': token->kind = Token_Semicolon; break;
case ',': token->kind = Token_Comma; break;
case ':': token->kind = Token_Colon; break;
case '(': token->kind = Token_OpenParen; break;
- case ')': token->kind = Token_CloseParen; break;
- case '[': token->kind = Token_OpenBracket; break;
- case ']': token->kind = Token_CloseBracket; break;
+ case ')':
+ insert_semicolon = true;
+ token->kind = Token_CloseParen;
+ break;
+ case '[': token->kind = Token_OpenBracket; break;
+ case ']':
+ insert_semicolon = true;
+ token->kind = Token_CloseBracket;
+ break;
case '{': token->kind = Token_OpenBrace; break;
- case '}': token->kind = Token_CloseBrace; break;
+ case '}':
+ insert_semicolon = true;
+ token->kind = Token_CloseBrace;
+ break;
case '\\': token->kind = Token_BackSlash; break;
case '%':
@@ -1131,10 +1208,12 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
case '#':
if (t->curr_rune == '!') {
+ insert_semicolon = t->insert_semicolon;
+ token->kind = Token_Comment;
+
while (t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) {
advance_to_next_rune(t);
}
- token->kind = Token_Comment;
} else {
token->kind = Token_Hash;
}
@@ -1144,6 +1223,7 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
case '/': {
token->kind = Token_Quo;
if (t->curr_rune == '/') {
+ insert_semicolon = t->insert_semicolon;
token->kind = Token_Comment;
while (t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) {
@@ -1255,11 +1335,18 @@ void tokenizer_get_token(Tokenizer *t, Token *token) {
int len = cast(int)gb_utf8_encode_rune(str, curr_rune);
tokenizer_err(t, "Illegal character: %.*s (%d) ", len, str, curr_rune);
}
+ insert_semicolon = t->insert_semicolon; // Preserve insert_semicolon info
token->kind = Token_Invalid;
break;
}
}
+ if (t->flags & TokenizerFlag_InsertSemicolon) {
+ t->insert_semicolon = insert_semicolon;
+ }
+
token->string.len = t->curr - token->string.text;
+
+
return;
}
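
Since `flags` defaults to `TokenizerFlag_None`, existing call sites keep the old behaviour; a caller opting into the experiment would look roughly like this (hypothetical call site, the file name is made up):

    Tokenizer t = {};
    TokenizerInitError err = init_tokenizer(&t, str_lit("example.odin"), TokenizerFlag_InsertSemicolon);
    if (err == TokenizerInit_None) {
        Token token = {};
        tokenizer_get_token(&t, &token);
        // A '\n' following an identifier, a literal, ')', ']', '}', etc. now
        // comes back as Token_Semicolon with the string "\n".
    }
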