diff options
Diffstat (limited to 'src/tokenizer.cpp')
| -rw-r--r-- | src/tokenizer.cpp | 115 |
1 files changed, 101 insertions, 14 deletions
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index d89ec43b5..72448b869 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -527,6 +527,12 @@ struct TokenizerState { u8 * read_curr; // pos from start u8 * line; // current line pos isize line_count; + bool insert_semicolon; +}; + +enum TokenizerFlags { + TokenizerFlag_None = 0, + TokenizerFlag_InsertSemicolon = 1<<0, }; struct Tokenizer { @@ -542,6 +548,9 @@ struct Tokenizer { isize error_count; Array<String> allocated_strings; + + TokenizerFlags flags; + bool insert_semicolon; }; @@ -552,15 +561,17 @@ TokenizerState save_tokenizer_state(Tokenizer *t) { state.read_curr = t->read_curr; state.line = t->line; state.line_count = t->line_count; + state.insert_semicolon = t->insert_semicolon; return state; } void restore_tokenizer_state(Tokenizer *t, TokenizerState *state) { - t->curr_rune = state->curr_rune; - t->curr = state->curr; - t->read_curr = state->read_curr; - t->line = state->line; - t->line_count = state->line_count; + t->curr_rune = state->curr_rune; + t->curr = state->curr; + t->read_curr = state->read_curr; + t->line = state->line; + t->line_count = state->line_count; + t->insert_semicolon = state->insert_semicolon; } @@ -615,7 +626,7 @@ void advance_to_next_rune(Tokenizer *t) { } } -TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath) { +TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath, TokenizerFlags flags = TokenizerFlag_None) { TokenizerInitError err = TokenizerInit_None; char *c_str = alloc_cstring(heap_allocator(), fullpath); @@ -625,6 +636,7 @@ TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath) { gbFileContents fc = gb_file_read_contents(heap_allocator(), true, c_str); gb_zero_item(t); + t->flags = flags; t->fullpath = fullpath; t->line_count = 1; @@ -888,9 +900,13 @@ void tokenizer_get_token(Tokenizer *t, Token *token) { // Skip whitespace for (;;) { switch (t->curr_rune) { + case '\n': + if (t->insert_semicolon) { + break; + } + /*fallthrough*/ case ' ': case '\t': - case '\n': case '\r': advance_to_next_rune(t); continue; @@ -907,6 +923,8 @@ void tokenizer_get_token(Tokenizer *t, Token *token) { token->pos.offset = t->curr - t->start; token->pos.column = t->curr - t->line + 1; + bool insert_semicolon = false; + Rune curr_rune = t->curr_rune; if (rune_is_letter(curr_rune)) { token->kind = Token_Ident; @@ -930,19 +948,51 @@ void tokenizer_get_token(Tokenizer *t, Token *token) { } } } + + switch (token->kind) { + case Token_Ident: + case Token_context: + case Token_typeid: // Dunno? + case Token_break: + case Token_continue: + case Token_fallthrough: + case Token_return: + insert_semicolon = true; + break; + } + + + if (t->flags & TokenizerFlag_InsertSemicolon) { + t->insert_semicolon = insert_semicolon; + } return; } else if (gb_is_between(curr_rune, '0', '9')) { + insert_semicolon = true; scan_number_to_token(t, token, false); } else { advance_to_next_rune(t); switch (curr_rune) { case GB_RUNE_EOF: token->kind = Token_EOF; + if (t->insert_semicolon) { + t->insert_semicolon = false; // EOF consumed + token->string = str_lit("\n"); + token->kind = Token_Semicolon; + return; + } break; + case '\n': + t->insert_semicolon = false; + token->string = str_lit("\n"); + token->kind = Token_Semicolon; + return; + case '\'': // Rune Literal { + insert_semicolon = true; + token->kind = Token_Rune; Rune quote = curr_rune; bool valid = true; @@ -978,12 +1028,19 @@ void tokenizer_get_token(Tokenizer *t, Token *token) { } else { tokenizer_err(t, "Invalid rune literal"); } + + if (t->flags & TokenizerFlag_InsertSemicolon) { + t->insert_semicolon = insert_semicolon; + } + return; } break; case '`': // Raw String Literal case '"': // String Literal { + insert_semicolon = true; + bool has_carriage_return = false; i32 success; Rune quote = curr_rune; @@ -1028,6 +1085,11 @@ void tokenizer_get_token(Tokenizer *t, Token *token) { } else { tokenizer_err(t, "Invalid string literal"); } + + if (t->flags & TokenizerFlag_InsertSemicolon) { + t->insert_semicolon = insert_semicolon; + } + return; } break; @@ -1048,17 +1110,32 @@ void tokenizer_get_token(Tokenizer *t, Token *token) { case '@': token->kind = Token_At; break; case '$': token->kind = Token_Dollar; break; - case '?': token->kind = Token_Question; break; - case '^': token->kind = Token_Pointer; break; + case '?': + insert_semicolon = true; + token->kind = Token_Question; + break; + case '^': + insert_semicolon = true; + token->kind = Token_Pointer; + break; case ';': token->kind = Token_Semicolon; break; case ',': token->kind = Token_Comma; break; case ':': token->kind = Token_Colon; break; case '(': token->kind = Token_OpenParen; break; - case ')': token->kind = Token_CloseParen; break; - case '[': token->kind = Token_OpenBracket; break; - case ']': token->kind = Token_CloseBracket; break; + case ')': + insert_semicolon = true; + token->kind = Token_CloseParen; + break; + case '[': token->kind = Token_OpenBracket; break; + case ']': + insert_semicolon = true; + token->kind = Token_CloseBracket; + break; case '{': token->kind = Token_OpenBrace; break; - case '}': token->kind = Token_CloseBrace; break; + case '}': + insert_semicolon = true; + token->kind = Token_CloseBrace; + break; case '\\': token->kind = Token_BackSlash; break; case '%': @@ -1131,10 +1208,12 @@ void tokenizer_get_token(Tokenizer *t, Token *token) { case '#': if (t->curr_rune == '!') { + insert_semicolon = t->insert_semicolon; + token->kind = Token_Comment; + while (t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) { advance_to_next_rune(t); } - token->kind = Token_Comment; } else { token->kind = Token_Hash; } @@ -1144,6 +1223,7 @@ void tokenizer_get_token(Tokenizer *t, Token *token) { case '/': { token->kind = Token_Quo; if (t->curr_rune == '/') { + insert_semicolon = t->insert_semicolon; token->kind = Token_Comment; while (t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) { @@ -1255,11 +1335,18 @@ void tokenizer_get_token(Tokenizer *t, Token *token) { int len = cast(int)gb_utf8_encode_rune(str, curr_rune); tokenizer_err(t, "Illegal character: %.*s (%d) ", len, str, curr_rune); } + insert_semicolon = t->insert_semicolon; // Preserve insert_semicolon info token->kind = Token_Invalid; break; } } + if (t->flags & TokenizerFlag_InsertSemicolon) { + t->insert_semicolon = insert_semicolon; + } + token->string.len = t->curr - token->string.text; + + return; } |