Diffstat (limited to 'src/tokenizer.cpp')
 src/tokenizer.cpp | 71 ++++++++++++++++++++++++++---------------------------------------------
 1 file changed, 26 insertions(+), 45 deletions(-)
diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp
index d375ca05d..71e11b301 100644
--- a/src/tokenizer.cpp
+++ b/src/tokenizer.cpp
@@ -699,15 +699,6 @@ enum TokenizerInitError {
};
-struct TokenizerState {
- Rune curr_rune; // current character
- u8 * curr; // character pos
- u8 * read_curr; // pos from start
- u8 * line; // current line pos
- i32 line_count;
- bool insert_semicolon;
-};
-
enum TokenizerFlags {
TokenizerFlag_None = 0,
TokenizerFlag_InsertSemicolon = 1<<0,
@@ -722,7 +713,7 @@ struct Tokenizer {
Rune curr_rune; // current character
u8 * curr; // character pos
u8 * read_curr; // pos from start
- u8 * line; // current line pos
+ i32 column_minus_one;
i32 line_count;
i32 error_count;
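
The core of the change: Tokenizer no longer carries a pointer to the start of the current line, only a zero-based column counter. The equivalence the rest of the commit relies on is sketched below; it assumes the counter is bumped once per consumed rune elsewhere in advance_to_next_rune (that increment is not visible in this excerpt):

	// Sketch: the same 1-based column, before and after the change.
	i32 column_old = cast(i32)(t->curr - t->line + 1); // pointer difference to line start
	i32 column_new = t->column_minus_one + 1;          // incremental counter
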
@@ -733,30 +724,9 @@ struct Tokenizer {
};
-TokenizerState save_tokenizer_state(Tokenizer *t) {
- TokenizerState state = {};
- state.curr_rune = t->curr_rune;
- state.curr = t->curr;
- state.read_curr = t->read_curr;
- state.line = t->line;
- state.line_count = t->line_count;
- state.insert_semicolon = t->insert_semicolon;
- return state;
-}
-
-void restore_tokenizer_state(Tokenizer *t, TokenizerState *state) {
- t->curr_rune = state->curr_rune;
- t->curr = state->curr;
- t->read_curr = state->read_curr;
- t->line = state->line;
- t->line_count = state->line_count;
- t->insert_semicolon = state->insert_semicolon;
-}
-
-
void tokenizer_err(Tokenizer *t, char const *msg, ...) {
va_list va;
- isize column = t->read_curr - t->line+1;
+ i32 column = t->column_minus_one+1;
if (column < 1) {
column = 1;
}
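
With the line pointer gone, TokenizerState and its save/restore helpers lose their purpose: every remaining position field in Tokenizer is a plain value, so a caller that ever needs to backtrack again can copy the struct directly. Both tokenizer_err overloads now derive the column from the counter and clamp it to 1; a hypothetical shared helper (not part of this commit) might look like:

	// Hypothetical helper: the clamped 1-based column used by both overloads.
	gb_inline i32 tokenizer_column(Tokenizer *t) {
		i32 column = t->column_minus_one + 1;
		return column < 1 ? 1 : column;
	}
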
@@ -775,7 +745,7 @@ void tokenizer_err(Tokenizer *t, char const *msg, ...) {
void tokenizer_err(Tokenizer *t, TokenPos const &pos, char const *msg, ...) {
va_list va;
- isize column = t->read_curr - t->line+1;
+ i32 column = t->column_minus_one+1;
if (column < 1) {
column = 1;
}
@@ -788,13 +758,19 @@ void tokenizer_err(Tokenizer *t, TokenPos const &pos, char const *msg, ...) {
}
void advance_to_next_rune(Tokenizer *t) {
+#if 1
+ if (t->curr_rune == '\n') {
+ t->column_minus_one = 0;
+ t->line_count++;
+ }
+#else
+ // NOTE(bill, 2021-08-02): This is branchless but it is slower in practice
+ i32 is_newline = t->curr_rune == '\n';
+ t->column_minus_one *= 1-is_newline;
+ t->line_count += is_newline;
+#endif
if (t->read_curr < t->end) {
t->curr = t->read_curr;
- if (t->curr_rune == '\n') {
- t->line = t->curr;
- t->line_count++;
- }
-
Rune rune = *t->read_curr;
if (rune == 0) {
tokenizer_err(t, "Illegal character NUL");
@@ -813,10 +789,6 @@ void advance_to_next_rune(Tokenizer *t) {
t->curr_rune = rune;
} else {
t->curr = t->end;
- if (t->curr_rune == '\n') {
- t->line = t->curr;
- t->line_count++;
- }
t->curr_rune = GB_RUNE_EOF;
}
}
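
The NOTE inside the #else branch records why the branchless form was rejected. Both strategies are extracted into standalone form below for clarity (assuming is_newline is exactly 0 or 1): the multiply zeroes the column only when the rune just consumed was a newline. Newlines are rare in source text, so the branch in the #if 1 path predicts well and ends up faster, as the NOTE says.

	// Standalone sketch of the two newline-accounting strategies above.
	void account_newline_branchy(Tokenizer *t) {
		if (t->curr_rune == '\n') { // rarely taken, predicts well
			t->column_minus_one = 0;
			t->line_count++;
		}
	}
	void account_newline_branchless(Tokenizer *t) {
		i32 is_newline = t->curr_rune == '\n'; // 0 or 1
		t->column_minus_one *= 1 - is_newline; // *1 keeps, *0 resets
		t->line_count += is_newline;           // +0 keeps, +1 advances
	}
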
@@ -827,7 +799,7 @@ void init_tokenizer_with_file_contents(Tokenizer *t, String const &fullpath, gbF
t->line_count = 1;
t->start = cast(u8 *)fc->data;
- t->line = t->read_curr = t->curr = t->start;
+ t->read_curr = t->curr = t->start;
t->end = t->start + fc->size;
advance_to_next_rune(t);
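
Initialization simplifies as well: the old code had to seed t->line to the start of the buffer, whereas a zero column counter already means "start of line". A sketch of the invariant, assuming the Tokenizer is zero-initialized before this function runs (not code from the commit):

	Tokenizer t = {};
	GB_ASSERT(t.column_minus_one + 1 == 1); // 1-based column before any rune is consumed
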
@@ -921,7 +893,7 @@ void scan_number_to_token(Tokenizer *t, Token *token, bool seen_decimal_point) {
token->string = {t->curr, 1};
token->pos.file_id = t->curr_file_id;
token->pos.line = t->line_count;
- token->pos.column = cast(i32)(t->curr-t->line+1);
+ token->pos.column = t->column_minus_one+1;
if (seen_decimal_point) {
token->string.text -= 1;
@@ -1113,6 +1085,15 @@ gb_inline void tokenizer_skip_line(Tokenizer *t) {
while (t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) {
advance_to_next_rune(t);
}
+#elif 0
+ while (t->read_curr != t->end && t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) {
+ t->curr = t->read_curr;
+ t->curr_rune = *t->read_curr;
+ if (t->curr_rune == 0) {
+ tokenizer_err(t, "Illegal character NUL");
+ }
+ t->read_curr++;
+ }
#else
while (t->read_curr != t->end && t->curr_rune != '\n' && t->curr_rune != GB_RUNE_EOF) {
t->curr = t->read_curr;
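
tokenizer_skip_line only has to find the next '\n', and in UTF-8 the byte 0x0A never occurs inside a multi-byte sequence, which is why the fast-path branches can scan raw bytes instead of decoding runes. A hypothetical memchr-based equivalent (not in the commit, and it drops the NUL diagnostic the loop keeps):

	#include <string.h>

	// Hypothetical sketch: jump to the next newline with a byte scan.
	gb_inline void tokenizer_skip_line_memchr(Tokenizer *t) {
		u8 *nl = cast(u8 *)memchr(t->read_curr, '\n', t->end - t->read_curr);
		if (nl != nullptr) {
			t->curr      = nl;     // the newline itself, as the loop leaves it
			t->read_curr = nl + 1; // reading resumes after the newline
			t->curr_rune = '\n';
		} else {
			t->curr      = t->end; // no newline before end of file
			t->read_curr = t->end;
			t->curr_rune = GB_RUNE_EOF;
		}
	}
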
@@ -1161,7 +1142,7 @@ void tokenizer_get_token(Tokenizer *t, Token *token, int repeat=0) {
token->pos.file_id = t->curr_file_id;
token->pos.line = t->line_count;
token->pos.offset = cast(i32)(t->curr - t->start);
- token->pos.column = cast(i32)(t->curr - t->line + 1);
+ token->pos.column = t->column_minus_one+1;
TokenPos current_pos = token->pos;