aboutsummaryrefslogtreecommitdiff
path: root/src/error.cpp
diff options
context:
space:
mode:
authorFeoramund <161657516+Feoramund@users.noreply.github.com>2024-06-29 18:17:44 -0400
committerFeoramund <161657516+Feoramund@users.noreply.github.com>2024-06-29 18:55:12 -0400
commit8ed5cb283b5039693e96d6f47eea6213194d6cbe (patch)
tree4da07364741104e8892f2a24a66e476f5dcf0f3d /src/error.cpp
parent8b305a4c67b0ddab5c1133c6e0efce85c98c46c5 (diff)
Re-implement the error squiggles with visual width
Diffstat (limited to 'src/error.cpp')
-rw-r--r--src/error.cpp289
1 files changed, 130 insertions, 159 deletions
diff --git a/src/error.cpp b/src/error.cpp
index 26b4106a0..f95123f15 100644
--- a/src/error.cpp
+++ b/src/error.cpp
@@ -283,9 +283,6 @@ gb_internal isize show_error_on_line(TokenPos const &pos, TokenPos end) {
error_out("\t( empty line )\n");
terminal_reset_colours();
- // Preserve the old return behaviour. Even if we can't guarantee the
- // exact visual space offset, there are two places that check this to
- // change what sort of suggestion they offer.
if (the_line == nullptr) {
return -1;
} else {
@@ -293,244 +290,218 @@ gb_internal isize show_error_on_line(TokenPos const &pos, TokenPos end) {
}
}
- // Specfically use basic ASCII arrows here, in case the terminal
- // doesn't support anything fancy. This is meant to be a good fallback.
- char const *mark_error_sign = "><";
- char const *open_error_sign = ">>";
- char const *close_error_sign = "<<";
- const TerminalColour marker_colour = TerminalColour_Yellow;
-
- // ANSI SGR:
- // 0 = Reset.
- // 58:5:2 = Underline colour, 8-bit, green. (non-standard)
- // 4:3 = Wiggly underline. (non-standard)
- char const *wiggly_underline_sgr = "";
- char const *disable_underline_sgr = "";
- if (has_ansi_terminal_colours()) {
- wiggly_underline_sgr = "\x1b[0;58:5:2;4:3m";
- disable_underline_sgr = "\x1b[24m";
- }
-
// These two will be used like an Odin slice later.
char const *line_text = the_line;
i32 line_length_bytes = cast(i32)gb_string_length(the_line);
- // NOTE(Feoramund): The numbers below are in Unicode codepoints
- // (or runes), not visual glyph width. Calculating the visual width of
- // a cluster of Unicode codepoints is vexing, and `utf8proc_charwidth`
- // is inadequate.
- //
- // We're counting codepoints here so we don't slice one down the
- // middle during truncation. It will still look strange if we slice
- // a cluster down the middle. (i.e. a letter and a combining diacritic)
- //
- // Luckily, if our assumption about 1 codepoint == 1 glyph is wrong,
- // we only suffer a shorter or longer line displayed in total, but all
- // of our highlighting and marking will be precise.
- // (Unless there's an invalid Unicode codepoint, in which case, no guarantees.)
- //
- // The line will be longer if a codepoint occupies more than one space
- // (CJK in most cases) and shorter if a codepoint is invisible or is
- // a type of joiner or combining codepoint.
- //
- // If we get a complete Unicode glyph counter, it would be as simple as
- // replacing `utf8_decode` below to make all of this work perfectly.
+ ucg_grapheme* graphemes;
+ i32 line_length_runes = 0;
+ i32 line_length_graphemes = 0;
+ i32 line_width = 0;
+
+ int ucg_result = ucg_decode_grapheme_clusters(
+ permanent_allocator(), (const uint8_t*)line_text, line_length_bytes,
+ &graphemes, &line_length_runes, &line_length_graphemes, &line_width);
+
+ if (ucg_result < 0) {
+ // There was a UTF-8 parsing error.
+ // Insert a dummy grapheme so the start of the invalid rune can be pointed at.
+ graphemes = (ucg_grapheme*)gb_resize(permanent_allocator(),
+ graphemes,
+ sizeof(ucg_grapheme) * (line_length_graphemes),
+ sizeof(ucg_grapheme) * (1 + line_length_graphemes));
+ ucg_grapheme append = {
+ error_start_index_bytes,
+ line_length_runes,
+ 1,
+ };
+
+ graphemes[line_length_graphemes] = append;
+ }
+
+ // The units below are counted in visual, monospace cells.
enum {
MAX_LINE_LENGTH = 80,
MAX_TAB_WIDTH = 8,
ELLIPSIS_PADDING = 8, // `... ...`
- MAX_MARK_WIDTH = 4, // `><` or `>>` and `<<`
MIN_LEFT_VIEW = 8,
// A rough estimate of how many characters we'll insert, at most:
- MAX_INSERTED_WIDTH = MAX_TAB_WIDTH + ELLIPSIS_PADDING + MAX_MARK_WIDTH,
+ MAX_INSERTED_WIDTH = MAX_TAB_WIDTH + ELLIPSIS_PADDING,
MAX_LINE_LENGTH_PADDED = MAX_LINE_LENGTH - MAX_INSERTED_WIDTH,
};
- // For the purposes of truncating long lines, we calculate how many
- // runes the line is composed of, first. We'll take note of at which
- // rune index the error starts, too.
- i32 error_start_index_runes = 0;
-
- i32 line_length_runes = 0;
- for (i32 i = 0; i < line_length_bytes; /**/) {
- Rune rune;
-
- if (i == error_start_index_bytes) {
- error_start_index_runes = line_length_runes;
- }
-
- i32 bytes_read = cast(i32)utf8_decode(cast(const u8 *)line_text + i, line_length_bytes - i, &rune);
- if (rune == GB_RUNE_INVALID || bytes_read <= 0) {
- // Bail out; we won't even try to truncate the line later.
- line_length_runes = 0;
+ i32 error_start_index_graphemes = 0;
+ for (i32 i = 0; i < line_length_graphemes; i += 1) {
+ if (graphemes[i].byte_index == error_start_index_bytes) {
+ error_start_index_graphemes = i;
break;
}
-
- line_length_runes += 1;
- i += bytes_read;
}
- if (error_start_index_runes == 0 && error_start_index_bytes != 0 && line_length_runes != 0) {
- // The error index in runes was not found, but we did find a valid Unicode string.
+ if (error_start_index_graphemes == 0 && error_start_index_bytes != 0 && line_length_graphemes != 0) {
+ // The error index in graphemes was not found, but we did find a valid Unicode string.
//
// This is an edge case where the error is sitting on a newline or the
// end of the line, as that is the only location we could not have checked.
- error_start_index_runes = line_length_runes;
+ error_start_index_graphemes = line_length_graphemes;
}
error_out("\t");
bool show_right_ellipsis = false;
- if (line_length_runes > MAX_LINE_LENGTH_PADDED) {
+ i32 squiggle_padding = 0;
+ i32 window_open_bytes = 0;
+ i32 window_close_bytes = 0;
+ if (line_width > MAX_LINE_LENGTH_PADDED) {
// Now that we know the line is over the length limit, we have to
- // compose a runic window in which to display the error.
- i32 window_width = MAX_LINE_LENGTH_PADDED;
-
- i32 extend_right = 0;
- i32 extend_left = 0;
- if (error_start_index_runes + window_width > line_length_runes - 1) {
- // Trade space from the right to the left.
- extend_right = line_length_runes - error_start_index_runes;
- extend_left = window_width - extend_right;
- } else if (MIN_LEFT_VIEW - error_start_index_runes > 0) {
- // Trade space from the left to the right.
- extend_left = error_start_index_runes;
- extend_right = window_width - extend_left;
- } else {
- // Square in the middle somewhere.
- extend_left = MIN_LEFT_VIEW;
- extend_right = window_width - extend_left;
+ // compose a visual window in which to display the error.
+ i32 window_size_left = 0;
+ i32 window_size_right = 0;
+ i32 window_open_graphemes = 0;
+
+ for (i32 i = error_start_index_graphemes - 1; i > 0; i -= 1) {
+ window_size_left += graphemes[i].width;
+ if (window_size_left >= MIN_LEFT_VIEW) {
+ window_open_graphemes = i;
+ window_open_bytes = graphemes[i].byte_index;
+ break;
+ }
}
- i32 window_right_runes = gb_min(error_start_index_runes + extend_right, line_length_runes);
- i32 window_left_runes = gb_max(0, error_start_index_runes - extend_left);
-
- i32 window_right_bytes = 0;
- i32 window_left_bytes = 0;
-
- i32 i_runes = 0;
- for (i32 i = 0; i < line_length_bytes; /**/) {
- if (i_runes == window_left_runes ) { window_left_bytes = i; }
- if (i_runes == window_right_runes) { window_right_bytes = i; }
-
- // No need for error-checking.
- //
- // We've already validated the string at this point, otherwise
- // `line_length_runes` would be 0, and we would not have
- // entered this block.
- i32 bytes_read = cast(i32)utf8_decode(cast(const u8 *)line_text + i, line_length_bytes - i, nullptr);
-
- i_runes += 1;
- i += bytes_read;
+ for (i32 i = error_start_index_graphemes; i < line_length_graphemes; i += 1) {
+ window_size_right += graphemes[i].width;
+ if (window_size_right >= MAX_LINE_LENGTH_PADDED - MIN_LEFT_VIEW) {
+ window_close_bytes = graphemes[i].byte_index;
+ break;
+ }
+ }
+ if (window_close_bytes == 0) {
+ // The window ends at the end of the line.
+ window_close_bytes = line_length_bytes;
}
- if (window_right_bytes == 0) {
- // The end of the window is the end of the line.
- window_right_bytes = line_length_bytes;
+ if (window_size_right < MAX_LINE_LENGTH_PADDED - MIN_LEFT_VIEW) {
+ // Hit the end of the string early on the right side; expand backwards.
+ for (i32 i = window_open_graphemes - 1; i > 0; i -= 1) {
+ window_size_left += graphemes[i].width;
+ if (window_size_left + window_size_right >= MAX_LINE_LENGTH_PADDED) {
+ window_open_graphemes = i;
+ window_open_bytes = graphemes[i].byte_index;
+ break;
+ }
+ }
}
- GB_ASSERT_MSG(window_right_runes >= window_left_runes, "Error line truncation window has wrong rune indices. (left, right: %i, %i)", window_left_runes, window_right_runes);
- GB_ASSERT_MSG(window_right_bytes >= window_left_bytes, "Error line truncation window has wrong byte indices. (left, right: %i, %i)", window_left_bytes, window_right_bytes);
+ GB_ASSERT_MSG(window_close_bytes >= window_open_bytes, "Error line truncation window has wrong byte indices. (open, close: %i, %i)", window_open_bytes, window_close_bytes);
- if (window_right_bytes != line_length_bytes) {
+ if (window_close_bytes != line_length_bytes) {
show_right_ellipsis = true;
}
- // The text will advance; all indices and lengths will become relative.
- // We must keep our other iterators in sync.
- // NOTE: Uncomment the rune versions if they ever get used beyond this point.
-
// Close the window, going left.
- line_length_bytes = window_right_bytes;
+ line_length_bytes = window_close_bytes;
// Adjust the slice of text. In Odin, this would be:
// `line_text = line_text[window_open_bytes:]`
- line_text += window_left_bytes;
- line_length_bytes -= window_left_bytes;
- // line_length_runes -= window_left_runes;
+ line_text += window_open_bytes;
+ line_length_bytes -= window_open_bytes;
GB_ASSERT_MSG(line_length_bytes >= 0, "Bounds-checking error: line_length_bytes");
- // Part of advancing `line_text`:
- error_start_index_bytes -= window_left_bytes;
- // error_start_index_runes -= window_left_runes;
- GB_ASSERT_MSG(error_start_index_bytes >= 0, "Bounds-checking error: error_start_index_bytes");
-
- if (window_left_bytes > 0) {
+ if (window_open_bytes > 0) {
error_out("... ");
+ squiggle_padding += 4;
}
+ } else {
+ // No truncation needed.
+ window_open_bytes = 0;
+ window_close_bytes = line_length_bytes;
+ }
+
+ for (i32 i = error_start_index_graphemes; i > 0; i -= 1) {
+ if (graphemes[i].byte_index == window_open_bytes) {
+ break;
+ }
+ squiggle_padding += graphemes[i].width;
}
// Start printing code.
terminal_set_colours(TerminalStyle_Normal, TerminalColour_White);
- error_out("%.*s", error_start_index_bytes, line_text);
+ error_out("%.*s", line_length_bytes, line_text);
- // Odin-like: `line_text = line_text[error_start_index_bytes:]`
- line_text += error_start_index_bytes;
- line_length_bytes -= error_start_index_bytes;
- GB_ASSERT_MSG(line_length_bytes >= 0, "Bounds-checking error: line_length_bytes");
+ i32 squiggle_length = 0;
+ bool trailing_squiggle = false;
if (end.file_id == pos.file_id) {
// The error has an endpoint.
- terminal_set_colours(TerminalStyle_Bold, marker_colour);
- error_out(open_error_sign);
if (end.line > pos.line) {
// Error goes to next line.
- error_out(wiggly_underline_sgr);
- error_out("%.*s", line_length_bytes, line_text);
-
- error_out(disable_underline_sgr);
-
// Always show the ellipsis in this case
show_right_ellipsis = true;
+ for (i32 i = error_start_index_graphemes; i < line_length_graphemes; i += 1) {
+ squiggle_length += graphemes[i].width;
+ trailing_squiggle = true;
+ }
+
} else if (end.line == pos.line && end.column > pos.column) {
// Error terminates before line end.
- i32 error_length_bytes = gb_min(end.column - pos.column, line_length_bytes);
-
- error_out(wiggly_underline_sgr);
- error_out("%.*s", error_length_bytes, line_text);
- line_text += error_length_bytes;
- line_length_bytes -= error_length_bytes;
- GB_ASSERT_MSG(line_length_bytes >= 0, "Bounds-checking error: line_length_bytes");
+ i32 adjusted_end_index = graphemes[error_start_index_graphemes].byte_index + end.column - pos.column;
- error_out(disable_underline_sgr);
-
- if (!show_right_ellipsis) {
- // The line hasn't been truncated; show the end marker.
- terminal_set_colours(TerminalStyle_Bold, marker_colour);
- error_out(close_error_sign);
+ for (i32 i = error_start_index_graphemes; i < line_length_graphemes; i += 1) {
+ if (graphemes[i].byte_index >= adjusted_end_index) {
+ break;
+ } else if (graphemes[i].byte_index >= window_close_bytes) {
+ trailing_squiggle = true;
+ break;
+ }
+ squiggle_length += graphemes[i].width;
}
-
- terminal_set_colours(TerminalStyle_Normal, TerminalColour_White);
- error_out("%.*s", line_length_bytes, line_text);
}
-
} else {
// The error is at one spot; no range known.
- terminal_set_colours(TerminalStyle_Bold, marker_colour);
- error_out(mark_error_sign);
-
- terminal_set_colours(TerminalStyle_Normal, TerminalColour_White);
- error_out("%.*s", line_length_bytes, line_text);
+ squiggle_length = 1;
}
if (show_right_ellipsis) {
error_out(" ...");
}
+ error_out("\n\t");
+
+ for (i32 i = squiggle_padding; i > 0; i -= 1) {
+ error_out(" ");
+ }
+
+ terminal_set_colours(TerminalStyle_Bold, TerminalColour_Green);
+
+ if (squiggle_length > 0) {
+ error_out("^");
+ squiggle_length -= 1;
+ }
+ for (/**/; squiggle_length > 1; squiggle_length -= 1) {
+ error_out("~");
+ }
+ if (squiggle_length > 0) {
+ if (trailing_squiggle) {
+ error_out("~ ...");
+ } else {
+ error_out("^");
+ }
+ }
+
// NOTE(Feoramund): Specifically print a newline, then reset colours,
// instead of the other way around. Otherwise the printing mechanism
// will collapse the newline for reasons currently beyond my ken.
error_out("\n");
terminal_reset_colours();
- return error_start_index_bytes;
+ return squiggle_padding;
}
gb_internal void error_out_empty(void) {