aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDanielGavin <danielgavin5@hotmail.com>2020-12-09 14:16:56 +0100
committerDanielGavin <danielgavin5@hotmail.com>2020-12-09 14:16:56 +0100
commit705f77aae24e383e6adc3cb92a48b60d22699e82 (patch)
tree3316f8505dfff4a7a1fea0537ff82dfa36d17fe4 /src
parent25eff04b182e035613ae70ffee1fad918df42942 (diff)
added fuzzy matcher
Diffstat (limited to 'src')
-rw-r--r--src/common/ast.odin3
-rw-r--r--src/common/fuzzy.odin390
-rw-r--r--src/index/indexer.odin21
-rw-r--r--src/index/memory_index.odin30
-rw-r--r--src/index/util.odin4
-rw-r--r--src/server/analysis.odin32
6 files changed, 437 insertions, 43 deletions
diff --git a/src/common/ast.odin b/src/common/ast.odin
index a81eeb0..d2f4b4d 100644
--- a/src/common/ast.odin
+++ b/src/common/ast.odin
@@ -19,7 +19,8 @@ keyword_map : map [string] bool =
"true" = true,
"false" = true,
"nil" = true,
- "byte" = true};
+ "byte" = true,
+ "u8" = true};
get_ast_node_string :: proc(node: ^ast.Node, src: [] byte) -> string {
return string(src[node.pos.offset:node.end.offset]);
diff --git a/src/common/fuzzy.odin b/src/common/fuzzy.odin
index 4a03c45..1cc1ed4 100644
--- a/src/common/fuzzy.odin
+++ b/src/common/fuzzy.odin
@@ -1,24 +1,394 @@
package common
+import "core:strings"
+import "core:fmt"
+
+max_pattern :: 63;
+max_word :: 127;
+
+awful_score: int = -(1 << 13);
+perfect_bonus :: 4;
+miss :: 0;
+match :: 1;
+
+FuzzyCharTypeSet :: u8;
+
+
+
+//do bitfield instead
+FuzzyScoreInfo :: struct {
+ score: int,
+ prev: int,
+};
+
+
+
+FuzzyCharRole :: enum(u8) {
+ Unknown = 0, // Stray control characters or impossible states.
+ Tail = 1, // Part of a word segment, but not the first character.
+ Head = 2, // The first character of a word segment.
+ Separator = 3, // Punctuation characters that separate word segments.
+};
+
+FuzzyCharType :: enum(u8) {
+ Empty = 0, // Before-the-start and after-the-end (and control chars).
+ Lower = 1, // Lowercase letters, digits, and non-ASCII bytes.
+ Upper = 2, // Uppercase letters.
+ Punctuation = 3, // ASCII punctuation (including Space)
+};
FuzzyMatcher :: struct {
pattern: string,
+ word: string,
+ lower_pattern: string,
+ lower_word: string,
+ scores: [max_pattern + 1][max_word + 1][2] FuzzyScoreInfo,
+ pattern_count: int,
+ pattern_type_set: FuzzyCharTypeSet,
+ word_type_set: FuzzyCharTypeSet,
+ pattern_role: [max_pattern] FuzzyCharRole,
+ word_count: int,
+ score_scale: f32,
+ word_role: [max_word] FuzzyCharRole,
+};
+
+
+char_roles : [] u8 = {
+ // clang-format off
+ // Curr= Empty Lower Upper Separ
+ /* Prev=Empty */ 0x00, 0xaa, 0xaa, 0xff, // At start, Lower|Upper->Head
+ /* Prev=Lower */ 0x00, 0x55, 0xaa, 0xff, // In word, Upper->Head;Lower->Tail
+ /* Prev=Upper */ 0x00, 0x55, 0x59, 0xff, // Ditto, but U(U)U->Tail
+ /* Prev=Separ */ 0x00, 0xaa, 0xaa, 0xff, // After separator, like at start
+ // clang-format on
+};
+
+char_types : [] u8 = {
+ 0x00, 0x00, 0x00, 0x00, // Control characters
+ 0x00, 0x00, 0x00, 0x00, // Control characters
+ 0xff, 0xff, 0xff, 0xff, // Punctuation
+ 0x55, 0x55, 0xf5, 0xff, // Numbers->Lower, more Punctuation.
+ 0xab, 0xaa, 0xaa, 0xaa, // @ and A-O
+ 0xaa, 0xaa, 0xea, 0xff, // P-Z, more Punctuation.
+ 0x57, 0x55, 0x55, 0x55, // ` and a-o
+ 0x55, 0x55, 0xd5, 0x3f, // p-z, Punctuation, DEL.
+ 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, // Bytes over 127 -> Lower.
+ 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, // (probably UTF-8).
+ 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
+ 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55,
};
-make_fuzzy_matcher :: proc (pattern: string) -> FuzzyMatcher {
- return FuzzyMatcher {
- pattern = pattern
- };
+
+make_fuzzy_matcher :: proc (pattern: string, allocator := context.temp_allocator) -> ^FuzzyMatcher {
+
+ matcher := new(FuzzyMatcher, allocator);
+
+ matcher.pattern_count = min(len(pattern), max_pattern);
+ matcher.score_scale = matcher.pattern_count > 0 ? 1 / cast(f32)(perfect_bonus * matcher.pattern_count) : 0;
+ matcher.pattern = pattern[0:matcher.pattern_count];
+ matcher.lower_pattern = strings.to_lower(matcher.pattern, context.temp_allocator);
+
+ score_info_miss: FuzzyScoreInfo;
+ score_info_miss.score = 0;
+ score_info_miss.prev = miss;
+
+ matcher.scores[0][0][miss] = score_info_miss;
+
+ score_info_match: FuzzyScoreInfo;
+ score_info_match.score = awful_score;
+ score_info_match.prev = match;
+
+ matcher.scores[0][0][match] = score_info_match;
+
+ for p := 0; p < matcher.pattern_count; p += 1 {
+
+ for w := 0; w < p; w+= 1 {
+
+ for a := 0; a < 2; a += 1 {
+ score_info: FuzzyScoreInfo;
+ score_info.score = awful_score;
+ score_info.prev = miss;
+ matcher.scores[p][w][a] = score_info;
+ ref := matcher.pattern_role[:matcher.pattern_count];
+ matcher.pattern_type_set = fuzzy_calculate_roles(matcher.pattern, &ref);
+ }
+
+ }
+
+
+ }
+
+ return matcher;
+}
+
+fuzzy_match :: proc (matcher: ^FuzzyMatcher, word: string) -> (f32, bool) {
+
+ if !fuzzy_init(matcher, word) {
+ return 0, false;
+ }
+
+ if matcher.pattern_count <= 0 {
+ //fmt.println("return");
+ return 1, true;
+ }
+
+ fuzzy_build_graph(matcher);
+
+ best := max(cast(int)matcher.scores[matcher.pattern_count][matcher.word_count][miss].score,
+ cast(int)matcher.scores[matcher.pattern_count][matcher.word_count][match].score);
+
+ //fmt.println("best ", best);
+
+ if fuzzy_is_awful(best) {
+ //fmt.println("awful");
+ return 0.0, false;
+ }
+
+ score := matcher.score_scale * min(perfect_bonus * cast(f32)matcher.pattern_count, cast(f32)max(0, best));
+
+ if matcher.word_count == matcher.pattern_count {
+ score *= 2;
+ }
+
+ return score, true;
+}
+
+fuzzy_is_awful :: proc(s: int) -> bool {
+ return s < awful_score / 2;
+}
+
+fuzzy_calculate_roles :: proc(text: string, roles: ^[] FuzzyCharRole) -> FuzzyCharTypeSet {
+
+ assert(len(text) == len(roles));
+
+ if len(text) == 0 {
+ return 0;
+ }
+
+ type: FuzzyCharType = cast(FuzzyCharType)fuzzy_packed_lookup(char_types, cast(uint)text[0]);
+
+ type_set: FuzzyCharTypeSet = cast(u8)(1 << cast(uint)type);
+
+ types := type;
+
+ for i := 0; i < len(text) - 1; i += 1 {
+ type = cast(FuzzyCharType)fuzzy_packed_lookup(char_types, cast(uint)text[i+1]);
+ type_set |= 1 << cast(uint)type;
+
+ fuzzy_rotate(type, &types);
+
+ roles[i] = cast(FuzzyCharRole)fuzzy_packed_lookup(char_roles, cast(uint)types);
+ }
+
+ fuzzy_rotate(.Empty, &types);
+
+ roles[len(text) - 1] = cast(FuzzyCharRole) fuzzy_packed_lookup(char_roles, cast(uint)types);
+
+ return type_set;
+}
+
+fuzzy_rotate :: proc(t: FuzzyCharType, types: ^FuzzyCharType) {
+ types^ = cast(FuzzyCharType)(((cast(uint)types^ << 2) | cast(uint)t) & 0x3f);
}
-fuzzy_match :: proc (matcher: FuzzyMatcher, match: string) -> f64 {
+fuzzy_packed_lookup :: proc(data: $A/[]$T, i: uint) -> T {
+ return (data[i >> 2] >> ((i & 3) * 2)) & 3;
+}
+
+fuzzy_init :: proc(matcher: ^FuzzyMatcher, word: string) -> bool {
+
+ matcher.word = word;
+ matcher.word_count = min(max_word, len(matcher.word));
+
+ if matcher.pattern_count > matcher.word_count {
+ return false;
+ }
+
+ if matcher.pattern_count == 0 {
+ return true;
+ }
+
+ matcher.lower_word = strings.to_lower(word, context.temp_allocator);
+
+ w, p := 0, 0;
+
+ for ; p != matcher.pattern_count; w += 1{
+ if w == matcher.word_count {
+ return false;
+ }
+
+ if matcher.lower_word[w] == matcher.lower_pattern[p] {
+ p += 1;
+ }
+ }
+
+
+ ref := matcher.word_role[:matcher.word_count];
+
+ matcher.word_type_set = fuzzy_calculate_roles(word, &ref);
+
+ return true;
+}
+
+fuzzy_skip_penalty :: proc(matcher: ^FuzzyMatcher, w: int) -> int {
+
+ if w == 0 { // Skipping the first character.
+ return 3;
+ }
+
+ if matcher.word_role[w] == .Head { // Skipping a segment.
+ return 1;
+ }
+
+ return 0;
+}
+
+fuzzy_build_graph :: proc(matcher: ^FuzzyMatcher) {
+
+ for w := 0; w < matcher.word_count; w += 1 {
+
+ s: FuzzyScoreInfo;
+
+ score := cast(int)matcher.scores[0][w][miss].score;
+ penalty := fuzzy_skip_penalty(matcher, w);
+ sum := score - penalty;
+
+ s.score = sum;
+ s.prev = miss;
+
+ matcher.scores[0][w + 1][miss] = s;
+
+ s.score = awful_score;
+ s.prev = miss;
+
+ matcher.scores[0][w + 1][match] = s;
+
+ }
+
+ for p := 0; p < matcher.pattern_count; p += 1 {
+
+ for w := p; w < matcher.word_count; w += 1 {
+ score := &matcher.scores[p + 1][w + 1];
+ pre_miss := &matcher.scores[p + 1][w];
- //temp just look at the beginning on the character - will need to learn about fuzzy matching first.
+ match_miss_score := pre_miss[match].score;
+ miss_miss_score := pre_miss[miss].score;
- if matcher.pattern[0] == match[0] {
- return 1.0;
- }
+ if p < matcher.pattern_count - 1 {
+ match_miss_score -= fuzzy_skip_penalty(matcher, w);
+ miss_miss_score -= fuzzy_skip_penalty(matcher, w);
+ }
+ if match_miss_score > miss_miss_score {
+ s: FuzzyScoreInfo;
+ s.score = match_miss_score;
+ s.prev = match;
+ score[miss] = s;
+ }
- return 0.0;
+ else {
+ s: FuzzyScoreInfo;
+ s.score = miss_miss_score;
+ s.prev = miss;
+ score[miss] = s;
+ }
+
+ pre_match := &matcher.scores[p][w];
+
+ match_match_score := fuzzy_allow_match(matcher, p, w, match) ?
+ cast(int)pre_match[match].score + fuzzy_match_bonus(matcher, p, w, match)
+ : awful_score;
+
+ miss_match_score := fuzzy_allow_match(matcher, p, w, miss) ?
+ cast(int)pre_match[miss].score + fuzzy_match_bonus(matcher, p, w, miss)
+ : awful_score;
+
+ if match_match_score > miss_match_score {
+ s: FuzzyScoreInfo;
+ s.score = match_match_score;
+ s.prev = match;
+ score[match] = s;
+ }
+
+ else {
+ s: FuzzyScoreInfo;
+ s.score = miss_match_score;
+ s.prev = miss;
+ score[match] = s;
+ }
+
+ }
+
+ }
+
+
+}
+
+
+fuzzy_match_bonus :: proc(matcher: ^FuzzyMatcher, p: int, w: int, last: int) -> int {
+
+ assert(matcher.lower_pattern[p] == matcher.lower_word[w]);
+
+ s := 1;
+
+ is_pattern_single_case := (cast(uint)matcher.pattern_type_set == 1 << cast(uint)FuzzyCharType.Lower);
+ is_pattern_single_case |= (cast(uint)matcher.pattern_type_set == 1 << cast(uint)FuzzyCharType.Upper);
+
+ // Bonus: case matches, or a Head in the pattern aligns with one in the word.
+ // Single-case patterns lack segmentation signals and we assume any character
+ // can be a head of a segment.
+ if matcher.pattern[p] == matcher.word[w] ||
+ (matcher.word_role[w] == FuzzyCharRole.Head &&
+ (is_pattern_single_case || matcher.pattern_role[p] == FuzzyCharRole.Head)) {
+ s += 1;
+ //fmt.println("match 1");
+ }
+
+ // Bonus: a consecutive match. First character match also gets a bonus to
+ // ensure prefix final match score normalizes to 1.0.
+ if w == 0 || last == match {
+ s += 2;
+ //fmt.println("match 2");
+ }
+
+ // Penalty: matching inside a segment (and previous char wasn't matched).
+ if matcher.word_role[w] == FuzzyCharRole.Tail && p > 0 && last == miss {
+ s -= 3;
+ //fmt.println("match 3");
+ }
+
+ // Penalty: a Head in the pattern matches in the middle of a word segment.
+ if matcher.pattern_role[p] == FuzzyCharRole.Head && matcher.word_role[w] == FuzzyCharRole.Tail {
+ s -= 1;
+ //fmt.println("match 4");
+ }
+
+ // Penalty: matching the first pattern character in the middle of a segment.
+ if p == 0 && matcher.word_role[w] == FuzzyCharRole.Tail {
+ s -= 4;
+ //fmt.println("match 5");
+ }
+
+ assert(s <= perfect_bonus);
+
+ return s;
}
+
+fuzzy_allow_match :: proc(matcher: ^FuzzyMatcher, p: int, w: int, last: int) -> bool {
+
+ if matcher.lower_pattern[p] != matcher.lower_word[w] {
+ return false;
+ }
+
+ if last == miss {
+
+ if matcher.word_role[w] == FuzzyCharRole.Tail && (matcher.word[w] == matcher.lower_word[w] ||
+ 0 >= (cast(uint)matcher.word_type_set & 1 << cast(uint)FuzzyCharType.Lower)) {
+ return false;
+ }
+
+ }
+
+ return true;
+}
+
diff --git a/src/index/indexer.odin b/src/index/indexer.odin
index 9c7b902..073d32d 100644
--- a/src/index/indexer.odin
+++ b/src/index/indexer.odin
@@ -4,6 +4,7 @@ import "core:odin/ast"
import "core:fmt"
import "core:strings"
import "core:log"
+import "core:sort"
/*
@@ -45,7 +46,7 @@ indexer: Indexer;
FuzzyResult :: struct {
symbol: Symbol,
- weight: f32,
+ score: f32,
};
@@ -71,3 +72,21 @@ fuzzy_search :: proc(name: string, pkgs: [] string) -> ([] FuzzyResult, bool) {
return memory_index_fuzzy_search(&indexer.static_index, name, pkgs);
}
+
+fuzzy_sort_interface :: proc(s: ^[dynamic] FuzzyResult) -> sort.Interface {
+ return sort.Interface{
+ collection = rawptr(s),
+ len = proc(it: sort.Interface) -> int {
+ s := (^[dynamic] FuzzyResult)(it.collection);
+ return len(s^);
+ },
+ less = proc(it: sort.Interface, i, j: int) -> bool {
+ s := (^[dynamic] FuzzyResult)(it.collection);
+ return s[i].score > s[j].score;
+ },
+ swap = proc(it: sort.Interface, i, j: int) {
+ s := (^[dynamic] FuzzyResult)(it.collection);
+ s[i], s[j] = s[j], s[i];
+ },
+ };
+}
diff --git a/src/index/memory_index.odin b/src/index/memory_index.odin
index 4948b4d..39b938c 100644
--- a/src/index/memory_index.odin
+++ b/src/index/memory_index.odin
@@ -4,6 +4,7 @@ import "core:hash"
import "core:strings"
import "core:fmt"
import "core:log"
+import "core:sort"
import "shared:common"
@@ -38,28 +39,35 @@ memory_index_fuzzy_search :: proc(index: ^MemoryIndex, name: string, pkgs: [] st
fuzzy_matcher := common.make_fuzzy_matcher(name);
top := 20;
- i := 0;
for _, symbol in index.collection.symbols {
- if i >= top {
- break;
- }
-
if !exists_in_scope(symbol.pkg, pkgs) {
continue;
}
- if name == "" || common.fuzzy_match(fuzzy_matcher, symbol.name) > 0.5 {
- result := FuzzyResult {symbol = symbol};
- append(&symbols, result);
- i += 1;
- }
+ score, ok := common.fuzzy_match(fuzzy_matcher, symbol.name);
+ result := FuzzyResult {
+ symbol = symbol,
+ score = score,
+ };
+
+ append(&symbols, result);
}
- return symbols[:], true;
+ //strings.builder
+ sort.sort(fuzzy_sort_interface(&symbols));
+
+ //sort. ERROR CRASH
+
+ for s in symbols {
+ log.infof("score %v", s.score);
+ }
+
+
+ return symbols[:top], true;
}
exists_in_scope :: proc(symbol_scope: string, scope: [] string) -> bool {
diff --git a/src/index/util.odin b/src/index/util.odin
index 1755f0c..d420a0d 100644
--- a/src/index/util.odin
+++ b/src/index/util.odin
@@ -58,6 +58,7 @@ build_string_node :: proc(node: ^ast.Node, builder: ^strings.Builder) {
case Implicit:
case Undef:
case Basic_Lit:
+ //strings.write_string(builder, n.tok.text);
case Ellipsis:
build_string(n.expr, builder);
case Proc_Lit:
@@ -151,7 +152,10 @@ build_string_node :: proc(node: ^ast.Node, builder: ^strings.Builder) {
build_string(n.elem, builder);
build_string(n.underlying, builder);
case Map_Type:
+ strings.write_string(builder, "map");
+ strings.write_string(builder, "[");
build_string(n.key, builder);
+ strings.write_string(builder, "]");
build_string(n.value, builder);
}
diff --git a/src/server/analysis.odin b/src/server/analysis.odin
index 07f7f33..a75e475 100644
--- a/src/server/analysis.odin
+++ b/src/server/analysis.odin
@@ -553,15 +553,6 @@ resolve_type_expression :: proc(ast_context: ^AstContext, node: ^ast.Expr) -> (i
return index.Symbol {}, false;
case Selector_Expr:
- if ident, ok := v.expr.derived.(Ident); ok {
-
- if !resolve_ident_is_variable(ast_context, ident) && !resolve_ident_is_package(ast_context, ident) {
- log.debugf("not a variable or package %v", ident.name);
- return {}, false;
- }
-
- }
-
if selector, ok := resolve_type_expression(ast_context, v.expr); ok {
ast_context.use_locals = false;
@@ -611,15 +602,6 @@ resolve_type_expression :: proc(ast_context: ^AstContext, node: ^ast.Expr) -> (i
if ptr, ok := s.expr.derived.(Pointer_Type); ok {
- if ident, ok := v.expr.derived.(Ident); ok {
-
- if !resolve_ident_is_variable(ast_context, ident) && !resolve_ident_is_package(ast_context, ident) {
- log.debugf("not a variable or package %v", ident.name);
- return {}, false;
- }
-
- }
-
if symbol, ok := resolve_type_expression(ast_context, ptr.elem); ok {
#partial switch s2 in symbol.value {
@@ -1234,6 +1216,8 @@ get_locals_stmt :: proc(file: ast.File, stmt: ^ast.Stmt, ast_context: ^AstContex
get_locals_switch_stmt(file, v, ast_context, document_position);
case For_Stmt:
get_locals_for_stmt(file, v, ast_context, document_position);
+ case Inline_Range_Stmt:
+ get_locals_stmt(file, v.body, ast_context, document_position);
case Range_Stmt:
get_locals_for_range_stmt(file, v, ast_context, document_position);
case If_Stmt:
@@ -1275,6 +1259,7 @@ get_locals_using_stmt :: proc(file: ast.File, stmt: ast.Using_Stmt, ast_context:
selector.field = index.new_type(ast.Ident, v.types[i].pos, v.types[i].end, context.temp_allocator);
selector.field.name = name;
ast_context.locals[name] = selector;
+ ast_context.variables[name] = true;
}
}
@@ -1665,9 +1650,16 @@ get_completion_list :: proc(document: ^Document, position: common.Position) -> (
items := make([dynamic] CompletionItem, context.temp_allocator);
- //log.infof("ident %v", position_context.identifier);
if position_context.selector != nil {
+ if ident, ok := position_context.selector.derived.(ast.Ident); ok {
+
+ if !resolve_ident_is_variable(&ast_context, ident) && !resolve_ident_is_package(&ast_context, ident) {
+ return list, true;
+ }
+
+ }
+
symbols := make([dynamic] index.Symbol, context.temp_allocator);
selector: index.Symbol;
@@ -2070,7 +2062,7 @@ fallback_position_context_completion :: proc(document: ^Document, position: comm
}
if c == ' ' || c == '{' || c == ',' ||
- c == '}' || c == '^' ||
+ c == '}' || c == '^' || c == ':' ||
c == '\n' || c == '\r' {
start = i+1;
break;