diff options
| author | DanielGavin <danielgavin5@hotmail.com> | 2020-12-09 14:16:56 +0100 |
|---|---|---|
| committer | DanielGavin <danielgavin5@hotmail.com> | 2020-12-09 14:16:56 +0100 |
| commit | 705f77aae24e383e6adc3cb92a48b60d22699e82 (patch) | |
| tree | 3316f8505dfff4a7a1fea0537ff82dfa36d17fe4 | |
| parent | 25eff04b182e035613ae70ffee1fad918df42942 (diff) | |
added fuzzy matcher
| -rw-r--r-- | src/common/ast.odin | 3 | ||||
| -rw-r--r-- | src/common/fuzzy.odin | 390 | ||||
| -rw-r--r-- | src/index/indexer.odin | 21 | ||||
| -rw-r--r-- | src/index/memory_index.odin | 30 | ||||
| -rw-r--r-- | src/index/util.odin | 4 | ||||
| -rw-r--r-- | src/server/analysis.odin | 32 |
6 files changed, 437 insertions, 43 deletions
diff --git a/src/common/ast.odin b/src/common/ast.odin index a81eeb0..d2f4b4d 100644 --- a/src/common/ast.odin +++ b/src/common/ast.odin @@ -19,7 +19,8 @@ keyword_map : map [string] bool = "true" = true, "false" = true, "nil" = true, - "byte" = true}; + "byte" = true, + "u8" = true}; get_ast_node_string :: proc(node: ^ast.Node, src: [] byte) -> string { return string(src[node.pos.offset:node.end.offset]); diff --git a/src/common/fuzzy.odin b/src/common/fuzzy.odin index 4a03c45..1cc1ed4 100644 --- a/src/common/fuzzy.odin +++ b/src/common/fuzzy.odin @@ -1,24 +1,394 @@ package common +import "core:strings" +import "core:fmt" + +max_pattern :: 63; +max_word :: 127; + +awful_score: int = -(1 << 13); +perfect_bonus :: 4; +miss :: 0; +match :: 1; + +FuzzyCharTypeSet :: u8; + + + +//do bitfield instead +FuzzyScoreInfo :: struct { + score: int, + prev: int, +}; + + + +FuzzyCharRole :: enum(u8) { + Unknown = 0, // Stray control characters or impossible states. + Tail = 1, // Part of a word segment, but not the first character. + Head = 2, // The first character of a word segment. + Separator = 3, // Punctuation characters that separate word segments. +}; + +FuzzyCharType :: enum(u8) { + Empty = 0, // Before-the-start and after-the-end (and control chars). + Lower = 1, // Lowercase letters, digits, and non-ASCII bytes. + Upper = 2, // Uppercase letters. + Punctuation = 3, // ASCII punctuation (including Space) +}; FuzzyMatcher :: struct { pattern: string, + word: string, + lower_pattern: string, + lower_word: string, + scores: [max_pattern + 1][max_word + 1][2] FuzzyScoreInfo, + pattern_count: int, + pattern_type_set: FuzzyCharTypeSet, + word_type_set: FuzzyCharTypeSet, + pattern_role: [max_pattern] FuzzyCharRole, + word_count: int, + score_scale: f32, + word_role: [max_word] FuzzyCharRole, +}; + + +char_roles : [] u8 = { + // clang-format off + // Curr= Empty Lower Upper Separ + /* Prev=Empty */ 0x00, 0xaa, 0xaa, 0xff, // At start, Lower|Upper->Head + /* Prev=Lower */ 0x00, 0x55, 0xaa, 0xff, // In word, Upper->Head;Lower->Tail + /* Prev=Upper */ 0x00, 0x55, 0x59, 0xff, // Ditto, but U(U)U->Tail + /* Prev=Separ */ 0x00, 0xaa, 0xaa, 0xff, // After separator, like at start + // clang-format on +}; + +char_types : [] u8 = { + 0x00, 0x00, 0x00, 0x00, // Control characters + 0x00, 0x00, 0x00, 0x00, // Control characters + 0xff, 0xff, 0xff, 0xff, // Punctuation + 0x55, 0x55, 0xf5, 0xff, // Numbers->Lower, more Punctuation. + 0xab, 0xaa, 0xaa, 0xaa, // @ and A-O + 0xaa, 0xaa, 0xea, 0xff, // P-Z, more Punctuation. + 0x57, 0x55, 0x55, 0x55, // ` and a-o + 0x55, 0x55, 0xd5, 0x3f, // p-z, Punctuation, DEL. + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, // Bytes over 127 -> Lower. + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, // (probably UTF-8). + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, + 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, }; -make_fuzzy_matcher :: proc (pattern: string) -> FuzzyMatcher { - return FuzzyMatcher { - pattern = pattern - }; + +make_fuzzy_matcher :: proc (pattern: string, allocator := context.temp_allocator) -> ^FuzzyMatcher { + + matcher := new(FuzzyMatcher, allocator); + + matcher.pattern_count = min(len(pattern), max_pattern); + matcher.score_scale = matcher.pattern_count > 0 ? 1 / cast(f32)(perfect_bonus * matcher.pattern_count) : 0; + matcher.pattern = pattern[0:matcher.pattern_count]; + matcher.lower_pattern = strings.to_lower(matcher.pattern, context.temp_allocator); + + score_info_miss: FuzzyScoreInfo; + score_info_miss.score = 0; + score_info_miss.prev = miss; + + matcher.scores[0][0][miss] = score_info_miss; + + score_info_match: FuzzyScoreInfo; + score_info_match.score = awful_score; + score_info_match.prev = match; + + matcher.scores[0][0][match] = score_info_match; + + for p := 0; p < matcher.pattern_count; p += 1 { + + for w := 0; w < p; w+= 1 { + + for a := 0; a < 2; a += 1 { + score_info: FuzzyScoreInfo; + score_info.score = awful_score; + score_info.prev = miss; + matcher.scores[p][w][a] = score_info; + ref := matcher.pattern_role[:matcher.pattern_count]; + matcher.pattern_type_set = fuzzy_calculate_roles(matcher.pattern, &ref); + } + + } + + + } + + return matcher; +} + +fuzzy_match :: proc (matcher: ^FuzzyMatcher, word: string) -> (f32, bool) { + + if !fuzzy_init(matcher, word) { + return 0, false; + } + + if matcher.pattern_count <= 0 { + //fmt.println("return"); + return 1, true; + } + + fuzzy_build_graph(matcher); + + best := max(cast(int)matcher.scores[matcher.pattern_count][matcher.word_count][miss].score, + cast(int)matcher.scores[matcher.pattern_count][matcher.word_count][match].score); + + //fmt.println("best ", best); + + if fuzzy_is_awful(best) { + //fmt.println("awful"); + return 0.0, false; + } + + score := matcher.score_scale * min(perfect_bonus * cast(f32)matcher.pattern_count, cast(f32)max(0, best)); + + if matcher.word_count == matcher.pattern_count { + score *= 2; + } + + return score, true; +} + +fuzzy_is_awful :: proc(s: int) -> bool { + return s < awful_score / 2; +} + +fuzzy_calculate_roles :: proc(text: string, roles: ^[] FuzzyCharRole) -> FuzzyCharTypeSet { + + assert(len(text) == len(roles)); + + if len(text) == 0 { + return 0; + } + + type: FuzzyCharType = cast(FuzzyCharType)fuzzy_packed_lookup(char_types, cast(uint)text[0]); + + type_set: FuzzyCharTypeSet = cast(u8)(1 << cast(uint)type); + + types := type; + + for i := 0; i < len(text) - 1; i += 1 { + type = cast(FuzzyCharType)fuzzy_packed_lookup(char_types, cast(uint)text[i+1]); + type_set |= 1 << cast(uint)type; + + fuzzy_rotate(type, &types); + + roles[i] = cast(FuzzyCharRole)fuzzy_packed_lookup(char_roles, cast(uint)types); + } + + fuzzy_rotate(.Empty, &types); + + roles[len(text) - 1] = cast(FuzzyCharRole) fuzzy_packed_lookup(char_roles, cast(uint)types); + + return type_set; +} + +fuzzy_rotate :: proc(t: FuzzyCharType, types: ^FuzzyCharType) { + types^ = cast(FuzzyCharType)(((cast(uint)types^ << 2) | cast(uint)t) & 0x3f); } -fuzzy_match :: proc (matcher: FuzzyMatcher, match: string) -> f64 { +fuzzy_packed_lookup :: proc(data: $A/[]$T, i: uint) -> T { + return (data[i >> 2] >> ((i & 3) * 2)) & 3; +} + +fuzzy_init :: proc(matcher: ^FuzzyMatcher, word: string) -> bool { + + matcher.word = word; + matcher.word_count = min(max_word, len(matcher.word)); + + if matcher.pattern_count > matcher.word_count { + return false; + } + + if matcher.pattern_count == 0 { + return true; + } + + matcher.lower_word = strings.to_lower(word, context.temp_allocator); + + w, p := 0, 0; + + for ; p != matcher.pattern_count; w += 1{ + if w == matcher.word_count { + return false; + } + + if matcher.lower_word[w] == matcher.lower_pattern[p] { + p += 1; + } + } + + + ref := matcher.word_role[:matcher.word_count]; + + matcher.word_type_set = fuzzy_calculate_roles(word, &ref); + + return true; +} + +fuzzy_skip_penalty :: proc(matcher: ^FuzzyMatcher, w: int) -> int { + + if w == 0 { // Skipping the first character. + return 3; + } + + if matcher.word_role[w] == .Head { // Skipping a segment. + return 1; + } + + return 0; +} + +fuzzy_build_graph :: proc(matcher: ^FuzzyMatcher) { + + for w := 0; w < matcher.word_count; w += 1 { + + s: FuzzyScoreInfo; + + score := cast(int)matcher.scores[0][w][miss].score; + penalty := fuzzy_skip_penalty(matcher, w); + sum := score - penalty; + + s.score = sum; + s.prev = miss; + + matcher.scores[0][w + 1][miss] = s; + + s.score = awful_score; + s.prev = miss; + + matcher.scores[0][w + 1][match] = s; + + } + + for p := 0; p < matcher.pattern_count; p += 1 { + + for w := p; w < matcher.word_count; w += 1 { + score := &matcher.scores[p + 1][w + 1]; + pre_miss := &matcher.scores[p + 1][w]; - //temp just look at the beginning on the character - will need to learn about fuzzy matching first. + match_miss_score := pre_miss[match].score; + miss_miss_score := pre_miss[miss].score; - if matcher.pattern[0] == match[0] { - return 1.0; - } + if p < matcher.pattern_count - 1 { + match_miss_score -= fuzzy_skip_penalty(matcher, w); + miss_miss_score -= fuzzy_skip_penalty(matcher, w); + } + if match_miss_score > miss_miss_score { + s: FuzzyScoreInfo; + s.score = match_miss_score; + s.prev = match; + score[miss] = s; + } - return 0.0; + else { + s: FuzzyScoreInfo; + s.score = miss_miss_score; + s.prev = miss; + score[miss] = s; + } + + pre_match := &matcher.scores[p][w]; + + match_match_score := fuzzy_allow_match(matcher, p, w, match) ? + cast(int)pre_match[match].score + fuzzy_match_bonus(matcher, p, w, match) + : awful_score; + + miss_match_score := fuzzy_allow_match(matcher, p, w, miss) ? + cast(int)pre_match[miss].score + fuzzy_match_bonus(matcher, p, w, miss) + : awful_score; + + if match_match_score > miss_match_score { + s: FuzzyScoreInfo; + s.score = match_match_score; + s.prev = match; + score[match] = s; + } + + else { + s: FuzzyScoreInfo; + s.score = miss_match_score; + s.prev = miss; + score[match] = s; + } + + } + + } + + +} + + +fuzzy_match_bonus :: proc(matcher: ^FuzzyMatcher, p: int, w: int, last: int) -> int { + + assert(matcher.lower_pattern[p] == matcher.lower_word[w]); + + s := 1; + + is_pattern_single_case := (cast(uint)matcher.pattern_type_set == 1 << cast(uint)FuzzyCharType.Lower); + is_pattern_single_case |= (cast(uint)matcher.pattern_type_set == 1 << cast(uint)FuzzyCharType.Upper); + + // Bonus: case matches, or a Head in the pattern aligns with one in the word. + // Single-case patterns lack segmentation signals and we assume any character + // can be a head of a segment. + if matcher.pattern[p] == matcher.word[w] || + (matcher.word_role[w] == FuzzyCharRole.Head && + (is_pattern_single_case || matcher.pattern_role[p] == FuzzyCharRole.Head)) { + s += 1; + //fmt.println("match 1"); + } + + // Bonus: a consecutive match. First character match also gets a bonus to + // ensure prefix final match score normalizes to 1.0. + if w == 0 || last == match { + s += 2; + //fmt.println("match 2"); + } + + // Penalty: matching inside a segment (and previous char wasn't matched). + if matcher.word_role[w] == FuzzyCharRole.Tail && p > 0 && last == miss { + s -= 3; + //fmt.println("match 3"); + } + + // Penalty: a Head in the pattern matches in the middle of a word segment. + if matcher.pattern_role[p] == FuzzyCharRole.Head && matcher.word_role[w] == FuzzyCharRole.Tail { + s -= 1; + //fmt.println("match 4"); + } + + // Penalty: matching the first pattern character in the middle of a segment. + if p == 0 && matcher.word_role[w] == FuzzyCharRole.Tail { + s -= 4; + //fmt.println("match 5"); + } + + assert(s <= perfect_bonus); + + return s; } + +fuzzy_allow_match :: proc(matcher: ^FuzzyMatcher, p: int, w: int, last: int) -> bool { + + if matcher.lower_pattern[p] != matcher.lower_word[w] { + return false; + } + + if last == miss { + + if matcher.word_role[w] == FuzzyCharRole.Tail && (matcher.word[w] == matcher.lower_word[w] || + 0 >= (cast(uint)matcher.word_type_set & 1 << cast(uint)FuzzyCharType.Lower)) { + return false; + } + + } + + return true; +} + diff --git a/src/index/indexer.odin b/src/index/indexer.odin index 9c7b902..073d32d 100644 --- a/src/index/indexer.odin +++ b/src/index/indexer.odin @@ -4,6 +4,7 @@ import "core:odin/ast" import "core:fmt" import "core:strings" import "core:log" +import "core:sort" /* @@ -45,7 +46,7 @@ indexer: Indexer; FuzzyResult :: struct { symbol: Symbol, - weight: f32, + score: f32, }; @@ -71,3 +72,21 @@ fuzzy_search :: proc(name: string, pkgs: [] string) -> ([] FuzzyResult, bool) { return memory_index_fuzzy_search(&indexer.static_index, name, pkgs); } + +fuzzy_sort_interface :: proc(s: ^[dynamic] FuzzyResult) -> sort.Interface { + return sort.Interface{ + collection = rawptr(s), + len = proc(it: sort.Interface) -> int { + s := (^[dynamic] FuzzyResult)(it.collection); + return len(s^); + }, + less = proc(it: sort.Interface, i, j: int) -> bool { + s := (^[dynamic] FuzzyResult)(it.collection); + return s[i].score > s[j].score; + }, + swap = proc(it: sort.Interface, i, j: int) { + s := (^[dynamic] FuzzyResult)(it.collection); + s[i], s[j] = s[j], s[i]; + }, + }; +} diff --git a/src/index/memory_index.odin b/src/index/memory_index.odin index 4948b4d..39b938c 100644 --- a/src/index/memory_index.odin +++ b/src/index/memory_index.odin @@ -4,6 +4,7 @@ import "core:hash" import "core:strings" import "core:fmt" import "core:log" +import "core:sort" import "shared:common" @@ -38,28 +39,35 @@ memory_index_fuzzy_search :: proc(index: ^MemoryIndex, name: string, pkgs: [] st fuzzy_matcher := common.make_fuzzy_matcher(name); top := 20; - i := 0; for _, symbol in index.collection.symbols { - if i >= top { - break; - } - if !exists_in_scope(symbol.pkg, pkgs) { continue; } - if name == "" || common.fuzzy_match(fuzzy_matcher, symbol.name) > 0.5 { - result := FuzzyResult {symbol = symbol}; - append(&symbols, result); - i += 1; - } + score, ok := common.fuzzy_match(fuzzy_matcher, symbol.name); + result := FuzzyResult { + symbol = symbol, + score = score, + }; + + append(&symbols, result); } - return symbols[:], true; + //strings.builder + sort.sort(fuzzy_sort_interface(&symbols)); + + //sort. ERROR CRASH + + for s in symbols { + log.infof("score %v", s.score); + } + + + return symbols[:top], true; } exists_in_scope :: proc(symbol_scope: string, scope: [] string) -> bool { diff --git a/src/index/util.odin b/src/index/util.odin index 1755f0c..d420a0d 100644 --- a/src/index/util.odin +++ b/src/index/util.odin @@ -58,6 +58,7 @@ build_string_node :: proc(node: ^ast.Node, builder: ^strings.Builder) { case Implicit: case Undef: case Basic_Lit: + //strings.write_string(builder, n.tok.text); case Ellipsis: build_string(n.expr, builder); case Proc_Lit: @@ -151,7 +152,10 @@ build_string_node :: proc(node: ^ast.Node, builder: ^strings.Builder) { build_string(n.elem, builder); build_string(n.underlying, builder); case Map_Type: + strings.write_string(builder, "map"); + strings.write_string(builder, "["); build_string(n.key, builder); + strings.write_string(builder, "]"); build_string(n.value, builder); } diff --git a/src/server/analysis.odin b/src/server/analysis.odin index 07f7f33..a75e475 100644 --- a/src/server/analysis.odin +++ b/src/server/analysis.odin @@ -553,15 +553,6 @@ resolve_type_expression :: proc(ast_context: ^AstContext, node: ^ast.Expr) -> (i return index.Symbol {}, false; case Selector_Expr: - if ident, ok := v.expr.derived.(Ident); ok { - - if !resolve_ident_is_variable(ast_context, ident) && !resolve_ident_is_package(ast_context, ident) { - log.debugf("not a variable or package %v", ident.name); - return {}, false; - } - - } - if selector, ok := resolve_type_expression(ast_context, v.expr); ok { ast_context.use_locals = false; @@ -611,15 +602,6 @@ resolve_type_expression :: proc(ast_context: ^AstContext, node: ^ast.Expr) -> (i if ptr, ok := s.expr.derived.(Pointer_Type); ok { - if ident, ok := v.expr.derived.(Ident); ok { - - if !resolve_ident_is_variable(ast_context, ident) && !resolve_ident_is_package(ast_context, ident) { - log.debugf("not a variable or package %v", ident.name); - return {}, false; - } - - } - if symbol, ok := resolve_type_expression(ast_context, ptr.elem); ok { #partial switch s2 in symbol.value { @@ -1234,6 +1216,8 @@ get_locals_stmt :: proc(file: ast.File, stmt: ^ast.Stmt, ast_context: ^AstContex get_locals_switch_stmt(file, v, ast_context, document_position); case For_Stmt: get_locals_for_stmt(file, v, ast_context, document_position); + case Inline_Range_Stmt: + get_locals_stmt(file, v.body, ast_context, document_position); case Range_Stmt: get_locals_for_range_stmt(file, v, ast_context, document_position); case If_Stmt: @@ -1275,6 +1259,7 @@ get_locals_using_stmt :: proc(file: ast.File, stmt: ast.Using_Stmt, ast_context: selector.field = index.new_type(ast.Ident, v.types[i].pos, v.types[i].end, context.temp_allocator); selector.field.name = name; ast_context.locals[name] = selector; + ast_context.variables[name] = true; } } @@ -1665,9 +1650,16 @@ get_completion_list :: proc(document: ^Document, position: common.Position) -> ( items := make([dynamic] CompletionItem, context.temp_allocator); - //log.infof("ident %v", position_context.identifier); if position_context.selector != nil { + if ident, ok := position_context.selector.derived.(ast.Ident); ok { + + if !resolve_ident_is_variable(&ast_context, ident) && !resolve_ident_is_package(&ast_context, ident) { + return list, true; + } + + } + symbols := make([dynamic] index.Symbol, context.temp_allocator); selector: index.Symbol; @@ -2070,7 +2062,7 @@ fallback_position_context_completion :: proc(document: ^Document, position: comm } if c == ' ' || c == '{' || c == ',' || - c == '}' || c == '^' || + c == '}' || c == '^' || c == ':' || c == '\n' || c == '\r' { start = i+1; break; |