diff options
| author | Daniel Gavin <danielgavin5@hotmail.com> | 2022-03-10 15:16:58 +0100 |
|---|---|---|
| committer | Daniel Gavin <danielgavin5@hotmail.com> | 2022-03-10 15:16:58 +0100 |
| commit | 968e428c217e963b82e3c8fd39b9a9721e00f7d6 (patch) | |
| tree | 83c80c6d669b448c482570479d2fe721ddc34f1a /src | |
| parent | 9f1e92e8316ff0db92a3e72d0a21d1e7311171f3 (diff) | |
Use murmur hash and bucket a map in each package.
Diffstat (limited to 'src')
| -rw-r--r-- | src/common/sha1.odin | 290 | ||||
| -rw-r--r-- | src/index/collector.odin | 33 | ||||
| -rw-r--r-- | src/index/memory_index.odin | 49 | ||||
| -rw-r--r-- | src/index/symbol.odin | 6 | ||||
| -rw-r--r-- | src/server/caches.odin | 10 | ||||
| -rw-r--r-- | src/server/documents.odin | 2 | ||||
| -rw-r--r-- | src/server/requests.odin | 14 |
7 files changed, 67 insertions, 337 deletions
diff --git a/src/common/sha1.odin b/src/common/sha1.odin deleted file mode 100644 index 4f1d62f..0000000 --- a/src/common/sha1.odin +++ /dev/null @@ -1,290 +0,0 @@ -package common - -import "core:fmt" - -//ported version of https://llvm.org/doxygen/SHa1_8cpp_source.html - -rol :: proc (number: u32, bits: u32) -> u32 { - return number << bits | number >> (32 - bits) -} - -blk0 :: proc (buf: []u32, i: int) -> u32 { - return buf[i] -} - -blk :: proc (buf: []u32, i: int) -> u32 { - buf[i & 15] = rol(buf[(i + 13) & 15] ~ buf[(i + 8) & 15] ~ buf[(i + 2) & 15] ~ - buf[i & 15], 1) - - return buf[i & 15] -} - -r0 :: proc (a: ^u32, b: ^u32, c: ^u32, d: ^u32, e: ^u32, i: int, buf: []u32) { - e^ += ((b^ & (c^ ~ d^)) ~ d^) + blk0(buf, i) + 0x5a827999 + rol(a^, 5) - b^ = rol(b^, 30) -} - -r1 :: proc (a: ^u32, b: ^u32, c: ^u32, d: ^u32, e: ^u32, i: int, buf: []u32) { - e^ += ((b^ & (c^ ~ d^)) ~ d^) + blk(buf, i) + 0x5a827999 + rol(a^, 5) - b^ += rol(b^, 30) -} - -r2 :: proc (a: ^u32, b: ^u32, c: ^u32, d: ^u32, e: ^u32, i: int, buf: []u32) { - e^ += (b^ ~ c^ ~ d^) + blk(buf, i) + 0x6ed9eba1 + rol(a^, 5) - b^ += rol(b^, 30) -} - -r3 :: proc (a: ^u32, b: ^u32, c: ^u32, d: ^u32, e: ^u32, i: int, buf: []u32) { - e^ += (((b^ | c^) & d^) | (b^ & c^)) + blk(buf, i) + 0x8F1bbcdc + rol(a^, 5) - b^ += rol(b^, 30) -} - -r4 :: proc (a: ^u32, b: ^u32, c: ^u32, d: ^u32, e: ^u32, i: int, buf: []u32) { - e^ += (b^ ~ c^ ~ d^) + blk(buf, i) + 0xca62c1d6 + rol(a^, 5) - b^ += rol(b^, 30) -} - -SHA1_K0 :: 0x5a827999 -SHA1_K20 :: 0x6ed9eba1 -SHA1_K40 :: 0x8f1bbcdc -SHA1_K60 :: 0xca62c1d6 - -SEED_0 :: 0x67452301 -SEED_1 :: 0xefcdab89 -SEED_2 :: 0x98badcfe -SEED_3 :: 0x10325476 -SEED_4 :: 0xc3d2e1f0 - -BLOCK_LENGTH :: 64 -HASH_LENGTH :: 20 - -Sha1context :: struct { - buf: struct #raw_union { - c: [BLOCK_LENGTH]byte, - l: [BLOCK_LENGTH / 4]u32, - }, - state: [HASH_LENGTH / 4]u32, - byte_count: u32, - buf_offset: u8, -} - -sha1_init :: proc (state_context: ^Sha1context) { - state_context.state[0] = SEED_0 - state_context.state[1] = SEED_1 - state_context.state[2] = SEED_2 - state_context.state[3] = SEED_3 - state_context.state[4] = SEED_4 - state_context.byte_count = 0 - state_context.buf_offset = 0 -} - -sha1_hash_block :: proc (state_context: ^Sha1context) { - a := state_context.state[0] - b := state_context.state[1] - c := state_context.state[2] - d := state_context.state[3] - e := state_context.state[4] - - // 4 rounds of 20 operations each. loop unrolled. - r0(&a, &b, &c, &d, &e, 0, state_context.buf.l[:]) - r0(&e, &a, &b, &c, &d, 1, state_context.buf.l[:]) - r0(&d, &e, &a, &b, &c, 2, state_context.buf.l[:]) - r0(&c, &d, &e, &a, &b, 3, state_context.buf.l[:]) - r0(&b, &c, &d, &e, &a, 4, state_context.buf.l[:]) - r0(&a, &b, &c, &d, &e, 5, state_context.buf.l[:]) - r0(&e, &a, &b, &c, &d, 6, state_context.buf.l[:]) - r0(&d, &e, &a, &b, &c, 7, state_context.buf.l[:]) - r0(&c, &d, &e, &a, &b, 8, state_context.buf.l[:]) - r0(&b, &c, &d, &e, &a, 9, state_context.buf.l[:]) - r0(&a, &b, &c, &d, &e, 10, state_context.buf.l[:]) - r0(&e, &a, &b, &c, &d, 11, state_context.buf.l[:]) - r0(&d, &e, &a, &b, &c, 12, state_context.buf.l[:]) - r0(&c, &d, &e, &a, &b, 13, state_context.buf.l[:]) - r0(&b, &c, &d, &e, &a, 14, state_context.buf.l[:]) - r0(&a, &b, &c, &d, &e, 15, state_context.buf.l[:]) - r1(&e, &a, &b, &c, &d, 16, state_context.buf.l[:]) - r1(&d, &e, &a, &b, &c, 17, state_context.buf.l[:]) - r1(&c, &d, &e, &a, &b, 18, state_context.buf.l[:]) - r1(&b, &c, &d, &e, &a, 19, state_context.buf.l[:]) - - r2(&a, &b, &c, &d, &e, 20, state_context.buf.l[:]) - r2(&e, &a, &b, &c, &d, 21, state_context.buf.l[:]) - r2(&d, &e, &a, &b, &c, 22, state_context.buf.l[:]) - r2(&c, &d, &e, &a, &b, 23, state_context.buf.l[:]) - r2(&b, &c, &d, &e, &a, 24, state_context.buf.l[:]) - r2(&a, &b, &c, &d, &e, 25, state_context.buf.l[:]) - r2(&e, &a, &b, &c, &d, 26, state_context.buf.l[:]) - r2(&d, &e, &a, &b, &c, 27, state_context.buf.l[:]) - r2(&c, &d, &e, &a, &b, 28, state_context.buf.l[:]) - r2(&b, &c, &d, &e, &a, 29, state_context.buf.l[:]) - r2(&a, &b, &c, &d, &e, 30, state_context.buf.l[:]) - r2(&e, &a, &b, &c, &d, 31, state_context.buf.l[:]) - r2(&d, &e, &a, &b, &c, 32, state_context.buf.l[:]) - r2(&c, &d, &e, &a, &b, 33, state_context.buf.l[:]) - r2(&b, &c, &d, &e, &a, 34, state_context.buf.l[:]) - r2(&a, &b, &c, &d, &e, 35, state_context.buf.l[:]) - r2(&e, &a, &b, &c, &d, 36, state_context.buf.l[:]) - r2(&d, &e, &a, &b, &c, 37, state_context.buf.l[:]) - r2(&c, &d, &e, &a, &b, 38, state_context.buf.l[:]) - r2(&b, &c, &d, &e, &a, 39, state_context.buf.l[:]) - - r3(&a, &b, &c, &d, &e, 40, state_context.buf.l[:]) - r3(&e, &a, &b, &c, &d, 41, state_context.buf.l[:]) - r3(&d, &e, &a, &b, &c, 42, state_context.buf.l[:]) - r3(&c, &d, &e, &a, &b, 43, state_context.buf.l[:]) - r3(&b, &c, &d, &e, &a, 44, state_context.buf.l[:]) - r3(&a, &b, &c, &d, &e, 45, state_context.buf.l[:]) - r3(&e, &a, &b, &c, &d, 46, state_context.buf.l[:]) - r3(&d, &e, &a, &b, &c, 47, state_context.buf.l[:]) - r3(&c, &d, &e, &a, &b, 48, state_context.buf.l[:]) - r3(&b, &c, &d, &e, &a, 49, state_context.buf.l[:]) - r3(&a, &b, &c, &d, &e, 50, state_context.buf.l[:]) - r3(&e, &a, &b, &c, &d, 51, state_context.buf.l[:]) - r3(&d, &e, &a, &b, &c, 52, state_context.buf.l[:]) - r3(&c, &d, &e, &a, &b, 53, state_context.buf.l[:]) - r3(&b, &c, &d, &e, &a, 54, state_context.buf.l[:]) - r3(&a, &b, &c, &d, &e, 55, state_context.buf.l[:]) - r3(&e, &a, &b, &c, &d, 56, state_context.buf.l[:]) - r3(&d, &e, &a, &b, &c, 57, state_context.buf.l[:]) - r3(&c, &d, &e, &a, &b, 58, state_context.buf.l[:]) - r3(&b, &c, &d, &e, &a, 59, state_context.buf.l[:]) - - r4(&a, &b, &c, &d, &e, 60, state_context.buf.l[:]) - r4(&e, &a, &b, &c, &d, 61, state_context.buf.l[:]) - r4(&d, &e, &a, &b, &c, 62, state_context.buf.l[:]) - r4(&c, &d, &e, &a, &b, 63, state_context.buf.l[:]) - r4(&b, &c, &d, &e, &a, 64, state_context.buf.l[:]) - r4(&a, &b, &c, &d, &e, 65, state_context.buf.l[:]) - r4(&e, &a, &b, &c, &d, 66, state_context.buf.l[:]) - r4(&d, &e, &a, &b, &c, 67, state_context.buf.l[:]) - r4(&c, &d, &e, &a, &b, 68, state_context.buf.l[:]) - r4(&b, &c, &d, &e, &a, 69, state_context.buf.l[:]) - r4(&a, &b, &c, &d, &e, 70, state_context.buf.l[:]) - r4(&e, &a, &b, &c, &d, 71, state_context.buf.l[:]) - r4(&d, &e, &a, &b, &c, 72, state_context.buf.l[:]) - r4(&c, &d, &e, &a, &b, 73, state_context.buf.l[:]) - r4(&b, &c, &d, &e, &a, 74, state_context.buf.l[:]) - r4(&a, &b, &c, &d, &e, 75, state_context.buf.l[:]) - r4(&e, &a, &b, &c, &d, 76, state_context.buf.l[:]) - r4(&d, &e, &a, &b, &c, 77, state_context.buf.l[:]) - r4(&c, &d, &e, &a, &b, 78, state_context.buf.l[:]) - r4(&b, &c, &d, &e, &a, 79, state_context.buf.l[:]) - - state_context.state[0] += a - state_context.state[1] += b - state_context.state[2] += c - state_context.state[3] += d - state_context.state[4] += e -} - -sha1_add_uncounted :: proc (state_context: ^Sha1context, data: byte) { - when ODIN_ENDIAN == .Big { - state_context.buf.c[state_context.buf_offset] = data - } else - - { - state_context.buf.c[state_context.buf_offset ~ 3] = data - } - - state_context.buf_offset += 1 - - if state_context.buf_offset == BLOCK_LENGTH { - sha1_hash_block(state_context) - state_context.buf_offset = 0 - } -} - -sha1_write_byte :: proc (state_context: ^Sha1context, data: byte) { - state_context.byte_count += 1 - sha1_add_uncounted(state_context, data) -} - -sha1_update :: proc (state_context: ^Sha1context, data: []byte) { - state_context.byte_count += cast(u32)len(data) - - current_data := data - - if state_context.buf_offset > 0 { - remainder := min(len(current_data), BLOCK_LENGTH - cast(int)state_context.buf_offset) - - for i := 0; i < remainder; i += 1 { - sha1_add_uncounted(state_context, current_data[i]) - } - - current_data = current_data[remainder - 1:] - } - - for len(current_data) >= BLOCK_LENGTH { - assert(state_context.buf_offset == 0) - assert(BLOCK_LENGTH % 4 == 0) - - BLOCK_LENGTH_32 :: BLOCK_LENGTH / 4 - - for i := 0; i < BLOCK_LENGTH_32; i += 1 { - n := (transmute([]u32)current_data)[i] - - state_context.buf.l[i] = (((n & 0xFF) << 24) | - ((n & 0xFF00) << 8) | - ((n & 0xFF0000) >> 8) | - ((n & 0xFF000000) >> 24)) - } - - sha1_hash_block(state_context) - - current_data = current_data[BLOCK_LENGTH - 1:] - } - - for c in current_data { - sha1_add_uncounted(state_context, c) - } -} - -sha1_pad :: proc (state_context: ^Sha1context) { - sha1_add_uncounted(state_context, 0x80) - - for state_context.buf_offset != 56 { - sha1_add_uncounted(state_context, 0x00) - } - - sha1_add_uncounted(state_context, 0) // We're only using 32 bit lengths - sha1_add_uncounted(state_context, 0) // But SHA-1 supports 64 bit lengths - sha1_add_uncounted(state_context, 0) // So zero pad the top bits - sha1_add_uncounted(state_context, cast(u8)(state_context.byte_count >> 29)) // Shifting to multiply by 8 - sha1_add_uncounted(state_context, cast(u8)(state_context.byte_count >> 21)) // as SHA-1 supports bitstreams as well as - sha1_add_uncounted(state_context, cast(u8)(state_context.byte_count >> 13)) // byte. - sha1_add_uncounted(state_context, cast(u8)(state_context.byte_count >> 5)) - sha1_add_uncounted(state_context, cast(u8)(state_context.byte_count << 3)) -} -sha1_final :: proc (state_context: ^Sha1context, result: ^[5]u32) { - - sha1_pad(state_context) - - when ODIN_ENDIAN == .Big { - for i := 0; i < 5; i += 1 { - result[i] = state_context.state[i] - } - } else { - for i := 0; i < 5; i += 1 { - result[i] = (((state_context.state[i]) << 24) & 0xff000000) | - (((state_context.state[i]) << 8) & 0x00ff0000) | - (((state_context.state[i]) >> 8) & 0x0000ff00) | - (((state_context.state[i]) >> 24) & 0x000000ff) - } - } -} - -sha1_hash :: proc (data: []byte) -> [20]byte { - sha1_context: Sha1context - sha1_init(&sha1_context) - sha1_update(&sha1_context, data) - - result: [20]byte - - sha1_final(&sha1_context, cast(^[5]u32)&result) - - ret: [20]byte - - copy(ret[:], result[:]) - - return ret -} diff --git a/src/index/collector.odin b/src/index/collector.odin index c527873..fc8d857 100644 --- a/src/index/collector.odin +++ b/src/index/collector.odin @@ -15,7 +15,7 @@ import "shared:common" SymbolCollection :: struct { allocator: mem.Allocator, config: ^common.Config, - symbols: map[uint]Symbol, + packages: map[uint]map[uint]Symbol, unique_strings: map[string]string, //store all our strings as unique strings and reference them to save memory. } @@ -41,21 +41,27 @@ make_symbol_collection :: proc(allocator := context.allocator, config: ^common.C return SymbolCollection { allocator = allocator, config = config, - symbols = make(map[uint]Symbol, 16, allocator), + packages = make(map[uint]map[uint]Symbol, 16, allocator), unique_strings = make(map[string]string, 16, allocator), } } delete_symbol_collection :: proc(collection: SymbolCollection) { - for k, v in collection.symbols { - free_symbol(v, collection.allocator) + for k, v in collection.packages { + for k2, v2 in v { + free_symbol(v2, collection.allocator) + } } for k, v in collection.unique_strings { delete(v, collection.allocator) } - delete(collection.symbols) + for k, v in collection.packages { + delete(v) + } + + delete(collection.packages) delete(collection.unique_strings) } @@ -383,15 +389,22 @@ collect_symbols :: proc(collection: ^SymbolCollection, file: ast.File, uri: stri symbol.uri = get_index_unique_string(collection, uri) } - cat := strings.concatenate({symbol.pkg, name}, context.temp_allocator) + package_id := get_id(symbol.pkg) + name_id := get_id(name) - id := get_symbol_id(cat) + pkg: ^map[uint]Symbol + ok: bool - if v, ok := collection.symbols[id]; !ok || v.name == "" { - collection.symbols[id] = symbol + if pkg, ok = &collection.packages[package_id]; !ok { + collection.packages[package_id] = make(map[uint]Symbol, 100, collection.allocator) + pkg = &collection.packages[package_id] + } + + if v, ok := pkg[name_id]; !ok || v.name == "" { + pkg[name_id] = symbol } else { free_symbol(symbol, collection.allocator) - } + } } return .None diff --git a/src/index/memory_index.odin b/src/index/memory_index.odin index 5146bf8..4afa334 100644 --- a/src/index/memory_index.odin +++ b/src/index/memory_index.odin @@ -25,8 +25,17 @@ make_memory_index :: proc(collection: SymbolCollection) -> MemoryIndex { } memory_index_lookup :: proc(index: ^MemoryIndex, name: string, pkg: string) -> (Symbol, bool) { - id := get_symbol_id(strings.concatenate({pkg, name}, context.temp_allocator)) - return index.collection.symbols[id] + package_id := get_id(pkg) + name_id := get_id(name) + + pkg: ^map[uint]Symbol + ok: bool + + if pkg, ok = &index.collection.packages[package_id]; ok { + return pkg[name_id] + } + + return {}, false } memory_index_fuzzy_search :: proc(index: ^MemoryIndex, name: string, pkgs: []string) -> ([]FuzzyResult, bool) { @@ -36,20 +45,21 @@ memory_index_fuzzy_search :: proc(index: ^MemoryIndex, name: string, pkgs: []str top := 20 - for _, symbol in index.collection.symbols { - - if !exists_in_scope(symbol.pkg, pkgs) { - continue - } - - if score, ok := common.fuzzy_match(fuzzy_matcher, symbol.name); ok == 1 { - result := FuzzyResult { - symbol = symbol, - score = score, + for pkg in pkgs { + package_id := get_id(pkg) + + if pkg, ok := index.collection.packages[package_id]; ok { + for _, symbol in pkg { + if score, ok := common.fuzzy_match(fuzzy_matcher, symbol.name); ok == 1 { + result := FuzzyResult { + symbol = symbol, + score = score, + } + + append(&symbols, result) + } } - - append(&symbols, result) - } + } } sort.sort(fuzzy_sort_interface(&symbols)) @@ -61,12 +71,3 @@ memory_index_fuzzy_search :: proc(index: ^MemoryIndex, name: string, pkgs: []str } } -exists_in_scope :: proc(symbol_scope: string, scope: []string) -> bool { - for s in scope { - if strings.compare(symbol_scope, s) == 0 { - return true - } - } - - return false -} diff --git a/src/index/symbol.odin b/src/index/symbol.odin index c00c08e..4f80032 100644 --- a/src/index/symbol.odin +++ b/src/index/symbol.odin @@ -184,8 +184,6 @@ free_symbol :: proc(symbol: Symbol, allocator: mem.Allocator) { } } -get_symbol_id :: proc(str: string) -> uint { - ret := common.sha1_hash(transmute([]byte)str) - r := cast(^uint)slice.first_ptr(ret[:]) - return r^ +get_id :: proc(str: string) -> uint { + return cast(uint)hash.murmur64(transmute([]byte)str) } diff --git a/src/server/caches.odin b/src/server/caches.odin index 640677f..2bb4b7f 100644 --- a/src/server/caches.odin +++ b/src/server/caches.odin @@ -12,8 +12,10 @@ FileResolveCache :: struct { file_resolve_cache: FileResolveCache resolve_entire_file :: proc(document: ^common.Document) { - file_resolve_cache.files[document.uri.uri] = analysis.resolve_entire_file( - document, - common.scratch_allocator(document.allocator), - ) + if document.uri.uri not_in file_resolve_cache.files { + file_resolve_cache.files[document.uri.uri] = analysis.resolve_entire_file( + document, + common.scratch_allocator(document.allocator), + ) + } } diff --git a/src/server/documents.odin b/src/server/documents.odin index 9c70035..3ac8cdc 100644 --- a/src/server/documents.odin +++ b/src/server/documents.odin @@ -323,6 +323,8 @@ parse_document :: proc(document: ^common.Document, config: ^common.Config) -> ([ free_all(common.scratch_allocator(document.allocator)) + delete_key(&file_resolve_cache.files, document.uri.uri) + context.allocator = common.scratch_allocator(document.allocator) //have to cheat the parser since it really wants to parse an entire package with the new changes... diff --git a/src/server/requests.odin b/src/server/requests.odin index 62ef3b1..8b988cc 100644 --- a/src/server/requests.odin +++ b/src/server/requests.odin @@ -816,11 +816,13 @@ notification_did_save :: proc (params: json.Value, id: RequestId, config: ^commo log.errorf("error in parse file for indexing %v", fullpath) } - for key, value in index.indexer.dynamic_index.collection.symbols { - if value.uri == uri.uri { - index.free_symbol(value, context.allocator) - index.indexer.dynamic_index.collection.symbols[key] = {} - } + for k, v in &index.indexer.dynamic_index.collection.packages { + for k2, v2 in &v { + if v2.uri == uri.uri { + index.free_symbol(v2, context.allocator) + v[k2] = {} + } + } } if ret := index.collect_symbols(&index.indexer.dynamic_index.collection, file, uri.uri); ret != .None { @@ -995,6 +997,8 @@ request_inlay_hint :: proc (params: json.Value, id: RequestId, config: ^common.C hints: []InlayHint + resolve_entire_file(document) + if cache_symbols, ok := file_resolve_cache.files[document.uri.uri]; ok { hints, ok = get_inlay_hints(document, cache_symbols) } |