Merge pull request #1718 from Kelimion/varint_streamed

Add uleb128 byte-at-a-time decoder.
author: Jeroen van Rijn <Kelimion@users.noreply.github.com> 2022-04-16 02:20:07 +0200
committer: GitHub <noreply@github.com> 2022-04-16 02:20:07 +0200
commit: a5773f165795704637052457e687d84018301d5c (patch)
tree: 85f3ab8914168d8636f7223a04cce252e0e36f48 /core/encoding/varint
parent: 989641a6167498dfe6663fb330525d4d92becf8a (diff)
parent: 44316401c915cd96a9c97a0747d7487bac905ac7 (diff)
1 files changed, 32 insertions, 20 deletions
diff --git a/core/encoding/varint/leb128.odin b/core/encoding/varint/leb128.odin
index 476b9c2c9..4cad1da76 100644
--- a/core/encoding/varint/leb128.odin
+++ b/core/encoding/varint/leb128.odin
@@ -10,8 +10,6 @@
 // the LEB128 format as used by DWARF debug info, Android .dex and other file formats.
 package varint
 
-import "core:fmt"
-
 // In theory we should use the bigint package. In practice, varints bigger than this indicate a corrupted file.
 // Instead we'll set limits on the values we'll encode/decode
 // 18 * 7 bits = 126, which means that a possible 19th byte may at most be `0b0000_0011`.
@@ -25,31 +23,46 @@ Error :: enum {
 
 // Decode a slice of bytes encoding an unsigned LEB128 integer into value and number of bytes used.
 // Returns `size` == 0 for an invalid value, empty slice, or a varint > 18 bytes.
-decode_uleb128 :: proc(buf: []u8) -> (val: u128, size: int, err: Error) {
-	more := true
-
-	for v, i in buf {
-		size = i + 1
+decode_uleb128_buffer :: proc(buf: []u8) -> (val: u128, size: int, err: Error) {
+	if len(buf) == 0 {
+		return 0, 0, .Buffer_Too_Small
+	}
 
-		// 18 * 7 bits = 126, which means that a possible 19th byte may at most be 0b0000_0011.
-		if size > LEB128_MAX_BYTES || size == LEB128_MAX_BYTES && v > 0b0000_0011 {
-			return 0, 0, .Value_Too_Large
+	for v in buf {
+		val, size, err = decode_uleb128_byte(v, size, val)
+		if err != .Buffer_Too_Small {
+			return
 		}
+	}
+
+	if err == .Buffer_Too_Small {
+		val, size = 0, 0
+	}
+	return
+}
 
-		val |= u128(v & 0x7f) << uint(i * 7)
+// Decodes an unsigned LEB128 integer one byte at a time.
+// Returns `.None` when decoded properly, `.Value_Too_Large` when the value
+// exceeds the limits of a u128, and `.Buffer_Too_Small` when it's not yet fully decoded.
+decode_uleb128_byte :: proc(input: u8, offset: int, accumulator: u128) -> (val: u128, size: int, err: Error) {
+	size = offset + 1
 
-		if v < 128 {
-			more = false
-			break
-		}
+	// 18 * 7 bits = 126, which means that a possible 19th byte may at most be 0b0000_0011.
+	if size > LEB128_MAX_BYTES || size == LEB128_MAX_BYTES && input > 0b0000_0011 {
+		return 0, 0, .Value_Too_Large
 	}
 
-	// If the buffer runs out before the number ends, return an error.
-	if more {
-		return 0, 0, .Buffer_Too_Small
+	val = accumulator | u128(input & 0x7f) << uint(offset * 7)
+
+	if input < 128 {
+		// We're done
+		return
 	}
-	return
+
+	// The continuation bit is set, so more bytes are needed: report the value as not yet fully decoded.
+	return val, size, .Buffer_Too_Small
 }
+decode_uleb128 :: proc {decode_uleb128_buffer, decode_uleb128_byte}
 
 // Decode a slice of bytes encoding a signed LEB128 integer into value and number of bytes used.
 // Returns `size` == 0 for an invalid value, empty slice, or a varint > 18 bytes.
@@ -89,7 +102,6 @@ encode_uleb128 :: proc(buf: []u8, val: u128) -> (size: int, err: Error) {
 		size += 1
 
 		if size > len(buf) {
-			fmt.println(val, buf[:size - 1])
 			return 0, .Buffer_Too_Small
 		}
author	Jeroen van Rijn <Kelimion@users.noreply.github.com>	2022-04-16 02:20:07 +0200
committer	GitHub <noreply@github.com>	2022-04-16 02:20:07 +0200
commit	a5773f165795704637052457e687d84018301d5c (patch)
tree	85f3ab8914168d8636f7223a04cce252e0e36f48 /core/encoding/varint
parent	989641a6167498dfe6663fb330525d4d92becf8a (diff)
parent	44316401c915cd96a9c97a0747d7487bac905ac7 (diff)