diff options
| author | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2022-03-08 18:07:16 +0100 |
|---|---|---|
| committer | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2022-03-08 18:07:16 +0100 |
| commit | e76a5d8e12ae3d6e8e75e8df53a30bfbcc66b829 (patch) | |
| tree | bffca0059ce13d0d150a20f740cbff85e868065b /core/encoding | |
| parent | 6d7217f37a241c688e4734d7b8f6904299cadd2b (diff) | |
[varint] Add signed LEB128 encoding.
Diffstat (limited to 'core/encoding')
| -rw-r--r-- | core/encoding/varint/leb128.odin | 96 |
1 files changed, 82 insertions, 14 deletions
diff --git a/core/encoding/varint/leb128.odin b/core/encoding/varint/leb128.odin index 0c314b3f8..7640ba1fb 100644 --- a/core/encoding/varint/leb128.odin +++ b/core/encoding/varint/leb128.odin @@ -6,21 +6,31 @@ Jeroen van Rijn: Initial implementation. */ -// package varint implements variable length integer encoding and decoding -// using the LEB128 format as used by DWARF debug and other file formats +// package varint implements variable length integer encoding and decoding using +// the LEB128 format as used by DWARF debug info, Android .dex and other file formats. package varint -// Decode a slice of bytes encoding an unsigned LEB128 integer into value and number of bytes used. -// Returns `size` == 0 for an invalid value, empty slice, or a varint > 16 bytes. // In theory we should use the bigint package. In practice, varints bigger than this indicate a corrupted file. -decode_uleb128 :: proc(buf: []u8) -> (val: u128, size: int) { +// Instead we'll set limits on the values we'll encode/decode +// 18 * 7 bits = 126, which means that a possible 19th byte may at most be `0b0000_0011`. +LEB128_MAX_BYTES :: 19 + +Error :: enum { + None = 0, + Buffer_Too_Small = 1, + Value_Too_Large = 2, +} + +// Decode a slice of bytes encoding an unsigned LEB128 integer into value and number of bytes used. +// Returns `size` == 0 for an invalid value, empty slice, or a varint > 18 bytes. +decode_uleb128 :: proc(buf: []u8) -> (val: u128, size: int, err: Error) { more := true for v, i in buf { size = i + 1 - if size > size_of(u128) { - return + if size == LEB128_MAX_BYTES && v > 0b0000_0011 { + return 0, 0, .Value_Too_Large } val |= u128(v & 0x7f) << uint(i * 7) @@ -33,25 +43,26 @@ decode_uleb128 :: proc(buf: []u8) -> (val: u128, size: int) { // If the buffer runs out before the number ends, return an error. if more { - return 0, 0 + return 0, 0, .Buffer_Too_Small } return } // Decode a slice of bytes encoding a signed LEB128 integer into value and number of bytes used. -// Returns `size` == 0 for an invalid value, empty slice, or a varint > 16 bytes. -// In theory we should use the bigint package. In practice, varints bigger than this indicate a corrupted file. -decode_ileb128 :: proc(buf: []u8) -> (val: i128, size: int) { +// Returns `size` == 0 for an invalid value, empty slice, or a varint > 18 bytes. +decode_ileb128 :: proc(buf: []u8) -> (val: i128, size: int, err: Error) { shift: uint if len(buf) == 0 { - return + return 0, 0, .Buffer_Too_Small } for v in buf { size += 1 - if size > size_of(i128) { - return + + // 18 * 7 bits = 126, which means that a possible 19th byte may at most be 0b0000_0011. + if size == LEB128_MAX_BYTES && v > 0b0000_0011 { + return 0, 0, .Value_Too_Large } val |= i128(v & 0x7f) << shift @@ -64,4 +75,61 @@ decode_ileb128 :: proc(buf: []u8) -> (val: i128, size: int) { val |= max(i128) << shift } return +} + +// Encode `val` into `buf` as an unsigned LEB128 encoded series of bytes. +// `buf` must be appropriately sized. +encode_uleb128 :: proc(buf: []u8, val: u128) -> (size: int, err: Error) { + val := val + + for { + size += 1 + + if size > len(buf) { + return 0, .Buffer_Too_Small + } + + low := val & 0x7f + val >>= 7 + + if val > 0 { + low |= 0x80 // more bytes to follow + } + buf[size - 1] = u8(low) + + if val == 0 { break } + } + return +} + +@(private) +SIGN_MASK :: (i128(1) << 121) // sign extend mask + +// Encode `val` into `buf` as a signed LEB128 encoded series of bytes. +// `buf` must be appropriately sized. +encode_ileb128 :: proc(buf: []u8, val: i128) -> (size: int, err: Error) { + val := val + more := true + + for more { + size += 1 + + if size > len(buf) { + return 0, .Buffer_Too_Small + } + + low := val & 0x7f + val >>= 7 + + low = (low ~ SIGN_MASK) - SIGN_MASK + + if (val == 0 && low & 0x40 != 0x40) || (val == -1 && low & 0x40 == 0x40) { + more = false + } else { + low |= 0x80 + } + + buf[size - 1] = u8(low) + } + return }
\ No newline at end of file |