diff options
| author | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-04-30 00:21:52 +0200 |
|---|---|---|
| committer | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-04-30 00:21:52 +0200 |
| commit | 58e023e0cf581db71b4bf3c341b781a2f09ff50a (patch) | |
| tree | cf070b07edb9d8e3e0cb3dd849d26cf8228377c5 /core/compress | |
| parent | 222bab501ce886692f300b02d15b9e7099457406 (diff) | |
Add `compress` and `image` to core.
Diffstat (limited to 'core/compress')
| -rw-r--r-- | core/compress/common.odin | 203 | ||||
| -rw-r--r-- | core/compress/gzip/example.odin | 70 | ||||
| -rw-r--r-- | core/compress/gzip/gzip.odin | 314 | ||||
| -rw-r--r-- | core/compress/zlib/example.odin | 42 | ||||
| -rw-r--r-- | core/compress/zlib/zlib.odin | 602 |
5 files changed, 1231 insertions, 0 deletions
diff --git a/core/compress/common.odin b/core/compress/common.odin new file mode 100644 index 000000000..99b054903 --- /dev/null +++ b/core/compress/common.odin @@ -0,0 +1,203 @@ +package compress + +import "core:io" +import "core:image" + +// Error helper, e.g. is_kind(err, General_Error.OK); +is_kind :: proc(u: $U, x: $V) -> bool { + v, ok := u.(V); + return ok && v == x; +} + +Error :: union { + General_Error, + Deflate_Error, + ZLIB_Error, + GZIP_Error, + ZIP_Error, + /* + This is here because png.load will return a this type of error union, + as it may involve an I/O error, a Deflate error, etc. + */ + image.PNG_Error, +} + +General_Error :: enum { + OK = 0, + File_Not_Found, + Cannot_Open_File, + File_Too_Short, + Stream_Too_Short, + Output_Too_Short, + Unknown_Compression_Method, + Checksum_Failed, + Incompatible_Options, + Unimplemented, +} + +GZIP_Error :: enum { + Invalid_GZIP_Signature, + Reserved_Flag_Set, + Invalid_Extra_Data, + Original_Name_Too_Long, + Comment_Too_Long, + Payload_Length_Invalid, + Payload_CRC_Invalid, +} + +ZIP_Error :: enum { + Invalid_ZIP_File_Signature, + Unexpected_Signature, + Insert_Next_Disk, + Expected_End_of_Central_Directory_Record, +} + +ZLIB_Error :: enum { + Unsupported_Window_Size, + FDICT_Unsupported, + Unsupported_Compression_Level, + Code_Buffer_Malformed, +} + +Deflate_Error :: enum { + Huffman_Bad_Sizes, + Huffman_Bad_Code_Lengths, + Inflate_Error, + Bad_Distance, + Bad_Huffman_Code, + Len_Nlen_Mismatch, + BType_3, +} + +// General context for ZLIB, LZW, etc. +Context :: struct { + code_buffer: u32, + num_bits: i8, + /* + num_bits will be set to -100 if the buffer is malformed + */ + eof: b8, + + input: io.Stream, + output: io.Stream, + bytes_written: i64, + // Used to update hash as we write instead of all at once + rolling_hash: u32, + + // Sliding window buffer. Size must be a power of two. + window_size: i64, + last: ^[dynamic]byte, +} + +// Stream helpers +/* + TODO: These need to be optimized. + + Streams should really only check if a certain method is available once, perhaps even during setup. + + Bit and byte readers may be merged so that reading bytes will grab them from the bit buffer first. + This simplifies end-of-stream handling where bits may be left in the bit buffer. +*/ + +read_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) { + b := make([]u8, size_of(T), context.temp_allocator); + r, e1 := io.to_reader(c.input); + _, e2 := io.read(r, b); + if !e1 || e2 != .None { + return T{}, e2; + } + + res = (^T)(raw_data(b))^; + return res, .None; +} + +read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) { + return read_data(z, u8); +} + +peek_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) { + // Get current position to read from. + curr, e1 := c.input->impl_seek(0, .Current); + if e1 != .None { + return T{}, e1; + } + r, e2 := io.to_reader_at(c.input); + if !e2 { + return T{}, .Empty; + } + b := make([]u8, size_of(T), context.temp_allocator); + _, e3 := io.read_at(r, b, curr); + if e3 != .None { + return T{}, .Empty; + } + + res = (^T)(raw_data(b))^; + return res, .None; +} + +// Sliding window read back +peek_back_byte :: proc(c: ^Context, offset: i64) -> (res: u8, err: io.Error) { + // Look back into the sliding window. + return c.last[offset % c.window_size], .None; +} + +// Generalized bit reader LSB +refill_lsb :: proc(z: ^Context, width := i8(24)) { + for { + if z.num_bits > width { + break; + } + if z.code_buffer == 0 && z.num_bits == -1 { + z.num_bits = 0; + } + if z.code_buffer >= 1 << uint(z.num_bits) { + // Code buffer is malformed. + z.num_bits = -100; + return; + } + c, err := read_u8(z); + if err != .None { + // This is fine at the end of the file. + z.num_bits = -42; + z.eof = true; + return; + } + z.code_buffer |= (u32(c) << u8(z.num_bits)); + z.num_bits += 8; + } +} + +consume_bits_lsb :: #force_inline proc(z: ^Context, width: u8) { + z.code_buffer >>= width; + z.num_bits -= i8(width); +} + +peek_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 { + if z.num_bits < i8(width) { + refill_lsb(z); + } + // assert(z.num_bits >= i8(width)); + return z.code_buffer & ~(~u32(0) << width); +} + +peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 { + assert(z.num_bits >= i8(width)); + return z.code_buffer & ~(~u32(0) << width); +} + +read_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 { + k := peek_bits_lsb(z, width); + consume_bits_lsb(z, width); + return k; +} + +read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 { + k := peek_bits_no_refill_lsb(z, width); + consume_bits_lsb(z, width); + return k; +} + +discard_to_next_byte_lsb :: proc(z: ^Context) { + discard := u8(z.num_bits & 7); + consume_bits_lsb(z, discard); +}
\ No newline at end of file diff --git a/core/compress/gzip/example.odin b/core/compress/gzip/example.odin new file mode 100644 index 000000000..fa0cba9db --- /dev/null +++ b/core/compress/gzip/example.odin @@ -0,0 +1,70 @@ +//+ignore +package gzip + +import "core:compress/gzip" +import "core:bytes" +import "core:os" + +// Small GZIP file with fextra, fname and fcomment present. +@private +TEST: []u8 = { + 0x1f, 0x8b, 0x08, 0x1c, 0xcb, 0x3b, 0x3a, 0x5a, + 0x02, 0x03, 0x07, 0x00, 0x61, 0x62, 0x03, 0x00, + 0x63, 0x64, 0x65, 0x66, 0x69, 0x6c, 0x65, 0x6e, + 0x61, 0x6d, 0x65, 0x00, 0x54, 0x68, 0x69, 0x73, + 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x6f, + 0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x00, 0x2b, 0x48, + 0xac, 0xcc, 0xc9, 0x4f, 0x4c, 0x01, 0x00, 0x15, + 0x6a, 0x2c, 0x42, 0x07, 0x00, 0x00, 0x00, +}; + +main :: proc() { + // Set up output buffer. + buf: bytes.Buffer; + defer bytes.buffer_destroy(&buf); + + stdout :: proc(s: string) { + os.write_string(os.stdout, s); + } + stderr :: proc(s: string) { + os.write_string(os.stderr, s); + } + + args := os.args; + + if len(args) < 2 { + stderr("No input file specified.\n"); + err := gzip.load(&TEST, &buf); + if gzip.is_kind(err, gzip.E_General.OK) { + stdout("Displaying test vector: "); + stdout(bytes.buffer_to_string(&buf)); + stdout("\n"); + } + } + + // The rest are all files. + args = args[1:]; + err: gzip.Error; + + for file in args { + if file == "-" { + // Read from stdin + s := os.stream_from_handle(os.stdin); + err = gzip.load(&s, &buf); + } else { + err = gzip.load(file, &buf); + } + if !gzip.is_kind(err, gzip.E_General.OK) { + if gzip.is_kind(err, gzip.E_General.File_Not_Found) { + stderr("File not found: "); + stderr(file); + stderr("\n"); + os.exit(1); + } + stderr("GZIP returned an error.\n"); + os.exit(2); + } + stdout(bytes.buffer_to_string(&buf)); + } + os.exit(0); +}
\ No newline at end of file diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin new file mode 100644 index 000000000..3278d7c1d --- /dev/null +++ b/core/compress/gzip/gzip.odin @@ -0,0 +1,314 @@ +package gzip + +import "core:compress/zlib" +import "core:compress" +import "core:os" +import "core:io" +import "core:bytes" +import "core:hash" + +/* + + This package implements support for the GZIP file format v4.3, + as specified in RFC 1952. + + It is implemented in such a way that it lends itself naturally + to be the input to a complementary TAR implementation. + +*/ + +Magic :: enum u16le { + GZIP = 0x8b << 8 | 0x1f, +} + +Header :: struct #packed { + magic: Magic, + compression_method: Compression, + flags: Header_Flags, + modification_time: u32le, + xfl: Compression_Flags, + os: OS, +} +#assert(size_of(Header) == 10); + +Header_Flag :: enum u8 { + // Order is important + text = 0, + header_crc = 1, + extra = 2, + name = 3, + comment = 4, + reserved_1 = 5, + reserved_2 = 6, + reserved_3 = 7, +} +Header_Flags :: distinct bit_set[Header_Flag; u8]; + +OS :: enum u8 { + FAT = 0, + Amiga = 1, + VMS = 2, + Unix = 3, + VM_CMS = 4, + Atari_TOS = 5, + HPFS = 6, + Macintosh = 7, + Z_System = 8, + CP_M = 9, + TOPS_20 = 10, + NTFS = 11, + QDOS = 12, + Acorn_RISCOS = 13, + _Unknown = 14, + Unknown = 255, +} +OS_Name :: #partial [OS]string{ + .FAT = "FAT", + .Amiga = "Amiga", + .VMS = "VMS/OpenVMS", + .Unix = "Unix", + .VM_CMS = "VM/CMS", + .Atari_TOS = "Atari TOS", + .HPFS = "HPFS", + .Macintosh = "Macintosh", + .Z_System = "Z-System", + .CP_M = "CP/M", + .TOPS_20 = "TOPS-20", + .NTFS = "NTFS", + .QDOS = "QDOS", + .Acorn_RISCOS = "Acorn RISCOS", + .Unknown = "Unknown", +}; + +Compression :: enum u8 { + DEFLATE = 8, +} + +Compression_Flags :: enum u8 { + Maximum_Compression = 2, + Fastest_Compression = 4, +} + +Error :: compress.Error; +E_General :: compress.General_Error; +E_GZIP :: compress.GZIP_Error; +E_ZLIB :: compress.ZLIB_Error; +E_Deflate :: compress.Deflate_Error; +is_kind :: compress.is_kind; + +load_from_slice :: proc(slice: ^[]u8, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) { + + r := bytes.Reader{}; + bytes.reader_init(&r, slice^); + stream := bytes.reader_to_stream(&r); + + err = load_from_stream(&stream, buf, allocator); + + return err; +} + +load_from_file :: proc(filename: string, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) { + data, ok := os.read_entire_file(filename, context.temp_allocator); + if ok { + err = load_from_slice(&data, buf, allocator); + return; + } else { + return E_General.File_Not_Found; + } +} + +load_from_stream :: proc(stream: ^io.Stream, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) { + + ctx := compress.Context{ + input = stream^, + }; + buf := buf; + ws := bytes.buffer_to_stream(buf); + ctx.output = ws; + + header, e := compress.read_data(&ctx, Header); + if e != .None { + return E_General.File_Too_Short; + } + + if header.magic != .GZIP { + return E_GZIP.Invalid_GZIP_Signature; + } + if header.compression_method != .DEFLATE { + return E_General.Unknown_Compression_Method; + } + + if header.os >= ._Unknown { + header.os = .Unknown; + } + + if .reserved_1 in header.flags || .reserved_2 in header.flags || .reserved_3 in header.flags { + return E_GZIP.Reserved_Flag_Set; + } + + // printf("signature: %v\n", header.magic); + // printf("compression: %v\n", header.compression_method); + // printf("flags: %v\n", header.flags); + // printf("modification time: %v\n", time.unix(i64(header.modification_time), 0)); + // printf("xfl: %v (%v)\n", header.xfl, int(header.xfl)); + // printf("os: %v\n", OS_Name[header.os]); + + if .extra in header.flags { + xlen, e_extra := compress.read_data(&ctx, u16le); + if e_extra != .None { + return E_General.Stream_Too_Short; + } + // printf("Extra data present (%v bytes)\n", xlen); + if xlen < 4 { + // Minimum length is 2 for ID + 2 for a field length, if set to zero. + return E_GZIP.Invalid_Extra_Data; + } + + field_id: [2]u8; + field_length: u16le; + field_error: io.Error; + + for xlen >= 4 { + // println("Parsing Extra field(s)."); + field_id, field_error = compress.read_data(&ctx, [2]u8); + if field_error != .None { + // printf("Parsing Extra returned: %v\n", field_error); + return E_General.Stream_Too_Short; + } + xlen -= 2; + + field_length, field_error = compress.read_data(&ctx, u16le); + if field_error != .None { + // printf("Parsing Extra returned: %v\n", field_error); + return E_General.Stream_Too_Short; + } + xlen -= 2; + + if xlen <= 0 { + // We're not going to try and recover by scanning for a ZLIB header. + // Who knows what else is wrong with this file. + return E_GZIP.Invalid_Extra_Data; + } + + // printf(" Field \"%v\" of length %v found: ", string(field_id[:]), field_length); + if field_length > 0 { + field_data := make([]u8, field_length, context.temp_allocator); + _, field_error = ctx.input->impl_read(field_data); + if field_error != .None { + // printf("Parsing Extra returned: %v\n", field_error); + return E_General.Stream_Too_Short; + } + xlen -= field_length; + + // printf("%v\n", string(field_data)); + } + + if xlen != 0 { + return E_GZIP.Invalid_Extra_Data; + } + } + } + + if .name in header.flags { + // Should be enough. + name: [1024]u8; + b: [1]u8; + i := 0; + name_error: io.Error; + + for i < len(name) { + _, name_error = ctx.input->impl_read(b[:]); + if name_error != .None { + return E_General.Stream_Too_Short; + } + if b == 0 { + break; + } + name[i] = b[0]; + i += 1; + if i >= len(name) { + return E_GZIP.Original_Name_Too_Long; + } + } + // printf("Original filename: %v\n", string(name[:i])); + } + + if .comment in header.flags { + // Should be enough. + comment: [1024]u8; + b: [1]u8; + i := 0; + comment_error: io.Error; + + for i < len(comment) { + _, comment_error = ctx.input->impl_read(b[:]); + if comment_error != .None { + return E_General.Stream_Too_Short; + } + if b == 0 { + break; + } + comment[i] = b[0]; + i += 1; + if i >= len(comment) { + return E_GZIP.Comment_Too_Long; + } + } + // printf("Comment: %v\n", string(comment[:i])); + } + + if .header_crc in header.flags { + crc16: [2]u8; + crc_error: io.Error; + _, crc_error = ctx.input->impl_read(crc16[:]); + if crc_error != .None { + return E_General.Stream_Too_Short; + } + /* + We don't actually check the CRC16 (lower 2 bytes of CRC32 of header data until the CRC field). + If we find a gzip file in the wild that sets this field, we can add proper support for it. + */ + } + + /* + We should have arrived at the ZLIB payload. + */ + + zlib_error := zlib.inflate_raw(&ctx); + + // fmt.printf("ZLIB returned: %v\n", zlib_error); + + if !is_kind(zlib_error, E_General.OK) || zlib_error == nil { + return zlib_error; + } + + /* + Read CRC32 using the ctx bit reader because zlib may leave bytes in there. + */ + compress.discard_to_next_byte_lsb(&ctx); + + payload_crc_b: [4]u8; + payload_len_b: [4]u8; + for i in 0..3 { + payload_crc_b[i] = u8(compress.read_bits_lsb(&ctx, 8)); + } + payload_crc := transmute(u32le)payload_crc_b; + for i in 0..3 { + payload_len_b[i] = u8(compress.read_bits_lsb(&ctx, 8)); + } + payload_len := int(transmute(u32le)payload_len_b); + + payload := bytes.buffer_to_bytes(buf); + crc32 := u32le(hash.crc32(payload)); + + if crc32 != payload_crc { + return E_GZIP.Payload_CRC_Invalid; + } + + if len(payload) != payload_len { + return E_GZIP.Payload_Length_Invalid; + } + return E_General.OK; +} + +load :: proc{load_from_file, load_from_slice, load_from_stream};
\ No newline at end of file diff --git a/core/compress/zlib/example.odin b/core/compress/zlib/example.odin new file mode 100644 index 000000000..a7fa76d62 --- /dev/null +++ b/core/compress/zlib/example.odin @@ -0,0 +1,42 @@ +//+ignore +package zlib + +import "core:compress/zlib" +import "core:bytes" +import "core:fmt" + +main :: proc() { + + ODIN_DEMO: []u8 = { + 120, 156, 101, 144, 77, 110, 131, 48, 16, 133, 215, 204, 41, 158, 44, + 69, 73, 32, 148, 182, 75, 35, 14, 208, 125, 47, 96, 185, 195, 143, + 130, 13, 50, 38, 81, 84, 101, 213, 75, 116, 215, 43, 246, 8, 53, + 82, 126, 8, 181, 188, 152, 153, 111, 222, 147, 159, 123, 165, 247, 170, + 98, 24, 213, 88, 162, 198, 244, 157, 243, 16, 186, 115, 44, 75, 227, + 5, 77, 115, 72, 137, 222, 117, 122, 179, 197, 39, 69, 161, 170, 156, + 50, 144, 5, 68, 130, 4, 49, 126, 127, 190, 191, 144, 34, 19, 57, + 69, 74, 235, 209, 140, 173, 242, 157, 155, 54, 158, 115, 162, 168, 12, + 181, 239, 246, 108, 17, 188, 174, 242, 224, 20, 13, 199, 198, 235, 250, + 194, 166, 129, 86, 3, 99, 157, 172, 37, 230, 62, 73, 129, 151, 252, + 70, 211, 5, 77, 31, 104, 188, 160, 113, 129, 215, 59, 205, 22, 52, + 123, 160, 83, 142, 255, 242, 89, 123, 93, 149, 200, 50, 188, 85, 54, + 252, 18, 248, 192, 238, 228, 235, 198, 86, 224, 118, 224, 176, 113, 166, + 112, 67, 106, 227, 159, 122, 215, 88, 95, 110, 196, 123, 205, 183, 224, + 98, 53, 8, 104, 213, 234, 201, 147, 7, 248, 192, 14, 170, 29, 25, + 171, 15, 18, 59, 138, 112, 63, 23, 205, 110, 254, 136, 109, 78, 231, + 63, 234, 138, 133, 204, + }; + + buf: bytes.Buffer; + + // We can pass ", true" to inflate a raw DEFLATE stream instead of a ZLIB wrapped one. + err := zlib.inflate(&ODIN_DEMO, &buf); + defer bytes.buffer_destroy(&buf); + + if !zlib.is_kind(err, zlib.E_General.OK) { + fmt.printf("\nError: %v\n", err); + } + s := bytes.buffer_to_string(&buf); + fmt.printf("Input: %v bytes, output (%v bytes):\n%v\n", len(ODIN_DEMO), len(s), s); + assert(len(s) == 438); +}
\ No newline at end of file diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin new file mode 100644 index 000000000..34a7984a7 --- /dev/null +++ b/core/compress/zlib/zlib.odin @@ -0,0 +1,602 @@ +package zlib + +import "core:compress" + +import "core:mem" +import "core:io" +import "core:bytes" +import "core:hash" +/* + zlib.inflate decompresses a ZLIB stream passed in as a []u8 or io.Stream. + Returns: Error. You can use zlib.is_kind or compress.is_kind to easily test for OK. +*/ + +Context :: compress.Context; + +Compression_Method :: enum u8 { + DEFLATE = 8, + Reserved = 15, +} + +Compression_Level :: enum u8 { + Fastest = 0, + Fast = 1, + Default = 2, + Maximum = 3, +} + +Options :: struct { + window_size: u16, + level: u8, +} + +Error :: compress.Error; +E_General :: compress.General_Error; +E_ZLIB :: compress.ZLIB_Error; +E_Deflate :: compress.Deflate_Error; +is_kind :: compress.is_kind; + +DEFLATE_MAX_CHUNK_SIZE :: 65535; +DEFLATE_MAX_LITERAL_SIZE :: 65535; +DEFLATE_MAX_DISTANCE :: 32768; +DEFLATE_MAX_LENGTH :: 258; + +HUFFMAN_MAX_BITS :: 16; +HUFFMAN_FAST_BITS :: 9; +HUFFMAN_FAST_MASK :: ((1 << HUFFMAN_FAST_BITS) - 1); + +Z_LENGTH_BASE := [31]u16{ + 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59, + 67,83,99,115,131,163,195,227,258,0,0, +}; + +Z_LENGTH_EXTRA := [31]u8{ + 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0, +}; + +Z_DIST_BASE := [32]u16{ + 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, + 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0, +}; + +Z_DIST_EXTRA := [32]u8{ + 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,0,0, +}; + +Z_LENGTH_DEZIGZAG := []u8{ + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15, +}; + +Z_FIXED_LENGTH := [288]u8{ + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8, +}; + +Z_FIXED_DIST := [32]u8{ + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +}; + +/* + Accelerate all cases in default tables. +*/ +ZFAST_BITS :: 9; +ZFAST_MASK :: ((1 << ZFAST_BITS) - 1); + +/* + ZLIB-style Huffman encoding. + JPEG packs from left, ZLIB from right. We can't share code. +*/ +Huffman_Table :: struct { + fast: [1 << ZFAST_BITS]u16, + firstcode: [16]u16, + maxcode: [17]int, + firstsymbol: [16]u16, + size: [288]u8, + value: [288]u16, +}; + +// Implementation starts here + +z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) { + assert(bits <= 16); + // NOTE: Can optimize with llvm.bitreverse.i64 or some bit twiddling + // by reversing all of the bits and masking out the unneeded ones. + r = n; + r = ((r & 0xAAAA) >> 1) | ((r & 0x5555) << 1); + r = ((r & 0xCCCC) >> 2) | ((r & 0x3333) << 2); + r = ((r & 0xF0F0) >> 4) | ((r & 0x0F0F) << 4); + r = ((r & 0xFF00) >> 8) | ((r & 0x00FF) << 8); + + r >>= (16 - bits); + return; +} + +write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_bounds_check { + c := c; + buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1}; + z.rolling_hash = hash.adler32(buf, z.rolling_hash); + + _, e := z.output->impl_write(buf); + if e != .None { + return e; + } + z.last[z.bytes_written % z.window_size] = c; + + z.bytes_written += 1; + return .None; +} + +allocate_huffman_table :: proc(allocator := context.allocator) -> (z: ^Huffman_Table, err: Error) { + + z = new(Huffman_Table, allocator); + return z, E_General.OK; +} + +build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) { + sizes: [HUFFMAN_MAX_BITS+1]int; + next_code: [HUFFMAN_MAX_BITS]int; + + k := int(0); + + mem.zero_slice(sizes[:]); + mem.zero_slice(z.fast[:]); + + for v, _ in code_lengths { + sizes[v] += 1; + } + sizes[0] = 0; + + for i in 1..16 { + if sizes[i] > (1 << uint(i)) { + return E_Deflate.Huffman_Bad_Sizes; + } + } + code := int(0); + + for i in 1..<16 { + next_code[i] = code; + z.firstcode[i] = u16(code); + z.firstsymbol[i] = u16(k); + code = code + sizes[i]; + if sizes[i] != 0 { + if (code - 1 >= (1 << u16(i))) { + return E_Deflate.Huffman_Bad_Code_Lengths; + } + } + z.maxcode[i] = code << (16 - uint(i)); + code <<= 1; + k += int(sizes[i]); + } + + z.maxcode[16] = 0x10000; // Sentinel + c: int; + + for v, ci in code_lengths { + if v != 0 { + c = next_code[v] - int(z.firstcode[v]) + int(z.firstsymbol[v]); + fastv := u16((u16(v) << 9) | u16(ci)); + z.size[c] = u8(v); + z.value[c] = u16(ci); + if (v <= ZFAST_BITS) { + j := z_bit_reverse(u16(next_code[v]), v); + for j < (1 << ZFAST_BITS) { + z.fast[j] = fastv; + j += (1 << v); + } + } + next_code[v] += 1; + } + } + return E_General.OK; +} + +decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check { + + r = 0; + err = E_General.OK; + + k: int; + s: u8; + + code := u16(compress.peek_bits_lsb(z, 16)); + + k = int(z_bit_reverse(code, 16)); + + #no_bounds_check for s = HUFFMAN_FAST_BITS+1; ; { + if k < t.maxcode[s] { + break; + } + s += 1; + } + if (s >= 16) { + return 0, E_Deflate.Bad_Huffman_Code; + } + // code size is s, so: + b := (k >> (16-s)) - int(t.firstcode[s]) + int(t.firstsymbol[s]); + if b >= size_of(t.size) { + return 0, E_Deflate.Bad_Huffman_Code; + } + if t.size[b] != s { + return 0, E_Deflate.Bad_Huffman_Code; + } + + compress.consume_bits_lsb(z, s); + + r = t.value[b]; + return r, E_General.OK; +} + +decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check { + + if z.num_bits < 16 { + if z.num_bits == -100 { + return 0, E_ZLIB.Code_Buffer_Malformed; + } + compress.refill_lsb(z); + if z.eof { + return 0, E_General.Stream_Too_Short; + } + } + #no_bounds_check b := t.fast[z.code_buffer & ZFAST_MASK]; + if b != 0 { + s := u8(b >> ZFAST_BITS); + compress.consume_bits_lsb(z, s); + return b & 511, E_General.OK; + } + return decode_huffman_slowpath(z, t); +} + +parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check { + + #no_bounds_check for { + value, e := decode_huffman(z, z_repeat); + if !is_kind(e, E_General.OK) { + return err; + } + if value < 256 { + e := write_byte(z, u8(value)); + if e != .None { + return E_General.Output_Too_Short; + } + } else { + if value == 256 { + // End of block + return E_General.OK; + } + + value -= 257; + length := Z_LENGTH_BASE[value]; + if Z_LENGTH_EXTRA[value] > 0 { + length += u16(compress.read_bits_lsb(z, Z_LENGTH_EXTRA[value])); + } + + value, e = decode_huffman(z, z_offset); + if !is_kind(e, E_General.OK) { + return E_Deflate.Bad_Huffman_Code; + } + + distance := Z_DIST_BASE[value]; + if Z_DIST_EXTRA[value] > 0 { + distance += u16(compress.read_bits_lsb(z, Z_DIST_EXTRA[value])); + } + + if z.bytes_written < i64(distance) { + // Distance is longer than we've decoded so far. + return E_Deflate.Bad_Distance; + } + + offset := i64(z.bytes_written - i64(distance)); + /* + These might be sped up with a repl_byte call that copies + from the already written output more directly, and that + update the Adler checksum once after. + + That way we'd suffer less Stream vtable overhead. + */ + if distance == 1 { + /* + Replicate the last outputted byte, length times. + */ + if length > 0 { + b, e := compress.peek_back_byte(z, offset); + if e != .None { + return E_General.Output_Too_Short; + } + #no_bounds_check for _ in 0..<length { + write_byte(z, b); + } + } + } else { + if length > 0 { + #no_bounds_check for _ in 0..<length { + b, e := compress.peek_back_byte(z, offset); + if e != .None { + return E_General.Output_Too_Short; + } + write_byte(z, b); + offset += 1; + } + } + } + } + } +} + +inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := context.allocator) -> (err: Error) #no_bounds_check { + /* + ctx.input must be an io.Stream backed by an implementation that supports: + - read + - size + + ctx.output must be an io.Stream backed by an implementation that supports: + - write + + raw determines whether the ZLIB header is processed, or we're inflating a raw + DEFLATE stream. + */ + + if !raw { + data_size := io.size(ctx.input); + if data_size < 6 { + return E_General.Stream_Too_Short; + } + + cmf, _ := compress.read_u8(ctx); + + method := Compression_Method(cmf & 0xf); + if method != .DEFLATE { + return E_General.Unknown_Compression_Method; + } + + cinfo := (cmf >> 4) & 0xf; + if cinfo > 7 { + return E_ZLIB.Unsupported_Window_Size; + } + ctx.window_size = 1 << (cinfo + 8); + + flg, _ := compress.read_u8(ctx); + + fcheck := flg & 0x1f; + fcheck_computed := (cmf << 8 | flg) & 0x1f; + if fcheck != fcheck_computed { + return E_General.Checksum_Failed; + } + + fdict := (flg >> 5) & 1; + /* + We don't handle built-in dictionaries for now. + They're application specific and PNG doesn't use them. + */ + if fdict != 0 { + return E_ZLIB.FDICT_Unsupported; + } + + // flevel := Compression_Level((flg >> 6) & 3); + /* + Inflate can consume bits belonging to the Adler checksum. + We pass the entire stream to Inflate and will unget bytes if we need to + at the end to compare checksums. + */ + + // Seed the Adler32 rolling checksum. + ctx.rolling_hash = 1; + } + + // Parse ZLIB stream without header. + err = inflate_raw(ctx); + if !is_kind(err, E_General.OK) { + return err; + } + + if !raw { + compress.discard_to_next_byte_lsb(ctx); + + adler32 := compress.read_bits_lsb(ctx, 8) << 24 | compress.read_bits_lsb(ctx, 8) << 16 | compress.read_bits_lsb(ctx, 8) << 8 | compress.read_bits_lsb(ctx, 8); + if ctx.rolling_hash != u32(adler32) { + return E_General.Checksum_Failed; + } + } + return E_General.OK; +} + +// @(optimization_mode="speed") +inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) -> (err: Error) #no_bounds_check { + final := u32(0); + type := u32(0); + + z.num_bits = 0; + z.code_buffer = 0; + + z_repeat: ^Huffman_Table; + z_offset: ^Huffman_Table; + codelength_ht: ^Huffman_Table; + + z_repeat, err = allocate_huffman_table(allocator=context.allocator); + if !is_kind(err, E_General.OK) { + return err; + } + z_offset, err = allocate_huffman_table(allocator=context.allocator); + if !is_kind(err, E_General.OK) { + return err; + } + codelength_ht, err = allocate_huffman_table(allocator=context.allocator); + if !is_kind(err, E_General.OK) { + return err; + } + defer free(z_repeat); + defer free(z_offset); + defer free(codelength_ht); + + if z.window_size == 0 { + z.window_size = DEFLATE_MAX_DISTANCE; + } + + // Allocate rolling window buffer. + last_b := mem.make_dynamic_array_len_cap([dynamic]u8, z.window_size, z.window_size, allocator); + z.last = &last_b; + defer delete(last_b); + + for { + final = compress.read_bits_lsb(z, 1); + type = compress.read_bits_lsb(z, 2); + + // log.debugf("Final: %v | Type: %v\n", final, type); + + if type == 0 { + // Uncompressed block + + // Discard bits until next byte boundary + compress.discard_to_next_byte_lsb(z); + + uncompressed_len := int(compress.read_bits_lsb(z, 16)); + length_check := int(compress.read_bits_lsb(z, 16)); + if uncompressed_len != ~length_check { + return E_Deflate.Len_Nlen_Mismatch; + } + + /* + TODO: Maybe speed this up with a stream-to-stream copy (read_from) + and a single Adler32 update after. + */ + #no_bounds_check for uncompressed_len > 0 { + compress.refill_lsb(z); + lit := compress.read_bits_lsb(z, 8); + write_byte(z, u8(lit)); + uncompressed_len -= 1; + } + } else if type == 3 { + return E_Deflate.BType_3; + } else { + // log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type); + if type == 1 { + // Use fixed code lengths. + err = build_huffman(z_repeat, Z_FIXED_LENGTH[:]); + if !is_kind(err, E_General.OK) { + return err; + } + err = build_huffman(z_offset, Z_FIXED_DIST[:]); + if !is_kind(err, E_General.OK) { + return err; + } + } else { + lencodes: [286+32+137]u8; + codelength_sizes: [19]u8; + + //i: u32; + n: u32; + + compress.refill_lsb(z, 14); + hlit := compress.read_bits_no_refill_lsb(z, 5) + 257; + hdist := compress.read_bits_no_refill_lsb(z, 5) + 1; + hclen := compress.read_bits_no_refill_lsb(z, 4) + 4; + ntot := hlit + hdist; + + #no_bounds_check for i in 0..<hclen { + s := compress.read_bits_lsb(z, 3); + codelength_sizes[Z_LENGTH_DEZIGZAG[i]] = u8(s); + } + err = build_huffman(codelength_ht, codelength_sizes[:]); + if !is_kind(err, E_General.OK) { + return err; + } + + n = 0; + c: u16; + + for n < ntot { + c, err = decode_huffman(z, codelength_ht); + if !is_kind(err, E_General.OK) { + return err; + } + + if c < 0 || c >= 19 { + return E_Deflate.Huffman_Bad_Code_Lengths; + } + if c < 16 { + lencodes[n] = u8(c); + n += 1; + } else { + fill := u8(0); + compress.refill_lsb(z, 7); + if c == 16 { + c = u16(compress.read_bits_no_refill_lsb(z, 2) + 3); + if n == 0 { + return E_Deflate.Huffman_Bad_Code_Lengths; + } + fill = lencodes[n - 1]; + } else if c == 17 { + c = u16(compress.read_bits_no_refill_lsb(z, 3) + 3); + } else if c == 18 { + c = u16(compress.read_bits_no_refill_lsb(z, 7) + 11); + } else { + return E_Deflate.Huffman_Bad_Code_Lengths; + } + + if ntot - n < u32(c) { + return E_Deflate.Huffman_Bad_Code_Lengths; + } + + nc := n + u32(c); + #no_bounds_check for ; n < nc; n += 1 { + lencodes[n] = fill; + } + } + } + + if n != ntot { + return E_Deflate.Huffman_Bad_Code_Lengths; + } + + err = build_huffman(z_repeat, lencodes[:hlit]); + if !is_kind(err, E_General.OK) { + return err; + } + + err = build_huffman(z_offset, lencodes[hlit:ntot]); + if !is_kind(err, E_General.OK) { + return err; + } + } + err = parse_huffman_block(z, z_repeat, z_offset); + // log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type); + if !is_kind(err, E_General.OK) { + return err; + } + } + if final == 1 { + break; + } + } + return E_General.OK; +} + +inflate_from_byte_array :: proc(input: ^[]u8, buf: ^bytes.Buffer, raw := false) -> (err: Error) { + ctx := Context{}; + + r := bytes.Reader{}; + bytes.reader_init(&r, input^); + rs := bytes.reader_to_stream(&r); + ctx.input = rs; + + buf := buf; + ws := bytes.buffer_to_stream(buf); + ctx.output = ws; + + err = inflate_from_stream(&ctx, raw); + + return err; +} + +inflate_from_byte_array_raw :: proc(input: ^[]u8, buf: ^bytes.Buffer, raw := false) -> (err: Error) { + return inflate_from_byte_array(input, buf, true); +} + +inflate :: proc{inflate_from_stream, inflate_from_byte_array}; +inflate_raw :: proc{inflate_from_stream_raw, inflate_from_byte_array_raw};
\ No newline at end of file |