aboutsummaryrefslogtreecommitdiff
path: root/core/compress
diff options
context:
space:
mode:
authorJeroen van Rijn <Kelimion@users.noreply.github.com>2021-04-30 00:21:52 +0200
committerJeroen van Rijn <Kelimion@users.noreply.github.com>2021-04-30 00:21:52 +0200
commit58e023e0cf581db71b4bf3c341b781a2f09ff50a (patch)
treecf070b07edb9d8e3e0cb3dd849d26cf8228377c5 /core/compress
parent222bab501ce886692f300b02d15b9e7099457406 (diff)
Add `compress` and `image` to core.
Diffstat (limited to 'core/compress')
-rw-r--r--core/compress/common.odin203
-rw-r--r--core/compress/gzip/example.odin70
-rw-r--r--core/compress/gzip/gzip.odin314
-rw-r--r--core/compress/zlib/example.odin42
-rw-r--r--core/compress/zlib/zlib.odin602
5 files changed, 1231 insertions, 0 deletions
diff --git a/core/compress/common.odin b/core/compress/common.odin
new file mode 100644
index 000000000..99b054903
--- /dev/null
+++ b/core/compress/common.odin
@@ -0,0 +1,203 @@
+package compress
+
+import "core:io"
+import "core:image"
+
+// Error helper, e.g. is_kind(err, General_Error.OK);
+is_kind :: proc(u: $U, x: $V) -> bool {
+ v, ok := u.(V);
+ return ok && v == x;
+}
+
+Error :: union {
+ General_Error,
+ Deflate_Error,
+ ZLIB_Error,
+ GZIP_Error,
+ ZIP_Error,
+ /*
+ This is here because png.load will return a this type of error union,
+ as it may involve an I/O error, a Deflate error, etc.
+ */
+ image.PNG_Error,
+}
+
+General_Error :: enum {
+ OK = 0,
+ File_Not_Found,
+ Cannot_Open_File,
+ File_Too_Short,
+ Stream_Too_Short,
+ Output_Too_Short,
+ Unknown_Compression_Method,
+ Checksum_Failed,
+ Incompatible_Options,
+ Unimplemented,
+}
+
+GZIP_Error :: enum {
+ Invalid_GZIP_Signature,
+ Reserved_Flag_Set,
+ Invalid_Extra_Data,
+ Original_Name_Too_Long,
+ Comment_Too_Long,
+ Payload_Length_Invalid,
+ Payload_CRC_Invalid,
+}
+
+ZIP_Error :: enum {
+ Invalid_ZIP_File_Signature,
+ Unexpected_Signature,
+ Insert_Next_Disk,
+ Expected_End_of_Central_Directory_Record,
+}
+
+ZLIB_Error :: enum {
+ Unsupported_Window_Size,
+ FDICT_Unsupported,
+ Unsupported_Compression_Level,
+ Code_Buffer_Malformed,
+}
+
+Deflate_Error :: enum {
+ Huffman_Bad_Sizes,
+ Huffman_Bad_Code_Lengths,
+ Inflate_Error,
+ Bad_Distance,
+ Bad_Huffman_Code,
+ Len_Nlen_Mismatch,
+ BType_3,
+}
+
+// General context for ZLIB, LZW, etc.
+Context :: struct {
+ code_buffer: u32,
+ num_bits: i8,
+ /*
+ num_bits will be set to -100 if the buffer is malformed
+ */
+ eof: b8,
+
+ input: io.Stream,
+ output: io.Stream,
+ bytes_written: i64,
+ // Used to update hash as we write instead of all at once
+ rolling_hash: u32,
+
+ // Sliding window buffer. Size must be a power of two.
+ window_size: i64,
+ last: ^[dynamic]byte,
+}
+
+// Stream helpers
+/*
+ TODO: These need to be optimized.
+
+ Streams should really only check if a certain method is available once, perhaps even during setup.
+
+ Bit and byte readers may be merged so that reading bytes will grab them from the bit buffer first.
+ This simplifies end-of-stream handling where bits may be left in the bit buffer.
+*/
+
+read_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+ b := make([]u8, size_of(T), context.temp_allocator);
+ r, e1 := io.to_reader(c.input);
+ _, e2 := io.read(r, b);
+ if !e1 || e2 != .None {
+ return T{}, e2;
+ }
+
+ res = (^T)(raw_data(b))^;
+ return res, .None;
+}
+
+read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
+ return read_data(z, u8);
+}
+
+peek_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+ // Get current position to read from.
+ curr, e1 := c.input->impl_seek(0, .Current);
+ if e1 != .None {
+ return T{}, e1;
+ }
+ r, e2 := io.to_reader_at(c.input);
+ if !e2 {
+ return T{}, .Empty;
+ }
+ b := make([]u8, size_of(T), context.temp_allocator);
+ _, e3 := io.read_at(r, b, curr);
+ if e3 != .None {
+ return T{}, .Empty;
+ }
+
+ res = (^T)(raw_data(b))^;
+ return res, .None;
+}
+
+// Sliding window read back
+peek_back_byte :: proc(c: ^Context, offset: i64) -> (res: u8, err: io.Error) {
+ // Look back into the sliding window.
+ return c.last[offset % c.window_size], .None;
+}
+
+// Generalized bit reader LSB
+refill_lsb :: proc(z: ^Context, width := i8(24)) {
+ for {
+ if z.num_bits > width {
+ break;
+ }
+ if z.code_buffer == 0 && z.num_bits == -1 {
+ z.num_bits = 0;
+ }
+ if z.code_buffer >= 1 << uint(z.num_bits) {
+ // Code buffer is malformed.
+ z.num_bits = -100;
+ return;
+ }
+ c, err := read_u8(z);
+ if err != .None {
+ // This is fine at the end of the file.
+ z.num_bits = -42;
+ z.eof = true;
+ return;
+ }
+ z.code_buffer |= (u32(c) << u8(z.num_bits));
+ z.num_bits += 8;
+ }
+}
+
+consume_bits_lsb :: #force_inline proc(z: ^Context, width: u8) {
+ z.code_buffer >>= width;
+ z.num_bits -= i8(width);
+}
+
+peek_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+ if z.num_bits < i8(width) {
+ refill_lsb(z);
+ }
+ // assert(z.num_bits >= i8(width));
+ return z.code_buffer & ~(~u32(0) << width);
+}
+
+peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+ assert(z.num_bits >= i8(width));
+ return z.code_buffer & ~(~u32(0) << width);
+}
+
+read_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+ k := peek_bits_lsb(z, width);
+ consume_bits_lsb(z, width);
+ return k;
+}
+
+read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+ k := peek_bits_no_refill_lsb(z, width);
+ consume_bits_lsb(z, width);
+ return k;
+}
+
+discard_to_next_byte_lsb :: proc(z: ^Context) {
+ discard := u8(z.num_bits & 7);
+ consume_bits_lsb(z, discard);
+} \ No newline at end of file
diff --git a/core/compress/gzip/example.odin b/core/compress/gzip/example.odin
new file mode 100644
index 000000000..fa0cba9db
--- /dev/null
+++ b/core/compress/gzip/example.odin
@@ -0,0 +1,70 @@
+//+ignore
+package gzip
+
+import "core:compress/gzip"
+import "core:bytes"
+import "core:os"
+
+// Small GZIP file with fextra, fname and fcomment present.
+@private
+TEST: []u8 = {
+ 0x1f, 0x8b, 0x08, 0x1c, 0xcb, 0x3b, 0x3a, 0x5a,
+ 0x02, 0x03, 0x07, 0x00, 0x61, 0x62, 0x03, 0x00,
+ 0x63, 0x64, 0x65, 0x66, 0x69, 0x6c, 0x65, 0x6e,
+ 0x61, 0x6d, 0x65, 0x00, 0x54, 0x68, 0x69, 0x73,
+ 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x6f,
+ 0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x00, 0x2b, 0x48,
+ 0xac, 0xcc, 0xc9, 0x4f, 0x4c, 0x01, 0x00, 0x15,
+ 0x6a, 0x2c, 0x42, 0x07, 0x00, 0x00, 0x00,
+};
+
+main :: proc() {
+ // Set up output buffer.
+ buf: bytes.Buffer;
+ defer bytes.buffer_destroy(&buf);
+
+ stdout :: proc(s: string) {
+ os.write_string(os.stdout, s);
+ }
+ stderr :: proc(s: string) {
+ os.write_string(os.stderr, s);
+ }
+
+ args := os.args;
+
+ if len(args) < 2 {
+ stderr("No input file specified.\n");
+ err := gzip.load(&TEST, &buf);
+ if gzip.is_kind(err, gzip.E_General.OK) {
+ stdout("Displaying test vector: ");
+ stdout(bytes.buffer_to_string(&buf));
+ stdout("\n");
+ }
+ }
+
+ // The rest are all files.
+ args = args[1:];
+ err: gzip.Error;
+
+ for file in args {
+ if file == "-" {
+ // Read from stdin
+ s := os.stream_from_handle(os.stdin);
+ err = gzip.load(&s, &buf);
+ } else {
+ err = gzip.load(file, &buf);
+ }
+ if !gzip.is_kind(err, gzip.E_General.OK) {
+ if gzip.is_kind(err, gzip.E_General.File_Not_Found) {
+ stderr("File not found: ");
+ stderr(file);
+ stderr("\n");
+ os.exit(1);
+ }
+ stderr("GZIP returned an error.\n");
+ os.exit(2);
+ }
+ stdout(bytes.buffer_to_string(&buf));
+ }
+ os.exit(0);
+} \ No newline at end of file
diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin
new file mode 100644
index 000000000..3278d7c1d
--- /dev/null
+++ b/core/compress/gzip/gzip.odin
@@ -0,0 +1,314 @@
+package gzip
+
+import "core:compress/zlib"
+import "core:compress"
+import "core:os"
+import "core:io"
+import "core:bytes"
+import "core:hash"
+
+/*
+
+ This package implements support for the GZIP file format v4.3,
+ as specified in RFC 1952.
+
+ It is implemented in such a way that it lends itself naturally
+ to be the input to a complementary TAR implementation.
+
+*/
+
+Magic :: enum u16le {
+ GZIP = 0x8b << 8 | 0x1f,
+}
+
+Header :: struct #packed {
+ magic: Magic,
+ compression_method: Compression,
+ flags: Header_Flags,
+ modification_time: u32le,
+ xfl: Compression_Flags,
+ os: OS,
+}
+#assert(size_of(Header) == 10);
+
+Header_Flag :: enum u8 {
+ // Order is important
+ text = 0,
+ header_crc = 1,
+ extra = 2,
+ name = 3,
+ comment = 4,
+ reserved_1 = 5,
+ reserved_2 = 6,
+ reserved_3 = 7,
+}
+Header_Flags :: distinct bit_set[Header_Flag; u8];
+
+OS :: enum u8 {
+ FAT = 0,
+ Amiga = 1,
+ VMS = 2,
+ Unix = 3,
+ VM_CMS = 4,
+ Atari_TOS = 5,
+ HPFS = 6,
+ Macintosh = 7,
+ Z_System = 8,
+ CP_M = 9,
+ TOPS_20 = 10,
+ NTFS = 11,
+ QDOS = 12,
+ Acorn_RISCOS = 13,
+ _Unknown = 14,
+ Unknown = 255,
+}
+OS_Name :: #partial [OS]string{
+ .FAT = "FAT",
+ .Amiga = "Amiga",
+ .VMS = "VMS/OpenVMS",
+ .Unix = "Unix",
+ .VM_CMS = "VM/CMS",
+ .Atari_TOS = "Atari TOS",
+ .HPFS = "HPFS",
+ .Macintosh = "Macintosh",
+ .Z_System = "Z-System",
+ .CP_M = "CP/M",
+ .TOPS_20 = "TOPS-20",
+ .NTFS = "NTFS",
+ .QDOS = "QDOS",
+ .Acorn_RISCOS = "Acorn RISCOS",
+ .Unknown = "Unknown",
+};
+
+Compression :: enum u8 {
+ DEFLATE = 8,
+}
+
+Compression_Flags :: enum u8 {
+ Maximum_Compression = 2,
+ Fastest_Compression = 4,
+}
+
+Error :: compress.Error;
+E_General :: compress.General_Error;
+E_GZIP :: compress.GZIP_Error;
+E_ZLIB :: compress.ZLIB_Error;
+E_Deflate :: compress.Deflate_Error;
+is_kind :: compress.is_kind;
+
+load_from_slice :: proc(slice: ^[]u8, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
+
+ r := bytes.Reader{};
+ bytes.reader_init(&r, slice^);
+ stream := bytes.reader_to_stream(&r);
+
+ err = load_from_stream(&stream, buf, allocator);
+
+ return err;
+}
+
+load_from_file :: proc(filename: string, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
+ data, ok := os.read_entire_file(filename, context.temp_allocator);
+ if ok {
+ err = load_from_slice(&data, buf, allocator);
+ return;
+ } else {
+ return E_General.File_Not_Found;
+ }
+}
+
+load_from_stream :: proc(stream: ^io.Stream, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
+
+ ctx := compress.Context{
+ input = stream^,
+ };
+ buf := buf;
+ ws := bytes.buffer_to_stream(buf);
+ ctx.output = ws;
+
+ header, e := compress.read_data(&ctx, Header);
+ if e != .None {
+ return E_General.File_Too_Short;
+ }
+
+ if header.magic != .GZIP {
+ return E_GZIP.Invalid_GZIP_Signature;
+ }
+ if header.compression_method != .DEFLATE {
+ return E_General.Unknown_Compression_Method;
+ }
+
+ if header.os >= ._Unknown {
+ header.os = .Unknown;
+ }
+
+ if .reserved_1 in header.flags || .reserved_2 in header.flags || .reserved_3 in header.flags {
+ return E_GZIP.Reserved_Flag_Set;
+ }
+
+ // printf("signature: %v\n", header.magic);
+ // printf("compression: %v\n", header.compression_method);
+ // printf("flags: %v\n", header.flags);
+ // printf("modification time: %v\n", time.unix(i64(header.modification_time), 0));
+ // printf("xfl: %v (%v)\n", header.xfl, int(header.xfl));
+ // printf("os: %v\n", OS_Name[header.os]);
+
+ if .extra in header.flags {
+ xlen, e_extra := compress.read_data(&ctx, u16le);
+ if e_extra != .None {
+ return E_General.Stream_Too_Short;
+ }
+ // printf("Extra data present (%v bytes)\n", xlen);
+ if xlen < 4 {
+ // Minimum length is 2 for ID + 2 for a field length, if set to zero.
+ return E_GZIP.Invalid_Extra_Data;
+ }
+
+ field_id: [2]u8;
+ field_length: u16le;
+ field_error: io.Error;
+
+ for xlen >= 4 {
+ // println("Parsing Extra field(s).");
+ field_id, field_error = compress.read_data(&ctx, [2]u8);
+ if field_error != .None {
+ // printf("Parsing Extra returned: %v\n", field_error);
+ return E_General.Stream_Too_Short;
+ }
+ xlen -= 2;
+
+ field_length, field_error = compress.read_data(&ctx, u16le);
+ if field_error != .None {
+ // printf("Parsing Extra returned: %v\n", field_error);
+ return E_General.Stream_Too_Short;
+ }
+ xlen -= 2;
+
+ if xlen <= 0 {
+ // We're not going to try and recover by scanning for a ZLIB header.
+ // Who knows what else is wrong with this file.
+ return E_GZIP.Invalid_Extra_Data;
+ }
+
+ // printf(" Field \"%v\" of length %v found: ", string(field_id[:]), field_length);
+ if field_length > 0 {
+ field_data := make([]u8, field_length, context.temp_allocator);
+ _, field_error = ctx.input->impl_read(field_data);
+ if field_error != .None {
+ // printf("Parsing Extra returned: %v\n", field_error);
+ return E_General.Stream_Too_Short;
+ }
+ xlen -= field_length;
+
+ // printf("%v\n", string(field_data));
+ }
+
+ if xlen != 0 {
+ return E_GZIP.Invalid_Extra_Data;
+ }
+ }
+ }
+
+ if .name in header.flags {
+ // Should be enough.
+ name: [1024]u8;
+ b: [1]u8;
+ i := 0;
+ name_error: io.Error;
+
+ for i < len(name) {
+ _, name_error = ctx.input->impl_read(b[:]);
+ if name_error != .None {
+ return E_General.Stream_Too_Short;
+ }
+ if b == 0 {
+ break;
+ }
+ name[i] = b[0];
+ i += 1;
+ if i >= len(name) {
+ return E_GZIP.Original_Name_Too_Long;
+ }
+ }
+ // printf("Original filename: %v\n", string(name[:i]));
+ }
+
+ if .comment in header.flags {
+ // Should be enough.
+ comment: [1024]u8;
+ b: [1]u8;
+ i := 0;
+ comment_error: io.Error;
+
+ for i < len(comment) {
+ _, comment_error = ctx.input->impl_read(b[:]);
+ if comment_error != .None {
+ return E_General.Stream_Too_Short;
+ }
+ if b == 0 {
+ break;
+ }
+ comment[i] = b[0];
+ i += 1;
+ if i >= len(comment) {
+ return E_GZIP.Comment_Too_Long;
+ }
+ }
+ // printf("Comment: %v\n", string(comment[:i]));
+ }
+
+ if .header_crc in header.flags {
+ crc16: [2]u8;
+ crc_error: io.Error;
+ _, crc_error = ctx.input->impl_read(crc16[:]);
+ if crc_error != .None {
+ return E_General.Stream_Too_Short;
+ }
+ /*
+ We don't actually check the CRC16 (lower 2 bytes of CRC32 of header data until the CRC field).
+ If we find a gzip file in the wild that sets this field, we can add proper support for it.
+ */
+ }
+
+ /*
+ We should have arrived at the ZLIB payload.
+ */
+
+ zlib_error := zlib.inflate_raw(&ctx);
+
+ // fmt.printf("ZLIB returned: %v\n", zlib_error);
+
+ if !is_kind(zlib_error, E_General.OK) || zlib_error == nil {
+ return zlib_error;
+ }
+
+ /*
+ Read CRC32 using the ctx bit reader because zlib may leave bytes in there.
+ */
+ compress.discard_to_next_byte_lsb(&ctx);
+
+ payload_crc_b: [4]u8;
+ payload_len_b: [4]u8;
+ for i in 0..3 {
+ payload_crc_b[i] = u8(compress.read_bits_lsb(&ctx, 8));
+ }
+ payload_crc := transmute(u32le)payload_crc_b;
+ for i in 0..3 {
+ payload_len_b[i] = u8(compress.read_bits_lsb(&ctx, 8));
+ }
+ payload_len := int(transmute(u32le)payload_len_b);
+
+ payload := bytes.buffer_to_bytes(buf);
+ crc32 := u32le(hash.crc32(payload));
+
+ if crc32 != payload_crc {
+ return E_GZIP.Payload_CRC_Invalid;
+ }
+
+ if len(payload) != payload_len {
+ return E_GZIP.Payload_Length_Invalid;
+ }
+ return E_General.OK;
+}
+
+load :: proc{load_from_file, load_from_slice, load_from_stream}; \ No newline at end of file
diff --git a/core/compress/zlib/example.odin b/core/compress/zlib/example.odin
new file mode 100644
index 000000000..a7fa76d62
--- /dev/null
+++ b/core/compress/zlib/example.odin
@@ -0,0 +1,42 @@
+//+ignore
+package zlib
+
+import "core:compress/zlib"
+import "core:bytes"
+import "core:fmt"
+
+main :: proc() {
+
+ ODIN_DEMO: []u8 = {
+ 120, 156, 101, 144, 77, 110, 131, 48, 16, 133, 215, 204, 41, 158, 44,
+ 69, 73, 32, 148, 182, 75, 35, 14, 208, 125, 47, 96, 185, 195, 143,
+ 130, 13, 50, 38, 81, 84, 101, 213, 75, 116, 215, 43, 246, 8, 53,
+ 82, 126, 8, 181, 188, 152, 153, 111, 222, 147, 159, 123, 165, 247, 170,
+ 98, 24, 213, 88, 162, 198, 244, 157, 243, 16, 186, 115, 44, 75, 227,
+ 5, 77, 115, 72, 137, 222, 117, 122, 179, 197, 39, 69, 161, 170, 156,
+ 50, 144, 5, 68, 130, 4, 49, 126, 127, 190, 191, 144, 34, 19, 57,
+ 69, 74, 235, 209, 140, 173, 242, 157, 155, 54, 158, 115, 162, 168, 12,
+ 181, 239, 246, 108, 17, 188, 174, 242, 224, 20, 13, 199, 198, 235, 250,
+ 194, 166, 129, 86, 3, 99, 157, 172, 37, 230, 62, 73, 129, 151, 252,
+ 70, 211, 5, 77, 31, 104, 188, 160, 113, 129, 215, 59, 205, 22, 52,
+ 123, 160, 83, 142, 255, 242, 89, 123, 93, 149, 200, 50, 188, 85, 54,
+ 252, 18, 248, 192, 238, 228, 235, 198, 86, 224, 118, 224, 176, 113, 166,
+ 112, 67, 106, 227, 159, 122, 215, 88, 95, 110, 196, 123, 205, 183, 224,
+ 98, 53, 8, 104, 213, 234, 201, 147, 7, 248, 192, 14, 170, 29, 25,
+ 171, 15, 18, 59, 138, 112, 63, 23, 205, 110, 254, 136, 109, 78, 231,
+ 63, 234, 138, 133, 204,
+ };
+
+ buf: bytes.Buffer;
+
+ // We can pass ", true" to inflate a raw DEFLATE stream instead of a ZLIB wrapped one.
+ err := zlib.inflate(&ODIN_DEMO, &buf);
+ defer bytes.buffer_destroy(&buf);
+
+ if !zlib.is_kind(err, zlib.E_General.OK) {
+ fmt.printf("\nError: %v\n", err);
+ }
+ s := bytes.buffer_to_string(&buf);
+ fmt.printf("Input: %v bytes, output (%v bytes):\n%v\n", len(ODIN_DEMO), len(s), s);
+ assert(len(s) == 438);
+} \ No newline at end of file
diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin
new file mode 100644
index 000000000..34a7984a7
--- /dev/null
+++ b/core/compress/zlib/zlib.odin
@@ -0,0 +1,602 @@
+package zlib
+
+import "core:compress"
+
+import "core:mem"
+import "core:io"
+import "core:bytes"
+import "core:hash"
+/*
+ zlib.inflate decompresses a ZLIB stream passed in as a []u8 or io.Stream.
+ Returns: Error. You can use zlib.is_kind or compress.is_kind to easily test for OK.
+*/
+
+Context :: compress.Context;
+
+Compression_Method :: enum u8 {
+ DEFLATE = 8,
+ Reserved = 15,
+}
+
+Compression_Level :: enum u8 {
+ Fastest = 0,
+ Fast = 1,
+ Default = 2,
+ Maximum = 3,
+}
+
+Options :: struct {
+ window_size: u16,
+ level: u8,
+}
+
+Error :: compress.Error;
+E_General :: compress.General_Error;
+E_ZLIB :: compress.ZLIB_Error;
+E_Deflate :: compress.Deflate_Error;
+is_kind :: compress.is_kind;
+
+DEFLATE_MAX_CHUNK_SIZE :: 65535;
+DEFLATE_MAX_LITERAL_SIZE :: 65535;
+DEFLATE_MAX_DISTANCE :: 32768;
+DEFLATE_MAX_LENGTH :: 258;
+
+HUFFMAN_MAX_BITS :: 16;
+HUFFMAN_FAST_BITS :: 9;
+HUFFMAN_FAST_MASK :: ((1 << HUFFMAN_FAST_BITS) - 1);
+
+Z_LENGTH_BASE := [31]u16{
+ 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,
+ 67,83,99,115,131,163,195,227,258,0,0,
+};
+
+Z_LENGTH_EXTRA := [31]u8{
+ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0,
+};
+
+Z_DIST_BASE := [32]u16{
+ 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
+ 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0,
+};
+
+Z_DIST_EXTRA := [32]u8{
+ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,0,0,
+};
+
+Z_LENGTH_DEZIGZAG := []u8{
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
+};
+
+Z_FIXED_LENGTH := [288]u8{
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,
+};
+
+Z_FIXED_DIST := [32]u8{
+ 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+};
+
+/*
+ Accelerate all cases in default tables.
+*/
+ZFAST_BITS :: 9;
+ZFAST_MASK :: ((1 << ZFAST_BITS) - 1);
+
+/*
+ ZLIB-style Huffman encoding.
+ JPEG packs from left, ZLIB from right. We can't share code.
+*/
+Huffman_Table :: struct {
+ fast: [1 << ZFAST_BITS]u16,
+ firstcode: [16]u16,
+ maxcode: [17]int,
+ firstsymbol: [16]u16,
+ size: [288]u8,
+ value: [288]u16,
+};
+
+// Implementation starts here
+
+z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
+ assert(bits <= 16);
+ // NOTE: Can optimize with llvm.bitreverse.i64 or some bit twiddling
+ // by reversing all of the bits and masking out the unneeded ones.
+ r = n;
+ r = ((r & 0xAAAA) >> 1) | ((r & 0x5555) << 1);
+ r = ((r & 0xCCCC) >> 2) | ((r & 0x3333) << 2);
+ r = ((r & 0xF0F0) >> 4) | ((r & 0x0F0F) << 4);
+ r = ((r & 0xFF00) >> 8) | ((r & 0x00FF) << 8);
+
+ r >>= (16 - bits);
+ return;
+}
+
+write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_bounds_check {
+ c := c;
+ buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1};
+ z.rolling_hash = hash.adler32(buf, z.rolling_hash);
+
+ _, e := z.output->impl_write(buf);
+ if e != .None {
+ return e;
+ }
+ z.last[z.bytes_written % z.window_size] = c;
+
+ z.bytes_written += 1;
+ return .None;
+}
+
+allocate_huffman_table :: proc(allocator := context.allocator) -> (z: ^Huffman_Table, err: Error) {
+
+ z = new(Huffman_Table, allocator);
+ return z, E_General.OK;
+}
+
+build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
+ sizes: [HUFFMAN_MAX_BITS+1]int;
+ next_code: [HUFFMAN_MAX_BITS]int;
+
+ k := int(0);
+
+ mem.zero_slice(sizes[:]);
+ mem.zero_slice(z.fast[:]);
+
+ for v, _ in code_lengths {
+ sizes[v] += 1;
+ }
+ sizes[0] = 0;
+
+ for i in 1..16 {
+ if sizes[i] > (1 << uint(i)) {
+ return E_Deflate.Huffman_Bad_Sizes;
+ }
+ }
+ code := int(0);
+
+ for i in 1..<16 {
+ next_code[i] = code;
+ z.firstcode[i] = u16(code);
+ z.firstsymbol[i] = u16(k);
+ code = code + sizes[i];
+ if sizes[i] != 0 {
+ if (code - 1 >= (1 << u16(i))) {
+ return E_Deflate.Huffman_Bad_Code_Lengths;
+ }
+ }
+ z.maxcode[i] = code << (16 - uint(i));
+ code <<= 1;
+ k += int(sizes[i]);
+ }
+
+ z.maxcode[16] = 0x10000; // Sentinel
+ c: int;
+
+ for v, ci in code_lengths {
+ if v != 0 {
+ c = next_code[v] - int(z.firstcode[v]) + int(z.firstsymbol[v]);
+ fastv := u16((u16(v) << 9) | u16(ci));
+ z.size[c] = u8(v);
+ z.value[c] = u16(ci);
+ if (v <= ZFAST_BITS) {
+ j := z_bit_reverse(u16(next_code[v]), v);
+ for j < (1 << ZFAST_BITS) {
+ z.fast[j] = fastv;
+ j += (1 << v);
+ }
+ }
+ next_code[v] += 1;
+ }
+ }
+ return E_General.OK;
+}
+
+decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+
+ r = 0;
+ err = E_General.OK;
+
+ k: int;
+ s: u8;
+
+ code := u16(compress.peek_bits_lsb(z, 16));
+
+ k = int(z_bit_reverse(code, 16));
+
+ #no_bounds_check for s = HUFFMAN_FAST_BITS+1; ; {
+ if k < t.maxcode[s] {
+ break;
+ }
+ s += 1;
+ }
+ if (s >= 16) {
+ return 0, E_Deflate.Bad_Huffman_Code;
+ }
+ // code size is s, so:
+ b := (k >> (16-s)) - int(t.firstcode[s]) + int(t.firstsymbol[s]);
+ if b >= size_of(t.size) {
+ return 0, E_Deflate.Bad_Huffman_Code;
+ }
+ if t.size[b] != s {
+ return 0, E_Deflate.Bad_Huffman_Code;
+ }
+
+ compress.consume_bits_lsb(z, s);
+
+ r = t.value[b];
+ return r, E_General.OK;
+}
+
+decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+
+ if z.num_bits < 16 {
+ if z.num_bits == -100 {
+ return 0, E_ZLIB.Code_Buffer_Malformed;
+ }
+ compress.refill_lsb(z);
+ if z.eof {
+ return 0, E_General.Stream_Too_Short;
+ }
+ }
+ #no_bounds_check b := t.fast[z.code_buffer & ZFAST_MASK];
+ if b != 0 {
+ s := u8(b >> ZFAST_BITS);
+ compress.consume_bits_lsb(z, s);
+ return b & 511, E_General.OK;
+ }
+ return decode_huffman_slowpath(z, t);
+}
+
+parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
+
+ #no_bounds_check for {
+ value, e := decode_huffman(z, z_repeat);
+ if !is_kind(e, E_General.OK) {
+ return err;
+ }
+ if value < 256 {
+ e := write_byte(z, u8(value));
+ if e != .None {
+ return E_General.Output_Too_Short;
+ }
+ } else {
+ if value == 256 {
+ // End of block
+ return E_General.OK;
+ }
+
+ value -= 257;
+ length := Z_LENGTH_BASE[value];
+ if Z_LENGTH_EXTRA[value] > 0 {
+ length += u16(compress.read_bits_lsb(z, Z_LENGTH_EXTRA[value]));
+ }
+
+ value, e = decode_huffman(z, z_offset);
+ if !is_kind(e, E_General.OK) {
+ return E_Deflate.Bad_Huffman_Code;
+ }
+
+ distance := Z_DIST_BASE[value];
+ if Z_DIST_EXTRA[value] > 0 {
+ distance += u16(compress.read_bits_lsb(z, Z_DIST_EXTRA[value]));
+ }
+
+ if z.bytes_written < i64(distance) {
+ // Distance is longer than we've decoded so far.
+ return E_Deflate.Bad_Distance;
+ }
+
+ offset := i64(z.bytes_written - i64(distance));
+ /*
+ These might be sped up with a repl_byte call that copies
+ from the already written output more directly, and that
+ update the Adler checksum once after.
+
+ That way we'd suffer less Stream vtable overhead.
+ */
+ if distance == 1 {
+ /*
+ Replicate the last outputted byte, length times.
+ */
+ if length > 0 {
+ b, e := compress.peek_back_byte(z, offset);
+ if e != .None {
+ return E_General.Output_Too_Short;
+ }
+ #no_bounds_check for _ in 0..<length {
+ write_byte(z, b);
+ }
+ }
+ } else {
+ if length > 0 {
+ #no_bounds_check for _ in 0..<length {
+ b, e := compress.peek_back_byte(z, offset);
+ if e != .None {
+ return E_General.Output_Too_Short;
+ }
+ write_byte(z, b);
+ offset += 1;
+ }
+ }
+ }
+ }
+ }
+}
+
+inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+ /*
+ ctx.input must be an io.Stream backed by an implementation that supports:
+ - read
+ - size
+
+ ctx.output must be an io.Stream backed by an implementation that supports:
+ - write
+
+ raw determines whether the ZLIB header is processed, or we're inflating a raw
+ DEFLATE stream.
+ */
+
+ if !raw {
+ data_size := io.size(ctx.input);
+ if data_size < 6 {
+ return E_General.Stream_Too_Short;
+ }
+
+ cmf, _ := compress.read_u8(ctx);
+
+ method := Compression_Method(cmf & 0xf);
+ if method != .DEFLATE {
+ return E_General.Unknown_Compression_Method;
+ }
+
+ cinfo := (cmf >> 4) & 0xf;
+ if cinfo > 7 {
+ return E_ZLIB.Unsupported_Window_Size;
+ }
+ ctx.window_size = 1 << (cinfo + 8);
+
+ flg, _ := compress.read_u8(ctx);
+
+ fcheck := flg & 0x1f;
+ fcheck_computed := (cmf << 8 | flg) & 0x1f;
+ if fcheck != fcheck_computed {
+ return E_General.Checksum_Failed;
+ }
+
+ fdict := (flg >> 5) & 1;
+ /*
+ We don't handle built-in dictionaries for now.
+ They're application specific and PNG doesn't use them.
+ */
+ if fdict != 0 {
+ return E_ZLIB.FDICT_Unsupported;
+ }
+
+ // flevel := Compression_Level((flg >> 6) & 3);
+ /*
+ Inflate can consume bits belonging to the Adler checksum.
+ We pass the entire stream to Inflate and will unget bytes if we need to
+ at the end to compare checksums.
+ */
+
+ // Seed the Adler32 rolling checksum.
+ ctx.rolling_hash = 1;
+ }
+
+ // Parse ZLIB stream without header.
+ err = inflate_raw(ctx);
+ if !is_kind(err, E_General.OK) {
+ return err;
+ }
+
+ if !raw {
+ compress.discard_to_next_byte_lsb(ctx);
+
+ adler32 := compress.read_bits_lsb(ctx, 8) << 24 | compress.read_bits_lsb(ctx, 8) << 16 | compress.read_bits_lsb(ctx, 8) << 8 | compress.read_bits_lsb(ctx, 8);
+ if ctx.rolling_hash != u32(adler32) {
+ return E_General.Checksum_Failed;
+ }
+ }
+ return E_General.OK;
+}
+
+// @(optimization_mode="speed")
+inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+ final := u32(0);
+ type := u32(0);
+
+ z.num_bits = 0;
+ z.code_buffer = 0;
+
+ z_repeat: ^Huffman_Table;
+ z_offset: ^Huffman_Table;
+ codelength_ht: ^Huffman_Table;
+
+ z_repeat, err = allocate_huffman_table(allocator=context.allocator);
+ if !is_kind(err, E_General.OK) {
+ return err;
+ }
+ z_offset, err = allocate_huffman_table(allocator=context.allocator);
+ if !is_kind(err, E_General.OK) {
+ return err;
+ }
+ codelength_ht, err = allocate_huffman_table(allocator=context.allocator);
+ if !is_kind(err, E_General.OK) {
+ return err;
+ }
+ defer free(z_repeat);
+ defer free(z_offset);
+ defer free(codelength_ht);
+
+ if z.window_size == 0 {
+ z.window_size = DEFLATE_MAX_DISTANCE;
+ }
+
+ // Allocate rolling window buffer.
+ last_b := mem.make_dynamic_array_len_cap([dynamic]u8, z.window_size, z.window_size, allocator);
+ z.last = &last_b;
+ defer delete(last_b);
+
+ for {
+ final = compress.read_bits_lsb(z, 1);
+ type = compress.read_bits_lsb(z, 2);
+
+ // log.debugf("Final: %v | Type: %v\n", final, type);
+
+ if type == 0 {
+ // Uncompressed block
+
+ // Discard bits until next byte boundary
+ compress.discard_to_next_byte_lsb(z);
+
+ uncompressed_len := int(compress.read_bits_lsb(z, 16));
+ length_check := int(compress.read_bits_lsb(z, 16));
+ if uncompressed_len != ~length_check {
+ return E_Deflate.Len_Nlen_Mismatch;
+ }
+
+ /*
+ TODO: Maybe speed this up with a stream-to-stream copy (read_from)
+ and a single Adler32 update after.
+ */
+ #no_bounds_check for uncompressed_len > 0 {
+ compress.refill_lsb(z);
+ lit := compress.read_bits_lsb(z, 8);
+ write_byte(z, u8(lit));
+ uncompressed_len -= 1;
+ }
+ } else if type == 3 {
+ return E_Deflate.BType_3;
+ } else {
+ // log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
+ if type == 1 {
+ // Use fixed code lengths.
+ err = build_huffman(z_repeat, Z_FIXED_LENGTH[:]);
+ if !is_kind(err, E_General.OK) {
+ return err;
+ }
+ err = build_huffman(z_offset, Z_FIXED_DIST[:]);
+ if !is_kind(err, E_General.OK) {
+ return err;
+ }
+ } else {
+ lencodes: [286+32+137]u8;
+ codelength_sizes: [19]u8;
+
+ //i: u32;
+ n: u32;
+
+ compress.refill_lsb(z, 14);
+ hlit := compress.read_bits_no_refill_lsb(z, 5) + 257;
+ hdist := compress.read_bits_no_refill_lsb(z, 5) + 1;
+ hclen := compress.read_bits_no_refill_lsb(z, 4) + 4;
+ ntot := hlit + hdist;
+
+ #no_bounds_check for i in 0..<hclen {
+ s := compress.read_bits_lsb(z, 3);
+ codelength_sizes[Z_LENGTH_DEZIGZAG[i]] = u8(s);
+ }
+ err = build_huffman(codelength_ht, codelength_sizes[:]);
+ if !is_kind(err, E_General.OK) {
+ return err;
+ }
+
+ n = 0;
+ c: u16;
+
+ for n < ntot {
+ c, err = decode_huffman(z, codelength_ht);
+ if !is_kind(err, E_General.OK) {
+ return err;
+ }
+
+ if c < 0 || c >= 19 {
+ return E_Deflate.Huffman_Bad_Code_Lengths;
+ }
+ if c < 16 {
+ lencodes[n] = u8(c);
+ n += 1;
+ } else {
+ fill := u8(0);
+ compress.refill_lsb(z, 7);
+ if c == 16 {
+ c = u16(compress.read_bits_no_refill_lsb(z, 2) + 3);
+ if n == 0 {
+ return E_Deflate.Huffman_Bad_Code_Lengths;
+ }
+ fill = lencodes[n - 1];
+ } else if c == 17 {
+ c = u16(compress.read_bits_no_refill_lsb(z, 3) + 3);
+ } else if c == 18 {
+ c = u16(compress.read_bits_no_refill_lsb(z, 7) + 11);
+ } else {
+ return E_Deflate.Huffman_Bad_Code_Lengths;
+ }
+
+ if ntot - n < u32(c) {
+ return E_Deflate.Huffman_Bad_Code_Lengths;
+ }
+
+ nc := n + u32(c);
+ #no_bounds_check for ; n < nc; n += 1 {
+ lencodes[n] = fill;
+ }
+ }
+ }
+
+ if n != ntot {
+ return E_Deflate.Huffman_Bad_Code_Lengths;
+ }
+
+ err = build_huffman(z_repeat, lencodes[:hlit]);
+ if !is_kind(err, E_General.OK) {
+ return err;
+ }
+
+ err = build_huffman(z_offset, lencodes[hlit:ntot]);
+ if !is_kind(err, E_General.OK) {
+ return err;
+ }
+ }
+ err = parse_huffman_block(z, z_repeat, z_offset);
+ // log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
+ if !is_kind(err, E_General.OK) {
+ return err;
+ }
+ }
+ if final == 1 {
+ break;
+ }
+ }
+ return E_General.OK;
+}
+
+inflate_from_byte_array :: proc(input: ^[]u8, buf: ^bytes.Buffer, raw := false) -> (err: Error) {
+ ctx := Context{};
+
+ r := bytes.Reader{};
+ bytes.reader_init(&r, input^);
+ rs := bytes.reader_to_stream(&r);
+ ctx.input = rs;
+
+ buf := buf;
+ ws := bytes.buffer_to_stream(buf);
+ ctx.output = ws;
+
+ err = inflate_from_stream(&ctx, raw);
+
+ return err;
+}
+
+inflate_from_byte_array_raw :: proc(input: ^[]u8, buf: ^bytes.Buffer, raw := false) -> (err: Error) {
+ return inflate_from_byte_array(input, buf, true);
+}
+
+inflate :: proc{inflate_from_stream, inflate_from_byte_array};
+inflate_raw :: proc{inflate_from_stream_raw, inflate_from_byte_array_raw}; \ No newline at end of file