Add `compress` and `image` to core.

author: Jeroen van Rijn <Kelimion@users.noreply.github.com> 2021-04-30 00:21:52 +0200
committer: Jeroen van Rijn <Kelimion@users.noreply.github.com> 2021-04-30 00:21:52 +0200
commit: 58e023e0cf581db71b4bf3c341b781a2f09ff50a (patch)
tree: cf070b07edb9d8e3e0cb3dd849d26cf8228377c5 /core/compress
parent: 222bab501ce886692f300b02d15b9e7099457406 (diff)
5 files changed, 1231 insertions, 0 deletions
diff --git a/core/compress/common.odin b/core/compress/common.odin
new file mode 100644
index 000000000..99b054903
--- /dev/null
+++ b/core/compress/common.odin
@@ -0,0 +1,203 @@
+package compress
+
+import "core:io"
+import "core:image"
+
+// Error helper, e.g. is_kind(err, General_Error.OK);
+is_kind :: proc(u: $U, x: $V) -> bool {
+    v, ok := u.(V);
+    return ok && v == x;
+}
+
+Error :: union {
+	General_Error,
+	Deflate_Error,
+	ZLIB_Error,
+	GZIP_Error,
+	ZIP_Error,
+	/*
+		This is here because png.load will return a this type of error union,
+		as it may involve an I/O error, a Deflate error, etc.
+	*/
+	image.PNG_Error,
+}
+
+General_Error :: enum {
+	OK = 0,
+	File_Not_Found,
+	Cannot_Open_File,
+	File_Too_Short,
+	Stream_Too_Short,
+	Output_Too_Short,
+	Unknown_Compression_Method,
+	Checksum_Failed,
+	Incompatible_Options,
+	Unimplemented,
+}
+
+GZIP_Error :: enum {
+	Invalid_GZIP_Signature,
+	Reserved_Flag_Set,
+	Invalid_Extra_Data,
+	Original_Name_Too_Long,
+	Comment_Too_Long,
+	Payload_Length_Invalid,
+	Payload_CRC_Invalid,
+}
+
+ZIP_Error :: enum {
+	Invalid_ZIP_File_Signature,
+	Unexpected_Signature,
+	Insert_Next_Disk,
+	Expected_End_of_Central_Directory_Record,
+}
+
+ZLIB_Error :: enum {
+	Unsupported_Window_Size,
+	FDICT_Unsupported,
+	Unsupported_Compression_Level,
+	Code_Buffer_Malformed,
+}
+
+Deflate_Error :: enum {
+	Huffman_Bad_Sizes,
+	Huffman_Bad_Code_Lengths,
+	Inflate_Error,
+	Bad_Distance,
+	Bad_Huffman_Code,
+	Len_Nlen_Mismatch,
+	BType_3,
+}
+
+// General context for ZLIB, LZW, etc.
+Context :: struct {
+	code_buffer: u32,
+	num_bits: i8,
+	/*
+		num_bits will be set to -100 if the buffer is malformed
+	*/
+	eof: b8,
+
+	input: io.Stream,
+	output: io.Stream,
+	bytes_written: i64,
+	// Used to update hash as we write instead of all at once
+	rolling_hash: u32,
+
+	// Sliding window buffer. Size must be a power of two.
+	window_size: i64,
+	last: ^[dynamic]byte,
+}
+
+// Stream helpers
+/*
+	TODO: These need to be optimized.
+
+	Streams should really only check if a certain method is available once, perhaps even during setup.
+
+	Bit and byte readers may be merged so that reading bytes will grab them from the bit buffer first.
+	This simplifies end-of-stream handling where bits may be left in the bit buffer.
+*/
+
+read_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+	b := make([]u8, size_of(T), context.temp_allocator);
+	r, e1 := io.to_reader(c.input);
+	_, e2 := io.read(r, b);
+	if !e1 || e2 != .None {
+		return T{}, e2;
+	}
+
+	res = (^T)(raw_data(b))^;
+	return res, .None;
+}
+
+read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
+	return read_data(z, u8);
+}
+
+peek_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+	// Get current position to read from.
+	curr, e1 := c.input->impl_seek(0, .Current);
+	if e1 != .None {
+		return T{}, e1;
+	}
+	r, e2 := io.to_reader_at(c.input);
+	if !e2 {
+		return T{}, .Empty;
+	}
+	b := make([]u8, size_of(T), context.temp_allocator);
+	_, e3 := io.read_at(r, b, curr);
+	if e3 != .None {
+		return T{}, .Empty;
+	}
+
+	res = (^T)(raw_data(b))^;
+	return res, .None;
+}
+
+// Sliding window read back
+peek_back_byte :: proc(c: ^Context, offset: i64) -> (res: u8, err: io.Error) {
+	// Look back into the sliding window.
+	return c.last[offset % c.window_size], .None;
+}
+
+// Generalized bit reader LSB
+refill_lsb :: proc(z: ^Context, width := i8(24)) {
+	for {
+		if z.num_bits > width {
+			break;
+		}
+		if z.code_buffer == 0 && z.num_bits == -1 {
+			z.num_bits = 0;
+		}
+		if z.code_buffer >= 1 << uint(z.num_bits) {
+			// Code buffer is malformed.
+			z.num_bits = -100;
+        	return;
+		}
+		c, err := read_u8(z);
+		if err != .None {
+			// This is fine at the end of the file.
+			z.num_bits = -42;
+			z.eof = true;
+			return;
+		}
+		z.code_buffer |= (u32(c) << u8(z.num_bits));
+		z.num_bits += 8;
+	}
+}
+
+consume_bits_lsb :: #force_inline proc(z: ^Context, width: u8) {
+	z.code_buffer >>= width;
+	z.num_bits -= i8(width);
+}
+
+peek_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	if z.num_bits < i8(width) {
+		refill_lsb(z);
+	}
+	// assert(z.num_bits >= i8(width));
+	return z.code_buffer & ~(~u32(0) << width);
+}
+
+peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	assert(z.num_bits >= i8(width));
+	return z.code_buffer & ~(~u32(0) << width);
+}
+
+read_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	k := peek_bits_lsb(z, width);
+	consume_bits_lsb(z, width);
+	return k;
+}
+
+read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	k := peek_bits_no_refill_lsb(z, width);
+	consume_bits_lsb(z, width);
+	return k;
+}
+
+discard_to_next_byte_lsb :: proc(z: ^Context) {
+	discard := u8(z.num_bits & 7);
+	consume_bits_lsb(z, discard);
+}
+\ No newline at end of file
diff --git a/core/compress/gzip/example.odin b/core/compress/gzip/example.odin
new file mode 100644
index 000000000..fa0cba9db
--- /dev/null
+++ b/core/compress/gzip/example.odin
@@ -0,0 +1,70 @@
+//+ignore
+package gzip
+
+import "core:compress/gzip"
+import "core:bytes"
+import "core:os"
+
+// Small GZIP file with fextra, fname and fcomment present.
+@private
+TEST: []u8 = {
+	0x1f, 0x8b, 0x08, 0x1c, 0xcb, 0x3b, 0x3a, 0x5a,
+	0x02, 0x03, 0x07, 0x00, 0x61, 0x62, 0x03, 0x00,
+	0x63, 0x64, 0x65, 0x66, 0x69, 0x6c, 0x65, 0x6e,
+	0x61, 0x6d, 0x65, 0x00, 0x54, 0x68, 0x69, 0x73,
+	0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x6f,
+	0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x00, 0x2b, 0x48,
+	0xac, 0xcc, 0xc9, 0x4f, 0x4c, 0x01, 0x00, 0x15,
+	0x6a, 0x2c, 0x42, 0x07, 0x00, 0x00, 0x00,
+};
+
+main :: proc() {
+	// Set up output buffer.
+	buf: bytes.Buffer;
+	defer bytes.buffer_destroy(&buf);
+
+	stdout :: proc(s: string) {
+		os.write_string(os.stdout, s);
+	}
+	stderr :: proc(s: string) {
+		os.write_string(os.stderr, s);
+	}
+
+	args := os.args;
+
+	if len(args) < 2 {
+		stderr("No input file specified.\n");
+		err := gzip.load(&TEST, &buf);
+		if gzip.is_kind(err, gzip.E_General.OK) {
+			stdout("Displaying test vector: ");
+			stdout(bytes.buffer_to_string(&buf));
+			stdout("\n");
+		}
+	}
+
+	// The rest are all files.
+	args = args[1:];
+	err: gzip.Error;
+
+	for file in args {
+		if file == "-" {
+			// Read from stdin
+			s := os.stream_from_handle(os.stdin);
+			err = gzip.load(&s, &buf);
+		} else {
+			err = gzip.load(file, &buf);
+		}
+		if !gzip.is_kind(err, gzip.E_General.OK) {
+			if gzip.is_kind(err, gzip.E_General.File_Not_Found) {
+				stderr("File not found: ");
+				stderr(file);
+				stderr("\n");
+				os.exit(1);
+			}
+			stderr("GZIP returned an error.\n");
+			os.exit(2);
+		}
+		stdout(bytes.buffer_to_string(&buf));
+	}
+	os.exit(0);
+}
+\ No newline at end of file
diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin
new file mode 100644
index 000000000..3278d7c1d
--- /dev/null
+++ b/core/compress/gzip/gzip.odin
@@ -0,0 +1,314 @@
+package gzip
+
+import "core:compress/zlib"
+import "core:compress"
+import "core:os"
+import "core:io"
+import "core:bytes"
+import "core:hash"
+
+/*
+
+	This package implements support for the GZIP file format v4.3,
+	as specified in RFC 1952.
+
+	It is implemented in such a way that it lends itself naturally
+	to be the input to a complementary TAR implementation.
+
+*/
+
+Magic :: enum u16le {
+	GZIP = 0x8b << 8 | 0x1f,
+}
+
+Header :: struct #packed {
+	magic: Magic,
+	compression_method: Compression,
+	flags: Header_Flags,
+	modification_time: u32le,
+	xfl: Compression_Flags,
+	os: OS,
+}
+#assert(size_of(Header) == 10);
+
+Header_Flag :: enum u8 {
+	// Order is important
+	text       = 0,
+	header_crc = 1,
+	extra      = 2,
+	name       = 3,
+	comment    = 4,
+	reserved_1 = 5,
+	reserved_2 = 6,
+	reserved_3 = 7,
+}
+Header_Flags :: distinct bit_set[Header_Flag; u8];
+
+OS :: enum u8 {
+	FAT = 0,
+	Amiga = 1,
+	VMS = 2,
+	Unix = 3,
+	VM_CMS = 4,
+	Atari_TOS = 5,
+	HPFS = 6,
+	Macintosh = 7,
+	Z_System = 8,
+	CP_M = 9,
+	TOPS_20 = 10,
+	NTFS = 11,
+	QDOS = 12,
+	Acorn_RISCOS = 13,
+	_Unknown = 14,
+	Unknown = 255,
+}
+OS_Name :: #partial [OS]string{
+	.FAT   = "FAT",
+	.Amiga = "Amiga",
+	.VMS   = "VMS/OpenVMS",
+	.Unix = "Unix",
+	.VM_CMS = "VM/CMS",
+	.Atari_TOS = "Atari TOS",
+	.HPFS = "HPFS",
+	.Macintosh = "Macintosh",
+	.Z_System = "Z-System",
+	.CP_M = "CP/M",
+	.TOPS_20 = "TOPS-20",
+	.NTFS = "NTFS",
+	.QDOS = "QDOS",
+	.Acorn_RISCOS = "Acorn RISCOS",
+	.Unknown = "Unknown",
+};
+
+Compression :: enum u8 {
+	DEFLATE = 8,
+}
+
+Compression_Flags :: enum u8 {
+	Maximum_Compression = 2,
+	Fastest_Compression = 4,
+}
+
+Error     :: compress.Error;
+E_General :: compress.General_Error;
+E_GZIP    :: compress.GZIP_Error;
+E_ZLIB    :: compress.ZLIB_Error;
+E_Deflate :: compress.Deflate_Error;
+is_kind   :: compress.is_kind;
+
+load_from_slice :: proc(slice: ^[]u8, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
+
+	r := bytes.Reader{};
+	bytes.reader_init(&r, slice^);
+	stream := bytes.reader_to_stream(&r);
+
+	err = load_from_stream(&stream, buf, allocator);
+
+	return err;
+}
+
+load_from_file :: proc(filename: string, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
+	data, ok := os.read_entire_file(filename, context.temp_allocator);
+	if ok {
+		err = load_from_slice(&data, buf, allocator);
+		return;
+	} else {
+		return E_General.File_Not_Found;
+	}
+}
+
+load_from_stream :: proc(stream: ^io.Stream, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
+
+	ctx := compress.Context{
+		input  = stream^,
+	};
+	buf := buf;
+	ws := bytes.buffer_to_stream(buf);
+	ctx.output = ws;
+
+	header, e := compress.read_data(&ctx, Header);
+	if e != .None {
+		return E_General.File_Too_Short;
+	}
+
+	if header.magic != .GZIP {
+		return E_GZIP.Invalid_GZIP_Signature;
+	}
+	if header.compression_method != .DEFLATE {
+		return E_General.Unknown_Compression_Method;
+	}
+
+	if header.os >= ._Unknown {
+		header.os = .Unknown;
+	}
+
+	if .reserved_1 in header.flags || .reserved_2 in header.flags || .reserved_3 in header.flags {
+		return E_GZIP.Reserved_Flag_Set;
+	}
+
+	// printf("signature: %v\n", header.magic);
+	// printf("compression: %v\n", header.compression_method);
+	// printf("flags: %v\n", header.flags);
+	// printf("modification time: %v\n", time.unix(i64(header.modification_time), 0));
+	// printf("xfl: %v (%v)\n", header.xfl, int(header.xfl));
+	// printf("os: %v\n", OS_Name[header.os]);
+
+	if .extra in header.flags {
+		xlen, e_extra := compress.read_data(&ctx, u16le);
+		if e_extra != .None {
+			return E_General.Stream_Too_Short;
+		}
+		// printf("Extra data present (%v bytes)\n", xlen);
+		if xlen < 4 {
+			// Minimum length is 2 for ID + 2 for a field length, if set to zero.
+			return E_GZIP.Invalid_Extra_Data;
+		}
+
+		field_id:     [2]u8;
+		field_length: u16le;
+		field_error: io.Error;
+
+		for xlen >= 4 {
+			// println("Parsing Extra field(s).");
+			field_id, field_error = compress.read_data(&ctx, [2]u8);
+			if field_error != .None {
+				// printf("Parsing Extra returned: %v\n", field_error);
+				return E_General.Stream_Too_Short;
+			}
+			xlen -= 2;
+
+			field_length, field_error = compress.read_data(&ctx, u16le);
+			if field_error != .None {
+				// printf("Parsing Extra returned: %v\n", field_error);
+				return E_General.Stream_Too_Short;
+			}
+			xlen -= 2;
+
+			if xlen <= 0 {
+				// We're not going to try and recover by scanning for a ZLIB header.
+				// Who knows what else is wrong with this file.
+				return E_GZIP.Invalid_Extra_Data;
+			}
+
+			// printf("    Field \"%v\" of length %v found: ", string(field_id[:]), field_length);
+			if field_length > 0 {
+				field_data := make([]u8, field_length, context.temp_allocator);
+				_, field_error = ctx.input->impl_read(field_data);
+				if field_error != .None {
+					// printf("Parsing Extra returned: %v\n", field_error);
+					return E_General.Stream_Too_Short;
+				}
+				xlen -= field_length;
+
+				// printf("%v\n", string(field_data));
+	 		}
+
+			if xlen != 0 {
+				return E_GZIP.Invalid_Extra_Data;
+			}
+		}
+	}
+
+	if .name in header.flags {
+		// Should be enough.
+		name: [1024]u8;
+		b: [1]u8;
+		i := 0;
+		name_error: io.Error;
+
+		for i < len(name) {
+			_, name_error = ctx.input->impl_read(b[:]);
+			if name_error != .None {
+				return E_General.Stream_Too_Short;
+			}
+			if b == 0 {
+				break;
+			}
+			name[i] = b[0];
+			i += 1;
+			if i >= len(name) {
+				return E_GZIP.Original_Name_Too_Long;
+			}
+		}
+		// printf("Original filename: %v\n", string(name[:i]));
+	}
+
+	if .comment in header.flags {
+		// Should be enough.
+		comment: [1024]u8;
+		b: [1]u8;
+		i := 0;
+		comment_error: io.Error;
+
+		for i < len(comment) {
+			_, comment_error = ctx.input->impl_read(b[:]);
+			if comment_error != .None {
+				return E_General.Stream_Too_Short;
+			}
+			if b == 0 {
+				break;
+			}
+			comment[i] = b[0];
+			i += 1;
+			if i >= len(comment) {
+				return E_GZIP.Comment_Too_Long;
+			}
+		}
+		// printf("Comment: %v\n", string(comment[:i]));
+	}
+
+	if .header_crc in header.flags {
+		crc16: [2]u8;
+		crc_error: io.Error;
+		_, crc_error = ctx.input->impl_read(crc16[:]);
+		if crc_error != .None {
+			return E_General.Stream_Too_Short;
+		}
+		/*
+			We don't actually check the CRC16 (lower 2 bytes of CRC32 of header data until the CRC field).
+			If we find a gzip file in the wild that sets this field, we can add proper support for it.
+		*/
+	}
+
+	/*
+		We should have arrived at the ZLIB payload.
+	*/
+
+	zlib_error := zlib.inflate_raw(&ctx);
+
+	// fmt.printf("ZLIB returned: %v\n", zlib_error);
+
+	if !is_kind(zlib_error, E_General.OK) || zlib_error == nil {
+		return zlib_error;
+	}
+
+	/*
+		Read CRC32 using the ctx bit reader because zlib may leave bytes in there.
+	*/
+	compress.discard_to_next_byte_lsb(&ctx);
+
+	payload_crc_b: [4]u8;
+	payload_len_b: [4]u8;
+	for i in 0..3 {
+		payload_crc_b[i] = u8(compress.read_bits_lsb(&ctx, 8));
+	}
+	payload_crc := transmute(u32le)payload_crc_b;
+	for i in 0..3 {
+		payload_len_b[i] = u8(compress.read_bits_lsb(&ctx, 8));
+	}
+	payload_len := int(transmute(u32le)payload_len_b);
+
+	payload := bytes.buffer_to_bytes(buf);
+	crc32 := u32le(hash.crc32(payload));
+
+	if crc32 != payload_crc {
+		return E_GZIP.Payload_CRC_Invalid;
+	}
+
+	if len(payload) != payload_len {
+		return E_GZIP.Payload_Length_Invalid;
+	}
+	return E_General.OK;
+}
+
+load :: proc{load_from_file, load_from_slice, load_from_stream};
+\ No newline at end of file
diff --git a/core/compress/zlib/example.odin b/core/compress/zlib/example.odin
new file mode 100644
index 000000000..a7fa76d62
--- /dev/null
+++ b/core/compress/zlib/example.odin
@@ -0,0 +1,42 @@
+//+ignore
+package zlib
+
+import "core:compress/zlib"
+import "core:bytes"
+import "core:fmt"
+
+main :: proc() {
+
+	ODIN_DEMO: []u8 = {
+		120, 156, 101, 144,  77, 110, 131,  48,  16, 133, 215, 204,  41, 158,  44,
+ 		 69,  73,  32, 148, 182,  75,  35,  14, 208, 125,  47,  96, 185, 195, 143,
+		130,  13,  50,  38,  81,  84, 101, 213,  75, 116, 215,  43, 246,   8,  53,
+ 		 82, 126,   8, 181, 188, 152, 153, 111, 222, 147, 159, 123, 165, 247, 170,
+ 		 98,  24, 213,  88, 162, 198, 244, 157, 243,  16, 186, 115,  44,  75, 227,
+  		  5,  77, 115,  72, 137, 222, 117, 122, 179, 197,  39,  69, 161, 170, 156,
+ 		 50, 144,   5,  68, 130,   4,  49, 126, 127, 190, 191, 144,  34,  19,  57,
+ 		 69,  74, 235, 209, 140, 173, 242, 157, 155,  54, 158, 115, 162, 168,  12,
+		181, 239, 246, 108,  17, 188, 174, 242, 224,  20,  13, 199, 198, 235, 250,
+		194, 166, 129,  86,   3,  99, 157, 172,  37, 230,  62,  73, 129, 151, 252,
+		 70, 211,   5,  77,  31, 104, 188, 160, 113, 129, 215,  59, 205,  22,  52,
+		123, 160,  83, 142, 255, 242,  89, 123,  93, 149, 200,  50, 188,  85,  54,
+		252,  18, 248, 192, 238, 228, 235, 198,  86, 224, 118, 224, 176, 113, 166,
+		112,  67, 106, 227, 159, 122, 215,  88,  95, 110, 196, 123, 205, 183, 224,
+ 		 98,  53,   8, 104, 213, 234, 201, 147,   7, 248, 192,  14, 170,  29,  25,
+		171,  15,  18,  59, 138, 112,  63,  23, 205, 110, 254, 136, 109,  78, 231,
+ 		 63, 234, 138, 133, 204,
+	};
+
+	buf: bytes.Buffer;
+
+	// We can pass ", true" to inflate a raw DEFLATE stream instead of a ZLIB wrapped one.
+	err := zlib.inflate(&ODIN_DEMO, &buf);
+	defer bytes.buffer_destroy(&buf);
+
+	if !zlib.is_kind(err, zlib.E_General.OK) {
+		fmt.printf("\nError: %v\n", err);
+	}
+	s := bytes.buffer_to_string(&buf);
+	fmt.printf("Input: %v bytes, output (%v bytes):\n%v\n", len(ODIN_DEMO), len(s), s);
+	assert(len(s) == 438);
+}
+\ No newline at end of file
diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin
new file mode 100644
index 000000000..34a7984a7
--- /dev/null
+++ b/core/compress/zlib/zlib.odin
@@ -0,0 +1,602 @@
+package zlib
+
+import "core:compress"
+
+import "core:mem"
+import "core:io"
+import "core:bytes"
+import "core:hash"
+/*
+	zlib.inflate decompresses a ZLIB stream passed in as a []u8 or io.Stream.
+	Returns: Error. You can use zlib.is_kind or compress.is_kind to easily test for OK.
+*/
+
+Context :: compress.Context;
+
+Compression_Method :: enum u8 {
+	DEFLATE  = 8,
+	Reserved = 15,
+}
+
+Compression_Level :: enum u8 {
+	Fastest = 0,
+	Fast    = 1,
+    Default = 2,
+    Maximum = 3,
+}
+
+Options :: struct {
+	window_size: u16,
+	level: u8,
+}
+
+Error     :: compress.Error;
+E_General :: compress.General_Error;
+E_ZLIB    :: compress.ZLIB_Error;
+E_Deflate :: compress.Deflate_Error;
+is_kind   :: compress.is_kind;
+
+DEFLATE_MAX_CHUNK_SIZE   :: 65535;
+DEFLATE_MAX_LITERAL_SIZE :: 65535;
+DEFLATE_MAX_DISTANCE     :: 32768;
+DEFLATE_MAX_LENGTH       :: 258;
+
+HUFFMAN_MAX_BITS  :: 16;
+HUFFMAN_FAST_BITS :: 9;
+HUFFMAN_FAST_MASK :: ((1 << HUFFMAN_FAST_BITS) - 1);
+
+Z_LENGTH_BASE := [31]u16{
+	3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,
+	67,83,99,115,131,163,195,227,258,0,0,
+};
+
+Z_LENGTH_EXTRA := [31]u8{
+	0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0,
+};
+
+Z_DIST_BASE := [32]u16{
+	1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
+	257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0,
+};
+
+Z_DIST_EXTRA := [32]u8{
+	0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,0,0,
+};
+
+Z_LENGTH_DEZIGZAG := []u8{
+	16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
+};
+
+Z_FIXED_LENGTH := [288]u8{
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,
+};
+
+Z_FIXED_DIST := [32]u8{
+   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+};
+
+/*
+	Accelerate all cases in default tables.
+*/
+ZFAST_BITS :: 9;
+ZFAST_MASK :: ((1 << ZFAST_BITS) - 1);
+
+/*
+	ZLIB-style Huffman encoding.
+	JPEG packs from left, ZLIB from right. We can't share code.
+*/
+Huffman_Table :: struct {
+   fast: [1 << ZFAST_BITS]u16,
+   firstcode: [16]u16,
+   maxcode: [17]int,
+   firstsymbol: [16]u16,
+   size: [288]u8,
+   value: [288]u16,
+};
+
+// Implementation starts here
+
+z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
+	assert(bits <= 16);
+	// NOTE: Can optimize with llvm.bitreverse.i64 or some bit twiddling
+	// by reversing all of the bits and masking out the unneeded ones.
+	r = n;
+	r = ((r & 0xAAAA) >>  1) | ((r & 0x5555) << 1);
+	r = ((r & 0xCCCC) >>  2) | ((r & 0x3333) << 2);
+	r = ((r & 0xF0F0) >>  4) | ((r & 0x0F0F) << 4);
+	r = ((r & 0xFF00) >>  8) | ((r & 0x00FF) << 8);
+
+	r >>= (16 - bits);
+	return;
+}
+
+write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_bounds_check {
+	c := c;
+	buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1};
+	z.rolling_hash = hash.adler32(buf, z.rolling_hash);
+
+	_, e := z.output->impl_write(buf);
+	if e != .None {
+		return e;
+	}
+	z.last[z.bytes_written % z.window_size] = c;
+
+	z.bytes_written += 1;
+	return .None;
+}
+
+allocate_huffman_table :: proc(allocator := context.allocator) -> (z: ^Huffman_Table, err: Error) {
+
+	z = new(Huffman_Table, allocator);
+	return z, E_General.OK;
+}
+
+build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
+	sizes:     [HUFFMAN_MAX_BITS+1]int;
+	next_code: [HUFFMAN_MAX_BITS]int;
+
+	k := int(0);
+
+	mem.zero_slice(sizes[:]);
+	mem.zero_slice(z.fast[:]);
+
+	for v, _ in code_lengths {
+		sizes[v] += 1;
+	}
+	sizes[0] = 0;
+
+	for i in 1..16 {
+		if sizes[i] > (1 << uint(i)) {
+			return E_Deflate.Huffman_Bad_Sizes;
+		}
+	}
+	code := int(0);
+
+	for i in 1..<16 {
+		next_code[i]     = code;
+		z.firstcode[i]   = u16(code);
+		z.firstsymbol[i] = u16(k);
+		code = code + sizes[i];
+		if sizes[i] != 0 {
+			if (code - 1 >= (1 << u16(i))) {
+				return E_Deflate.Huffman_Bad_Code_Lengths;
+			}
+		}
+		z.maxcode[i] = code << (16 - uint(i));
+		code <<= 1;
+		k += int(sizes[i]);
+	}
+
+	z.maxcode[16] = 0x10000; // Sentinel
+	c: int;
+
+	for v, ci in code_lengths {
+		if v != 0 {
+			c = next_code[v] - int(z.firstcode[v]) + int(z.firstsymbol[v]);
+			fastv := u16((u16(v) << 9) | u16(ci));
+			z.size[c]  = u8(v);
+			z.value[c] = u16(ci);
+			if (v <= ZFAST_BITS) {
+				j := z_bit_reverse(u16(next_code[v]), v);
+				for j < (1 << ZFAST_BITS) {
+					z.fast[j] = fastv;
+					j += (1 << v);
+				}
+			}
+			next_code[v] += 1;
+		}
+	}
+	return E_General.OK;
+}
+
+decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+
+	r   = 0;
+	err = E_General.OK;
+
+	k: int;
+	s: u8;
+
+	code := u16(compress.peek_bits_lsb(z, 16));
+
+	k = int(z_bit_reverse(code, 16));
+
+	#no_bounds_check for s = HUFFMAN_FAST_BITS+1; ; {
+		if k < t.maxcode[s] {
+			break;
+		}
+		s += 1;
+	}
+	if (s >= 16) {
+		return 0, E_Deflate.Bad_Huffman_Code;
+	}
+   	// code size is s, so:
+   	b := (k >> (16-s)) - int(t.firstcode[s]) + int(t.firstsymbol[s]);
+   	if b >= size_of(t.size) {
+   		return 0, E_Deflate.Bad_Huffman_Code;	
+   	}
+   	if t.size[b] != s {
+   		return 0, E_Deflate.Bad_Huffman_Code;
+   	}
+
+   	compress.consume_bits_lsb(z, s);
+
+   	r = t.value[b];
+   	return r, E_General.OK;
+}
+
+decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+
+	if z.num_bits < 16 {
+		if z.num_bits == -100 {
+			return 0, E_ZLIB.Code_Buffer_Malformed;
+		}
+		compress.refill_lsb(z);
+		if z.eof {
+			return 0, E_General.Stream_Too_Short;
+		}
+	}
+	#no_bounds_check b := t.fast[z.code_buffer & ZFAST_MASK];
+	if b != 0 {
+		s := u8(b >> ZFAST_BITS);
+		compress.consume_bits_lsb(z, s);
+		return b & 511, E_General.OK;
+	}
+	return decode_huffman_slowpath(z, t);
+}
+
+parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
+
+	#no_bounds_check for {
+		value, e := decode_huffman(z, z_repeat);
+		if !is_kind(e, E_General.OK) {
+			return err;
+		}
+		if value < 256 {
+			e := write_byte(z, u8(value));
+			if e != .None {
+				return E_General.Output_Too_Short;
+			}
+		} else {
+			if value == 256 {
+         		// End of block
+         		return E_General.OK;
+			}
+
+			value -= 257;
+			length := Z_LENGTH_BASE[value];
+			if Z_LENGTH_EXTRA[value] > 0 {
+				length += u16(compress.read_bits_lsb(z, Z_LENGTH_EXTRA[value]));
+			}
+
+			value, e = decode_huffman(z, z_offset);
+			if !is_kind(e, E_General.OK) {
+				return E_Deflate.Bad_Huffman_Code;
+			}
+
+			distance := Z_DIST_BASE[value];
+			if Z_DIST_EXTRA[value] > 0 {
+				distance += u16(compress.read_bits_lsb(z, Z_DIST_EXTRA[value]));
+			}
+
+			if z.bytes_written < i64(distance) {
+				// Distance is longer than we've decoded so far.
+				return E_Deflate.Bad_Distance;
+			}
+
+			offset := i64(z.bytes_written - i64(distance));
+			/*
+				These might be sped up with a repl_byte call that copies
+				from the already written output more directly, and that
+				update the Adler checksum once after.
+
+				That way we'd suffer less Stream vtable overhead.
+			*/
+			if distance == 1 {
+				/*
+					Replicate the last outputted byte, length times.
+				*/
+				if length > 0 {
+					b, e := compress.peek_back_byte(z, offset);
+					if e != .None {
+						return E_General.Output_Too_Short;
+					}
+					#no_bounds_check for _ in 0..<length {
+						write_byte(z, b);
+					}
+				}
+			} else {
+				if length > 0 {
+					#no_bounds_check for _ in 0..<length {
+						b, e := compress.peek_back_byte(z, offset);
+						if e != .None {
+							return E_General.Output_Too_Short;
+						}
+						write_byte(z, b);
+						offset += 1;
+					}
+				}
+			}
+		}
+	}
+}
+
+inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+	/*
+		ctx.input must be an io.Stream backed by an implementation that supports:
+		- read
+		- size
+
+		ctx.output must be an io.Stream backed by an implementation that supports:
+		- write
+
+		raw determines whether the ZLIB header is processed, or we're inflating a raw
+		DEFLATE stream.
+	*/
+
+	if !raw {
+		data_size := io.size(ctx.input);
+		if data_size < 6 {
+			return E_General.Stream_Too_Short;
+		}
+
+		cmf, _ := compress.read_u8(ctx);
+
+		method := Compression_Method(cmf & 0xf);
+		if method != .DEFLATE {
+			return E_General.Unknown_Compression_Method;
+		}
+
+		cinfo  := (cmf >> 4) & 0xf;
+		if cinfo > 7 {
+			return E_ZLIB.Unsupported_Window_Size;
+		}
+		ctx.window_size = 1 << (cinfo + 8);
+
+		flg, _ := compress.read_u8(ctx);
+
+		fcheck  := flg & 0x1f;
+		fcheck_computed := (cmf << 8 | flg) & 0x1f;
+		if fcheck != fcheck_computed {
+			return E_General.Checksum_Failed;
+		}
+
+		fdict   := (flg >> 5) & 1;
+		/* 
+			We don't handle built-in dictionaries for now.
+			They're application specific and PNG doesn't use them.
+		*/
+		if fdict != 0 {
+			return E_ZLIB.FDICT_Unsupported;
+		}
+
+		// flevel  := Compression_Level((flg >> 6) & 3);
+		/*
+			Inflate can consume bits belonging to the Adler checksum.
+			We pass the entire stream to Inflate and will unget bytes if we need to
+			at the end to compare checksums.
+		*/
+
+		// Seed the Adler32 rolling checksum.
+		ctx.rolling_hash = 1;
+	}
+
+ 	// Parse ZLIB stream without header.
+	err = inflate_raw(ctx);
+	if !is_kind(err, E_General.OK) {
+		return err;
+	}
+
+	if !raw {
+		compress.discard_to_next_byte_lsb(ctx);
+
+		adler32 := compress.read_bits_lsb(ctx, 8) << 24 | compress.read_bits_lsb(ctx, 8) << 16 | compress.read_bits_lsb(ctx, 8) << 8 | compress.read_bits_lsb(ctx, 8);
+		if ctx.rolling_hash != u32(adler32) {
+			return E_General.Checksum_Failed;
+		}
+	}
+	return E_General.OK;
+}
+
+// @(optimization_mode="speed")
+inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+	final := u32(0);
+	type := u32(0);
+
+	z.num_bits = 0;
+	z.code_buffer = 0;
+
+	z_repeat:      ^Huffman_Table;
+	z_offset:      ^Huffman_Table;
+	codelength_ht: ^Huffman_Table;
+
+	z_repeat, err = allocate_huffman_table(allocator=context.allocator);
+	if !is_kind(err, E_General.OK) {
+		return err;
+	}
+	z_offset, err = allocate_huffman_table(allocator=context.allocator);
+	if !is_kind(err, E_General.OK) {
+		return err;
+	}
+	codelength_ht, err = allocate_huffman_table(allocator=context.allocator);
+	if !is_kind(err, E_General.OK) {
+		return err;
+	}
+	defer free(z_repeat);
+	defer free(z_offset);
+	defer free(codelength_ht);
+
+	if z.window_size == 0 {
+		z.window_size = DEFLATE_MAX_DISTANCE;
+	}
+
+	// Allocate rolling window buffer.
+	last_b := mem.make_dynamic_array_len_cap([dynamic]u8, z.window_size, z.window_size, allocator);
+	z.last = &last_b;
+	defer delete(last_b);
+
+	for {
+		final = compress.read_bits_lsb(z, 1);
+		type  = compress.read_bits_lsb(z, 2);
+
+		// log.debugf("Final: %v | Type: %v\n", final, type);
+
+		if type == 0 {
+			// Uncompressed block
+
+			// Discard bits until next byte boundary
+			compress.discard_to_next_byte_lsb(z);
+
+			uncompressed_len  := int(compress.read_bits_lsb(z, 16));
+			length_check      := int(compress.read_bits_lsb(z, 16));
+			if uncompressed_len != ~length_check {
+				return E_Deflate.Len_Nlen_Mismatch;
+			}
+
+			/*
+				TODO: Maybe speed this up with a stream-to-stream copy (read_from)
+				and a single Adler32 update after.
+			*/
+			#no_bounds_check for uncompressed_len > 0 {
+				compress.refill_lsb(z);
+				lit := compress.read_bits_lsb(z, 8);
+				write_byte(z, u8(lit));
+				uncompressed_len -= 1;
+			}
+		} else if type == 3 {
+			return E_Deflate.BType_3;
+		} else {
+			// log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
+			if type == 1 {
+				// Use fixed code lengths.
+				err = build_huffman(z_repeat, Z_FIXED_LENGTH[:]);
+				if !is_kind(err, E_General.OK) {
+					return err;
+				}
+				err = build_huffman(z_offset, Z_FIXED_DIST[:]);
+				if !is_kind(err, E_General.OK) {
+					return err;
+				}
+			} else {
+				lencodes: [286+32+137]u8;
+			   	codelength_sizes: [19]u8;
+
+			   	//i: u32;
+			   	n: u32;
+
+			   	compress.refill_lsb(z, 14);
+				hlit  := compress.read_bits_no_refill_lsb(z, 5) + 257;
+				hdist := compress.read_bits_no_refill_lsb(z, 5) + 1;
+				hclen := compress.read_bits_no_refill_lsb(z, 4) + 4;
+				ntot  := hlit + hdist;
+
+				#no_bounds_check for i in 0..<hclen {
+					s := compress.read_bits_lsb(z, 3);
+					codelength_sizes[Z_LENGTH_DEZIGZAG[i]] = u8(s);
+				}
+				err = build_huffman(codelength_ht, codelength_sizes[:]);
+				if !is_kind(err, E_General.OK) {
+					return err;
+				}
+
+				n = 0;
+				c: u16;
+
+				for n < ntot {
+					c, err = decode_huffman(z, codelength_ht);
+					if !is_kind(err, E_General.OK) {
+						return err;
+					}
+
+					if c < 0 || c >= 19 {
+						return E_Deflate.Huffman_Bad_Code_Lengths;
+					}
+					if c < 16 {
+						lencodes[n] = u8(c);
+						n += 1;
+					} else {
+						fill := u8(0);
+						compress.refill_lsb(z, 7);
+						if c == 16 {
+							c = u16(compress.read_bits_no_refill_lsb(z, 2) + 3);
+			            	if n == 0 {
+			            		return E_Deflate.Huffman_Bad_Code_Lengths;
+			            	}
+			            	fill = lencodes[n - 1];
+						} else if c == 17 {
+			            	c = u16(compress.read_bits_no_refill_lsb(z, 3) + 3);
+			        	} else if c == 18 {
+			            	c = u16(compress.read_bits_no_refill_lsb(z, 7) + 11);
+			        	} else {
+			            	return E_Deflate.Huffman_Bad_Code_Lengths;
+			            }
+
+						if ntot - n < u32(c) {
+				        	return E_Deflate.Huffman_Bad_Code_Lengths;
+				        }
+
+				        nc := n + u32(c);
+				        #no_bounds_check for ; n < nc; n += 1 {
+				        	lencodes[n] = fill;
+				        }
+					}
+				}
+
+				if n != ntot {
+			   		return E_Deflate.Huffman_Bad_Code_Lengths;
+			   	}
+
+				err = build_huffman(z_repeat, lencodes[:hlit]);
+				if !is_kind(err, E_General.OK) {
+					return err;
+				}
+
+				err = build_huffman(z_offset, lencodes[hlit:ntot]);
+				if !is_kind(err, E_General.OK) {
+					return err;
+				}
+			}
+			err = parse_huffman_block(z, z_repeat, z_offset);
+			// log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
+			if !is_kind(err, E_General.OK) {
+				return err;
+			}
+		}
+		if final == 1 {
+			break;
+		}
+	}
+	return E_General.OK;
+}
+
+inflate_from_byte_array :: proc(input: ^[]u8, buf: ^bytes.Buffer, raw := false) -> (err: Error) {
+	ctx := Context{};
+
+	r := bytes.Reader{};
+	bytes.reader_init(&r, input^);
+	rs := bytes.reader_to_stream(&r);
+	ctx.input = rs;
+
+	buf := buf;
+	ws := bytes.buffer_to_stream(buf);
+	ctx.output = ws;
+
+	err = inflate_from_stream(&ctx, raw);
+
+	return err;
+}
+
+inflate_from_byte_array_raw :: proc(input: ^[]u8, buf: ^bytes.Buffer, raw := false) -> (err: Error) {
+	return inflate_from_byte_array(input, buf, true);
+}
+
+inflate     :: proc{inflate_from_stream, inflate_from_byte_array};
+inflate_raw :: proc{inflate_from_stream_raw, inflate_from_byte_array_raw};
+\ No newline at end of file
author	Jeroen van Rijn <Kelimion@users.noreply.github.com>	2021-04-30 00:21:52 +0200
committer	Jeroen van Rijn <Kelimion@users.noreply.github.com>	2021-04-30 00:21:52 +0200
commit	58e023e0cf581db71b4bf3c341b781a2f09ff50a (patch)
tree	cf070b07edb9d8e3e0cb3dd849d26cf8228377c5 /core/compress
parent	222bab501ce886692f300b02d15b9e7099457406 (diff)