author     Jeroen van Rijn <Kelimion@users.noreply.github.com>  2021-06-27 16:54:15 +0200
committer  GitHub <noreply@github.com>                          2021-06-27 16:54:15 +0200
commit     095605b7db41f85bbd0ca566459cb095eb32b45e (patch)
tree       3dfeedfaa2cf71e340f4d0a2e46d8fb3e257a4bd
parent     76d3bab955d33abb6d4cab0b95beedd6393c96da (diff)
parent     6836b501afdedb8fec583400ee86d379938434bc (diff)
Merge pull request #1046 from Kelimion/zlib_optimize
ZLIB: Optimize
-rw-r--r--  core/compress/common.odin        | 351
-rw-r--r--  core/compress/gzip/example.odin  |   4
-rw-r--r--  core/compress/gzip/gzip.odin     | 135
-rw-r--r--  core/compress/zlib/example.odin  |   5
-rw-r--r--  core/compress/zlib/zlib.odin     | 313
-rw-r--r--  core/image/png/example.odin      |   4
-rw-r--r--  core/image/png/png.odin          |  72
7 files changed, 552 insertions(+), 332 deletions(-)
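
What this means for callers, in brief: the decompressors now write into a caller-supplied `bytes.Buffer` and can pre-size it. A minimal sketch, assuming `compressed` holds a real ZLIB stream whose unpacked size (438 bytes here) is known up front; per the comments in compress/common.odin below, the allocation ceiling can also be raised at build time with `-define:COMPRESS_OUTPUT_ALLOCATE_MAX=size_in_bytes`:

package main

import "core:bytes"
import "core:compress/zlib"
import "core:fmt"

main :: proc() {
	compressed: []u8;	// Placeholder: a real ZLIB stream goes here.
	buf: bytes.Buffer;
	defer bytes.buffer_destroy(&buf);

	// Passing the known unpacked size lets `inflate` pre-allocate the output
	// buffer once instead of growing it while decompressing.
	err := zlib.inflate(input=compressed, buf=&buf, expected_output_size=438);
	if err != nil {
		fmt.printf("Error: %v\n", err);
	}
}
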
diff --git a/core/compress/common.odin b/core/compress/common.odin
index df798e751..ca63168a9 100644
--- a/core/compress/common.odin
+++ b/core/compress/common.odin
@@ -10,8 +10,40 @@ package compress
import "core:io"
import "core:image"
+import "core:bytes"
+
+/*
+	These settings bound how much memory the compression algorithms will allocate for their output buffer.
+	If the output is streamed instead, these settings are unnecessary and will be ignored.
+*/
+
+/*
+ When a decompression routine doesn't stream its output, but writes to a buffer,
+ we pre-allocate an output buffer to speed up decompression. The default is 1 MiB.
+*/
+COMPRESS_OUTPUT_ALLOCATE_MIN :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MIN, 1 << 20));
+
+/*
+	This bounds the maximum size a buffer will grow to as needed, or the maximum we'll
+	pre-allocate if you inform the decompression routine of the payload size up front.
+
+	For reference, the largest payload size of a GZIP file is 4 GiB.
+*/
+when size_of(uintptr) == 8 {
+ /*
+ For 64-bit platforms, we set the default max buffer size to 4 GiB,
+ which is GZIP and PKZIP's max payload size.
+ */
+ COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 32));
+} else {
+ /*
+ For 32-bit platforms, we set the default max buffer size to 512 MiB.
+ */
+ COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 29));
+}
-// when #config(TRACY_ENABLE, false) { import tracy "shared:odin-tracy" }
Error :: union {
General_Error,
@@ -36,6 +68,13 @@ General_Error :: enum {
Checksum_Failed,
Incompatible_Options,
Unimplemented,
+
+
+ /*
+ Memory errors
+ */
+ Allocation_Failed,
+ Resize_Failed,
}
GZIP_Error :: enum {
@@ -46,6 +85,20 @@ GZIP_Error :: enum {
Comment_Too_Long,
Payload_Length_Invalid,
Payload_CRC_Invalid,
+
+ /*
+ GZIP's payload can be a maximum of max(u32le), or 4 GiB.
+ If you tell it you expect it to contain more, that's obviously an error.
+ */
+ Payload_Size_Exceeds_Max_Payload,
+ /*
+ For buffered instead of streamed output, the payload size can't exceed
+ the max set by the `COMPRESS_OUTPUT_ALLOCATE_MAX` switch in compress/common.odin.
+
+ You can tweak this setting using `-define:COMPRESS_OUTPUT_ALLOCATE_MAX=size_in_bytes`
+ */
+ Output_Exceeds_COMPRESS_OUTPUT_ALLOCATE_MAX,
+
}
ZIP_Error :: enum {
@@ -74,67 +127,69 @@ Deflate_Error :: enum {
// General I/O context for ZLIB, LZW, etc.
-Context :: struct #packed {
- input: io.Stream,
+Context_Memory_Input :: struct #packed {
input_data: []u8,
-
- output: io.Stream,
- output_buf: [dynamic]u8,
+ output: ^bytes.Buffer,
bytes_written: i64,
- /*
- If we know the data size, we can optimize the reads and writes.
- */
- size_packed: i64,
- size_unpacked: i64,
+ code_buffer: u64,
+ num_bits: u64,
/*
- Used to update hash as we write instead of all at once.
+ If we know the data size, we can optimize the reads and writes.
*/
- rolling_hash: u32,
+ size_packed: i64,
+ size_unpacked: i64,
+}
+#assert(size_of(Context_Memory_Input) == 64);
+
+Context_Stream_Input :: struct #packed {
+ input_data: []u8,
+ input: io.Stream,
+ output: ^bytes.Buffer,
+ bytes_written: i64,
+
+ code_buffer: u64,
+ num_bits: u64,
+
/*
- Reserved
+ If we know the data size, we can optimize the reads and writes.
*/
- reserved: [2]u32,
+ size_packed: i64,
+ size_unpacked: i64,
+
/*
Flags:
- `input_fully_in_memory` tells us whether we're EOF when `input_data` is empty.
- `input_refills_from_stream` tells us we can then possibly refill from the stream.
+ `input_fully_in_memory`
+				true  = Input is read from `input_data` exclusively; an empty slice means EOF.
+ false = Try to refill `input_data` from the `input` stream.
*/
input_fully_in_memory: b8,
- input_refills_from_stream: b8,
- reserved_flags: [2]b8,
-}
-#assert(size_of(Context) == 128);
-/*
- Compression algorithm context
-*/
-Code_Buffer :: struct #packed {
- code_buffer: u64,
- num_bits: u64,
- /*
- Sliding window buffer. Size must be a power of two.
- */
- window_mask: i64,
- last: [dynamic]u8,
+ padding: [1]u8,
}
-#assert(size_of(Code_Buffer) == 64);
-// Stream helpers
/*
- TODO: These need to be optimized.
-
- Streams should really only check if a certain method is available once, perhaps even during setup.
+ TODO: The stream versions should really only check if a certain method is available once, perhaps even during setup.
Bit and byte readers may be merged so that reading bytes will grab them from the bit buffer first.
This simplifies end-of-stream handling where bits may be left in the bit buffer.
*/
-@(optimization_mode="speed")
-read_slice :: #force_inline proc(z: ^Context, size: int) -> (res: []u8, err: io.Error) {
- when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read Slice"); }
+// TODO: Make these return compress.Error errors.
+
+input_size_from_memory :: proc(z: ^Context_Memory_Input) -> (res: i64, err: Error) {
+ return i64(len(z.input_data)), nil;
+}
+input_size_from_stream :: proc(z: ^Context_Stream_Input) -> (res: i64, err: Error) {
+ return io.size(z.input), nil;
+}
+
+input_size :: proc{input_size_from_memory, input_size_from_stream};
+
+@(optimization_mode="speed")
+read_slice_from_memory :: #force_inline proc(z: ^Context_Memory_Input, size: int) -> (res: []u8, err: io.Error) {
#no_bounds_check {
if len(z.input_data) >= size {
res = z.input_data[:size];
@@ -143,17 +198,15 @@ read_slice :: #force_inline proc(z: ^Context, size: int) -> (res: []u8, err: io.
}
}
- if z.input_fully_in_memory {
- if len(z.input_data) == 0 {
- return []u8{}, .EOF;
- } else {
- return []u8{}, .Short_Buffer;
- }
+ if len(z.input_data) == 0 {
+ return []u8{}, .EOF;
+ } else {
+ return []u8{}, .Short_Buffer;
}
+}
- /*
- TODO: Try to refill z.input_data from stream, using packed_data as a guide.
- */
+@(optimization_mode="speed")
+read_slice_from_stream :: #force_inline proc(z: ^Context_Stream_Input, size: int) -> (res: []u8, err: io.Error) {
b := make([]u8, size, context.temp_allocator);
_, e := z.input->impl_read(b[:]);
if e == .None {
@@ -163,10 +216,10 @@ read_slice :: #force_inline proc(z: ^Context, size: int) -> (res: []u8, err: io.
return []u8{}, e;
}
-@(optimization_mode="speed")
-read_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Error) {
- when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read Data"); }
+read_slice :: proc{read_slice_from_memory, read_slice_from_stream};
+@(optimization_mode="speed")
+read_data :: #force_inline proc(z: ^$C, $T: typeid) -> (res: T, err: io.Error) {
b, e := read_slice(z, size_of(T));
if e == .None {
return (^T)(&b[0])^, .None;
@@ -176,9 +229,7 @@ read_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Err
}
@(optimization_mode="speed")
-read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
- when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read u8"); }
-
+read_u8_from_memory :: #force_inline proc(z: ^Context_Memory_Input) -> (res: u8, err: io.Error) {
#no_bounds_check {
if len(z.input_data) >= 1 {
res = z.input_data[0];
@@ -186,8 +237,12 @@ read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
return res, .None;
}
}
+ return 0, .EOF;
+}
- b, e := read_slice(z, 1);
+@(optimization_mode="speed")
+read_u8_from_stream :: #force_inline proc(z: ^Context_Stream_Input) -> (res: u8, err: io.Error) {
+ b, e := read_slice_from_stream(z, 1);
if e == .None {
return b[0], .None;
}
@@ -195,10 +250,29 @@ read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
return 0, e;
}
+read_u8 :: proc{read_u8_from_memory, read_u8_from_stream};
+
+/*
+	You would typically only use this at the end of Inflate, to preferentially drain
+	any bits left in the code buffer before reading further bytes from the input.
+*/
@(optimization_mode="speed")
-peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Error) {
- when #config(TRACY_ENABLE, false) { tracy.ZoneN("Peek Data"); }
+read_u8_prefer_code_buffer_lsb :: #force_inline proc(z: ^$C) -> (res: u8, err: io.Error) {
+ if z.num_bits >= 8 {
+ res = u8(read_bits_no_refill_lsb(z, 8));
+ } else {
+ size, _ := input_size(z);
+ if size > 0 {
+ res, err = read_u8(z);
+ } else {
+ err = .EOF;
+ }
+ }
+ return;
+}
+@(optimization_mode="speed")
+peek_data_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid) -> (res: T, err: io.Error) {
size :: size_of(T);
#no_bounds_check {
@@ -208,13 +282,16 @@ peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Err
}
}
- if z.input_fully_in_memory {
- if len(z.input_data) < size {
- return T{}, .EOF;
- } else {
- return T{}, .Short_Buffer;
- }
+ if len(z.input_data) == 0 {
+ return T{}, .EOF;
+ } else {
+ return T{}, .Short_Buffer;
}
+}
+
+@(optimization_mode="speed")
+peek_data_from_stream :: #force_inline proc(z: ^Context_Stream_Input, $T: typeid) -> (res: T, err: io.Error) {
+ size :: size_of(T);
// Get current position to read from.
curr, e1 := z.input->impl_seek(0, .Current);
@@ -239,30 +316,58 @@ peek_data :: #force_inline proc(z: ^Context, $T: typeid) -> (res: T, err: io.Err
return res, .None;
}
+peek_data :: proc{peek_data_from_memory, peek_data_from_stream};
+
+
+
// Sliding window read back
@(optimization_mode="speed")
-peek_back_byte :: #force_inline proc(cb: ^Code_Buffer, offset: i64) -> (res: u8, err: io.Error) {
+peek_back_byte :: #force_inline proc(z: ^$C, offset: i64) -> (res: u8, err: io.Error) {
// Look back into the sliding window.
- return cb.last[offset & cb.window_mask], .None;
+ return z.output.buf[z.bytes_written - offset], .None;
}
// Generalized bit reader LSB
@(optimization_mode="speed")
-refill_lsb :: proc(z: ^Context, cb: ^Code_Buffer, width := i8(24)) {
- when #config(TRACY_ENABLE, false) { tracy.ZoneN("Refill LSB"); }
+refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width := i8(48)) {
+ refill := u64(width);
+ b := u64(0);
+
+ if z.num_bits > refill {
+ return;
+ }
+
+ for {
+ if len(z.input_data) != 0 {
+ b = u64(z.input_data[0]);
+ z.input_data = z.input_data[1:];
+ } else {
+ b = 0;
+ }
+
+ z.code_buffer |= b << u8(z.num_bits);
+ z.num_bits += 8;
+ if z.num_bits > refill {
+ break;
+ }
+ }
+}
+// Generalized bit reader LSB
+@(optimization_mode="speed")
+refill_lsb_from_stream :: proc(z: ^Context_Stream_Input, width := i8(24)) {
refill := u64(width);
for {
- if cb.num_bits > refill {
+ if z.num_bits > refill {
break;
}
- if cb.code_buffer == 0 && cb.num_bits > 63 {
- cb.num_bits = 0;
+ if z.code_buffer == 0 && z.num_bits > 63 {
+ z.num_bits = 0;
}
- if cb.code_buffer >= 1 << uint(cb.num_bits) {
+ if z.code_buffer >= 1 << uint(z.num_bits) {
// Code buffer is malformed.
- cb.num_bits = max(u64);
+ z.num_bits = max(u64);
return;
}
b, err := read_u8(z);
@@ -270,48 +375,104 @@ refill_lsb :: proc(z: ^Context, cb: ^Code_Buffer, width := i8(24)) {
// This is fine at the end of the file.
return;
}
- cb.code_buffer |= (u64(b) << u8(cb.num_bits));
- cb.num_bits += 8;
+ z.code_buffer |= (u64(b) << u8(z.num_bits));
+ z.num_bits += 8;
}
}
+refill_lsb :: proc{refill_lsb_from_memory, refill_lsb_from_stream};
+
+
+@(optimization_mode="speed")
+consume_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) {
+ z.code_buffer >>= width;
+ z.num_bits -= u64(width);
+}
+
+@(optimization_mode="speed")
+consume_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) {
+ z.code_buffer >>= width;
+ z.num_bits -= u64(width);
+}
+
+consume_bits_lsb :: proc{consume_bits_lsb_from_memory, consume_bits_lsb_from_stream};
+
@(optimization_mode="speed")
-consume_bits_lsb :: #force_inline proc(cb: ^Code_Buffer, width: u8) {
- cb.code_buffer >>= width;
- cb.num_bits -= u64(width);
+peek_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
+ if z.num_bits < u64(width) {
+ refill_lsb(z);
+ }
+ return u32(z.code_buffer & ~(~u64(0) << width));
}
@(optimization_mode="speed")
-peek_bits_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
- if cb.num_bits < u64(width) {
- refill_lsb(z, cb);
+peek_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
+ if z.num_bits < u64(width) {
+ refill_lsb(z);
}
- // assert(z.num_bits >= i8(width));
- return u32(cb.code_buffer & ~(~u64(0) << width));
+ return u32(z.code_buffer & ~(~u64(0) << width));
+}
+
+peek_bits_lsb :: proc{peek_bits_lsb_from_memory, peek_bits_lsb_from_stream};
+
+@(optimization_mode="speed")
+peek_bits_no_refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
+ assert(z.num_bits >= u64(width));
+ return u32(z.code_buffer & ~(~u64(0) << width));
+}
+
+@(optimization_mode="speed")
+peek_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
+ assert(z.num_bits >= u64(width));
+ return u32(z.code_buffer & ~(~u64(0) << width));
+}
+
+peek_bits_no_refill_lsb :: proc{peek_bits_no_refill_lsb_from_memory, peek_bits_no_refill_lsb_from_stream};
+
+@(optimization_mode="speed")
+read_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
+ k := #force_inline peek_bits_lsb(z, width);
+ #force_inline consume_bits_lsb(z, width);
+ return k;
}
@(optimization_mode="speed")
-peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
- assert(cb.num_bits >= u64(width));
- return u32(cb.code_buffer & ~(~u64(0) << width));
+read_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
+ k := peek_bits_lsb(z, width);
+ consume_bits_lsb(z, width);
+ return k;
}
+read_bits_lsb :: proc{read_bits_lsb_from_memory, read_bits_lsb_from_stream};
+
@(optimization_mode="speed")
-read_bits_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
- k := peek_bits_lsb(z, cb, width);
- consume_bits_lsb(cb, width);
+read_bits_no_refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
+ k := #force_inline peek_bits_no_refill_lsb(z, width);
+ #force_inline consume_bits_lsb(z, width);
return k;
}
@(optimization_mode="speed")
-read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, width: u8) -> u32 {
- k := peek_bits_no_refill_lsb(z, cb, width);
- consume_bits_lsb(cb, width);
+read_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
+ k := peek_bits_no_refill_lsb(z, width);
+ consume_bits_lsb(z, width);
return k;
}
+read_bits_no_refill_lsb :: proc{read_bits_no_refill_lsb_from_memory, read_bits_no_refill_lsb_from_stream};
+
+
+@(optimization_mode="speed")
+discard_to_next_byte_lsb_from_memory :: proc(z: ^Context_Memory_Input) {
+ discard := u8(z.num_bits & 7);
+ #force_inline consume_bits_lsb(z, discard);
+}
+
+
@(optimization_mode="speed")
-discard_to_next_byte_lsb :: proc(cb: ^Code_Buffer) {
- discard := u8(cb.num_bits & 7);
- consume_bits_lsb(cb, discard);
+discard_to_next_byte_lsb_from_stream :: proc(z: ^Context_Stream_Input) {
+ discard := u8(z.num_bits & 7);
+ consume_bits_lsb(z, discard);
}
+
discard_to_next_byte_lsb :: proc{discard_to_next_byte_lsb_from_memory, discard_to_next_byte_lsb_from_stream};
\ No newline at end of file
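
To make the new split concrete, here is a minimal sketch (not part of this diff) that drives the memory-input bit reader directly; the two input bytes are arbitrary:

package main

import "core:compress"

main :: proc() {
	data := []u8{0x5A, 0xA5};	// Arbitrary example input.
	z := &compress.Context_Memory_Input{
		input_data = data,
	};

	compress.refill_lsb(z);			// Prime the 64-bit code buffer (48 bits wide by default).
	low  := compress.read_bits_lsb(z, 4);	// 0xA: the low nibble of 0x5A.
	high := compress.read_bits_lsb(z, 4);	// 0x5: the high nibble.
	compress.discard_to_next_byte_lsb(z);	// Realign to a byte boundary (a no-op here).

	// The refill already drained the input into the code buffer,
	// so read the remaining byte from there.
	b, err := compress.read_u8_prefer_code_buffer_lsb(z);	// b == 0xA5
	_ = low; _ = high; _ = b; _ = err;
}

Because the overloads dispatch on `^Context_Memory_Input` versus `^Context_Stream_Input`, the same call sites work unchanged for streamed input.
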
diff --git a/core/compress/gzip/example.odin b/core/compress/gzip/example.odin
index b4fc50ade..9dfd68f23 100644
--- a/core/compress/gzip/example.odin
+++ b/core/compress/gzip/example.odin
@@ -45,7 +45,7 @@ main :: proc() {
if len(args) < 2 {
stderr("No input file specified.\n");
- err := load(TEST, &buf);
+ err := load(slice=TEST, buf=&buf, known_gzip_size=len(TEST));
if err == nil {
stdout("Displaying test vector: ");
stdout(bytes.buffer_to_string(&buf));
@@ -65,7 +65,7 @@ main :: proc() {
if file == "-" {
// Read from stdin
s := os.stream_from_handle(os.stdin);
- ctx := &compress.Context{
+ ctx := &compress.Context_Stream_Input{
input = s,
};
err = load(ctx, &buf);
diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin
index a9f833ae4..6a17627bc 100644
--- a/core/compress/gzip/gzip.odin
+++ b/core/compress/gzip/gzip.odin
@@ -99,45 +99,54 @@ E_GZIP :: compress.GZIP_Error;
E_ZLIB :: compress.ZLIB_Error;
E_Deflate :: compress.Deflate_Error;
-load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
+GZIP_MAX_PAYLOAD_SIZE :: int(max(u32le));
- r := bytes.Reader{};
- bytes.reader_init(&r, slice);
- stream := bytes.reader_to_stream(&r);
+load :: proc{load_from_slice, load_from_file, load_from_context};
- ctx := &compress.Context{
- input = stream,
- input_data = slice,
- input_fully_in_memory = true,
- input_refills_from_stream = true,
- };
- err = load_from_stream(ctx, buf, allocator);
-
- return err;
-}
-
-load_from_file :: proc(filename: string, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
+load_from_file :: proc(filename: string, buf: ^bytes.Buffer, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
data, ok := os.read_entire_file(filename, allocator);
defer delete(data);
err = E_General.File_Not_Found;
if ok {
- err = load_from_slice(data, buf, allocator);
+ err = load_from_slice(data, buf, len(data), expected_output_size, allocator);
}
return;
}
-load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
+load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
+ buf := buf;
+
+ z := &compress.Context_Memory_Input{
+ input_data = slice,
+ output = buf,
+ };
+ return load_from_context(z, buf, known_gzip_size, expected_output_size, allocator);
+}
+
+load_from_context :: proc(z: ^$C, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) {
buf := buf;
- ws := bytes.buffer_to_stream(buf);
- ctx.output = ws;
+ expected_output_size := expected_output_size;
+
+ input_data_consumed := 0;
+
+ z.output = buf;
+
+ if expected_output_size > GZIP_MAX_PAYLOAD_SIZE {
+ return E_GZIP.Payload_Size_Exceeds_Max_Payload;
+ }
+
+ if expected_output_size > compress.COMPRESS_OUTPUT_ALLOCATE_MAX {
+ return E_GZIP.Output_Exceeds_COMPRESS_OUTPUT_ALLOCATE_MAX;
+ }
b: []u8;
- header, e := compress.read_data(ctx, Header);
+ header, e := compress.read_data(z, Header);
if e != .None {
return E_General.File_Too_Short;
}
+ input_data_consumed += size_of(Header);
if header.magic != .GZIP {
return E_GZIP.Invalid_GZIP_Signature;
@@ -162,7 +171,9 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
// printf("os: %v\n", OS_Name[header.os]);
if .extra in header.flags {
- xlen, e_extra := compress.read_data(ctx, u16le);
+ xlen, e_extra := compress.read_data(z, u16le);
+ input_data_consumed += 2;
+
if e_extra != .None {
return E_General.Stream_Too_Short;
}
@@ -178,19 +189,21 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
for xlen >= 4 {
// println("Parsing Extra field(s).");
- field_id, field_error = compress.read_data(ctx, [2]u8);
+ field_id, field_error = compress.read_data(z, [2]u8);
if field_error != .None {
// printf("Parsing Extra returned: %v\n", field_error);
return E_General.Stream_Too_Short;
}
xlen -= 2;
+ input_data_consumed += 2;
- field_length, field_error = compress.read_data(ctx, u16le);
+ field_length, field_error = compress.read_data(z, u16le);
if field_error != .None {
// printf("Parsing Extra returned: %v\n", field_error);
return E_General.Stream_Too_Short;
}
xlen -= 2;
+ input_data_consumed += 2;
if xlen <= 0 {
// We're not going to try and recover by scanning for a ZLIB header.
@@ -200,12 +213,13 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
// printf(" Field \"%v\" of length %v found: ", string(field_id[:]), field_length);
if field_length > 0 {
- b, field_error = compress.read_slice(ctx, int(field_length));
+ b, field_error = compress.read_slice(z, int(field_length));
if field_error != .None {
// printf("Parsing Extra returned: %v\n", field_error);
return E_General.Stream_Too_Short;
}
xlen -= field_length;
+ input_data_consumed += int(field_length);
// printf("%v\n", string(field_data));
}
@@ -223,10 +237,11 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
name_error: io.Error;
for i < len(name) {
- b, name_error = compress.read_slice(ctx, 1);
+ b, name_error = compress.read_slice(z, 1);
if name_error != .None {
return E_General.Stream_Too_Short;
}
+ input_data_consumed += 1;
if b[0] == 0 {
break;
}
@@ -246,10 +261,11 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
comment_error: io.Error;
for i < len(comment) {
- b, comment_error = compress.read_slice(ctx, 1);
+ b, comment_error = compress.read_slice(z, 1);
if comment_error != .None {
return E_General.Stream_Too_Short;
}
+ input_data_consumed += 1;
if b[0] == 0 {
break;
}
@@ -264,7 +280,8 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
if .header_crc in header.flags {
crc_error: io.Error;
- _, crc_error = compress.read_slice(ctx, 2);
+ _, crc_error = compress.read_slice(z, 2);
+ input_data_consumed += 2;
if crc_error != .None {
return E_General.Stream_Too_Short;
}
@@ -277,44 +294,74 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator :
/*
We should have arrived at the ZLIB payload.
*/
- code_buffer := compress.Code_Buffer{};
- cb := &code_buffer;
+ payload_u32le: u32le;
- zlib_error := zlib.inflate_raw(ctx, &code_buffer);
+ // fmt.printf("known_gzip_size: %v | expected_output_size: %v\n", known_gzip_size, expected_output_size);
+
+ if expected_output_size > -1 {
+ /*
+ We already checked that it's not larger than the output buffer max,
+			or the GZIP length field's max.
+
+			We'll just pass it on to `zlib.inflate_raw`.
+ */
+ } else {
+ /*
+ If we know the size of the GZIP file *and* it is fully in memory,
+ then we can peek at the unpacked size at the end.
+
+			We'll still want to ensure there's capacity left in the output buffer when we write, of course.
+ */
+ if known_gzip_size > -1 {
+ offset := i64(known_gzip_size - input_data_consumed - 4);
+ size, _ := compress.input_size(z);
+ if size >= offset + 4 {
+ length_bytes := z.input_data[offset:][:4];
+ payload_u32le = (^u32le)(&length_bytes[0])^;
+ expected_output_size = int(payload_u32le);
+ }
+ } else {
+ /*
+ TODO(Jeroen): When reading a GZIP from a stream, check if impl_seek is present.
+ If so, we can seek to the end, grab the size from the footer, and seek back to payload start.
+ */
+ }
+ }
+
+ // fmt.printf("GZIP: Expected Payload Size: %v\n", expected_output_size);
+
+ zlib_error := zlib.inflate_raw(z=z, expected_output_size=expected_output_size);
if zlib_error != nil {
return zlib_error;
}
/*
Read CRC32 using the ctx bit reader because zlib may leave bytes in there.
*/
- compress.discard_to_next_byte_lsb(cb);
+ compress.discard_to_next_byte_lsb(z);
footer_error: io.Error;
payload_crc_b: [4]u8;
for _, i in payload_crc_b {
- if cb.num_bits >= 8 {
- payload_crc_b[i] = u8(compress.read_bits_lsb(ctx, cb, 8));
- } else {
- payload_crc_b[i], footer_error = compress.read_u8(ctx);
- }
+ payload_crc_b[i], footer_error = compress.read_u8_prefer_code_buffer_lsb(z);
}
payload_crc := transmute(u32le)payload_crc_b;
- payload_len: u32le;
- payload_len, footer_error = compress.read_data(ctx, u32le);
-
payload := bytes.buffer_to_bytes(buf);
- crc32 := u32le(hash.crc32(payload));
-
+ crc32 := u32le(hash.crc32(payload));
if crc32 != payload_crc {
return E_GZIP.Payload_CRC_Invalid;
}
+ payload_len_b: [4]u8;
+ for _, i in payload_len_b {
+ payload_len_b[i], footer_error = compress.read_u8_prefer_code_buffer_lsb(z);
+ }
+ payload_len := transmute(u32le)payload_len_b;
+
if len(payload) != int(payload_len) {
return E_GZIP.Payload_Length_Invalid;
}
return nil;
}
-
-load :: proc{load_from_file, load_from_slice, load_from_stream};
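
A usage sketch for the reworked gzip entry points; `data` is a placeholder for a GZIP file's contents:

package main

import "core:bytes"
import "core:compress/gzip"
import "core:fmt"

main :: proc() {
	data: []u8;	// Placeholder: the contents of a .gz file.
	buf: bytes.Buffer;
	defer bytes.buffer_destroy(&buf);

	// With `known_gzip_size` set and the input fully in memory, `load` can peek
	// at the payload-size footer field and pre-allocate the output buffer.
	err := gzip.load(slice=data, buf=&buf, known_gzip_size=len(data));
	if err != nil {
		fmt.printf("gzip.load returned %v\n", err);
		return;
	}
	fmt.println(bytes.buffer_to_string(&buf));
}
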
diff --git a/core/compress/zlib/example.odin b/core/compress/zlib/example.odin
index 4d951b2f4..aab074fb4 100644
--- a/core/compress/zlib/example.odin
+++ b/core/compress/zlib/example.odin
@@ -35,11 +35,12 @@ main :: proc() {
171, 15, 18, 59, 138, 112, 63, 23, 205, 110, 254, 136, 109, 78, 231,
63, 234, 138, 133, 204,
};
+ OUTPUT_SIZE :: 438;
buf: bytes.Buffer;
// We can pass ", true" to inflate a raw DEFLATE stream instead of a ZLIB wrapped one.
- err := inflate(ODIN_DEMO, &buf);
+ err := inflate(input=ODIN_DEMO, buf=&buf, expected_output_size=OUTPUT_SIZE);
defer bytes.buffer_destroy(&buf);
if err != nil {
@@ -47,5 +48,5 @@ main :: proc() {
}
s := bytes.buffer_to_string(&buf);
fmt.printf("Input: %v bytes, output (%v bytes):\n%v\n", len(ODIN_DEMO), len(s), s);
- assert(len(s) == 438);
+ assert(len(s) == OUTPUT_SIZE);
}
diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin
index ce15ea147..c9439b285 100644
--- a/core/compress/zlib/zlib.odin
+++ b/core/compress/zlib/zlib.odin
@@ -13,10 +13,8 @@ import "core:compress"
import "core:mem"
import "core:io"
-import "core:bytes"
import "core:hash"
-
-// when #config(TRACY_ENABLE, false) { import tracy "shared:odin-tracy" }
+import "core:bytes"
/*
zlib.inflate decompresses a ZLIB stream passed in as a []u8 or io.Stream.
@@ -31,10 +29,6 @@ import "core:hash"
`Context.rolling_hash` if not inlining it is still faster.
*/
-INLINE_ADLER :: false;
-
-Context :: compress.Context;
-Code_Buffer :: compress.Code_Buffer;
Compression_Method :: enum u8 {
DEFLATE = 8,
@@ -140,70 +134,105 @@ z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
return;
}
+
+@(optimization_mode="speed")
+grow_buffer :: proc(buf: ^[dynamic]u8) -> (err: compress.Error) {
+ /*
+ That we get here at all means that we didn't pass an expected output size,
+		or that it was too small.
+ */
+
+ /*
+ Double until we reach the maximum allowed.
+ */
+ new_size := min(len(buf) << 1, compress.COMPRESS_OUTPUT_ALLOCATE_MAX);
+ resize(buf, new_size);
+ if len(buf) != new_size {
+ /*
+ Resize failed.
+ */
+ return .Resize_Failed;
+ }
+
+ return nil;
+}
+
+/*
+ TODO: Make these return compress.Error.
+*/
+
@(optimization_mode="speed")
-write_byte :: #force_inline proc(z: ^Context, cb: ^Code_Buffer, c: u8) -> (err: io.Error) #no_bounds_check {
- when #config(TRACY_ENABLE, false) { tracy.ZoneN("Write Byte"); }
- c := c;
- buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1};
- when INLINE_ADLER { z.rolling_hash = hash.adler32(buf, z.rolling_hash); }
-
- _, e := z.output->impl_write(buf);
- if e != .None {
- return e;
+write_byte :: #force_inline proc(z: ^$C, c: u8) -> (err: io.Error) #no_bounds_check {
+ /*
+ Resize if needed.
+ */
+ if int(z.bytes_written) + 1 >= len(z.output.buf) {
+ e := grow_buffer(&z.output.buf);
+ if e != nil {
+ return .Short_Write;
+ }
}
- cb.last[z.bytes_written & cb.window_mask] = c;
+ #no_bounds_check {
+ z.output.buf[z.bytes_written] = c;
+ }
z.bytes_written += 1;
return .None;
}
@(optimization_mode="speed")
-repl_byte :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, c: u8) -> (err: io.Error) {
- when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Byte"); }
+repl_byte :: proc(z: ^$C, count: u16, c: u8) -> (err: io.Error) #no_bounds_check {
/*
TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
without having to worry about wrapping, so no need for a temp allocation to give to
the output stream, just give it _that_ slice.
*/
- buf := make([]u8, count, context.temp_allocator);
- #no_bounds_check for i in 0..<count {
- buf[i] = c;
- cb.last[z.bytes_written & cb.window_mask] = c;
- z.bytes_written += 1;
+
+ /*
+ Resize if needed.
+ */
+ if int(z.bytes_written) + int(count) >= len(z.output.buf) {
+ e := grow_buffer(&z.output.buf);
+ if e != nil {
+ return .Short_Write;
+ }
}
- when INLINE_ADLER { z.rolling_hash = hash.adler32(buf, z.rolling_hash); }
- _, e := z.output->impl_write(buf);
- if e != .None {
- return e;
+ #no_bounds_check {
+ for _ in 0..<count {
+ z.output.buf[z.bytes_written] = c;
+ z.bytes_written += 1;
+ }
}
+
return .None;
}
@(optimization_mode="speed")
-repl_bytes :: proc(z: ^Context, cb: ^Code_Buffer, count: u16, distance: u16) -> (err: io.Error) {
- when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Bytes"); }
+repl_bytes :: proc(z: ^$C, count: u16, distance: u16) -> (err: io.Error) {
/*
TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
without having to worry about wrapping, so no need for a temp allocation to give to
the output stream, just give it _that_ slice.
*/
- buf := make([]u8, count, context.temp_allocator);
- offset := z.bytes_written - i64(distance);
- #no_bounds_check for i in 0..<count {
- c := cb.last[offset & cb.window_mask];
+ offset := i64(distance);
- cb.last[z.bytes_written & cb.window_mask] = c;
- buf[i] = c;
- z.bytes_written += 1; offset += 1;
+ if int(z.bytes_written) + int(count) >= len(z.output.buf) {
+ e := grow_buffer(&z.output.buf);
+ if e != nil {
+ return .Short_Write;
+ }
}
- when INLINE_ADLER { z.rolling_hash = hash.adler32(buf, z.rolling_hash); }
- _, e := z.output->impl_write(buf);
- if e != .None {
- return e;
+ #no_bounds_check {
+ for _ in 0..<count {
+ c := z.output.buf[z.bytes_written - offset];
+ z.output.buf[z.bytes_written] = c;
+ z.bytes_written += 1;
+ }
}
+
return .None;
}
@@ -214,7 +243,6 @@ allocate_huffman_table :: proc(allocator := context.allocator) -> (z: ^Huffman_T
@(optimization_mode="speed")
build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
- when #config(TRACY_ENABLE, false) { tracy.ZoneN("Build Huffman Table"); }
sizes: [HUFFMAN_MAX_BITS+1]int;
next_code: [HUFFMAN_MAX_BITS]int;
@@ -273,9 +301,8 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
}
@(optimization_mode="speed")
-decode_huffman_slowpath :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
- when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman Slow"); }
- code := u16(compress.peek_bits_lsb(z, cb, 16));
+decode_huffman_slowpath :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+	code := u16(compress.peek_bits_lsb(z, 16));
k := int(z_bit_reverse(code, 16));
s: u8;
@@ -298,43 +325,41 @@ decode_huffman_slowpath :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table
return 0, E_Deflate.Bad_Huffman_Code;
}
- compress.consume_bits_lsb(cb, s);
+ compress.consume_bits_lsb(z, s);
r = t.value[b];
return r, nil;
}
@(optimization_mode="speed")
-decode_huffman :: proc(z: ^Context, cb: ^Code_Buffer, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
- when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman"); }
- if cb.num_bits < 16 {
- if cb.num_bits > 63 {
+decode_huffman :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+ if z.num_bits < 16 {
+ if z.num_bits > 63 {
return 0, E_ZLIB.Code_Buffer_Malformed;
}
- compress.refill_lsb(z, cb);
- if cb.num_bits > 63 {
+ compress.refill_lsb(z);
+ if z.num_bits > 63 {
return 0, E_General.Stream_Too_Short;
}
}
- #no_bounds_check b := t.fast[cb.code_buffer & ZFAST_MASK];
+ #no_bounds_check b := t.fast[z.code_buffer & ZFAST_MASK];
if b != 0 {
s := u8(b >> ZFAST_BITS);
- compress.consume_bits_lsb(cb, s);
+ compress.consume_bits_lsb(z, s);
return b & 511, nil;
}
- return decode_huffman_slowpath(z, cb, t);
+ return decode_huffman_slowpath(z, t);
}
@(optimization_mode="speed")
-parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
- when #config(TRACY_ENABLE, false) { tracy.ZoneN("Parse Huffman Block"); }
+parse_huffman_block :: proc(z: ^$C, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
#no_bounds_check for {
- value, e := decode_huffman(z, cb, z_repeat);
+ value, e := decode_huffman(z, z_repeat);
if e != nil {
return err;
}
if value < 256 {
- e := write_byte(z, cb, u8(value));
+ e := write_byte(z, u8(value));
if e != .None {
return E_General.Output_Too_Short;
}
@@ -347,17 +372,17 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
value -= 257;
length := Z_LENGTH_BASE[value];
if Z_LENGTH_EXTRA[value] > 0 {
- length += u16(compress.read_bits_lsb(z, cb, Z_LENGTH_EXTRA[value]));
+ length += u16(compress.read_bits_lsb(z, Z_LENGTH_EXTRA[value]));
}
- value, e = decode_huffman(z, cb, z_offset);
+ value, e = decode_huffman(z, z_offset);
if e != nil {
return E_Deflate.Bad_Huffman_Code;
}
distance := Z_DIST_BASE[value];
if Z_DIST_EXTRA[value] > 0 {
- distance += u16(compress.read_bits_lsb(z, cb, Z_DIST_EXTRA[value]));
+ distance += u16(compress.read_bits_lsb(z, Z_DIST_EXTRA[value]));
}
if z.bytes_written < i64(distance) {
@@ -365,7 +390,6 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
return E_Deflate.Bad_Distance;
}
- offset := i64(z.bytes_written - i64(distance));
/*
These might be sped up with a repl_byte call that copies
from the already written output more directly, and that
@@ -378,15 +402,15 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
Replicate the last outputted byte, length times.
*/
if length > 0 {
- c := cb.last[offset & cb.window_mask];
- e := repl_byte(z, cb, length, c);
+ c := z.output.buf[z.bytes_written - i64(distance)];
+ e := repl_byte(z, length, c);
if e != .None {
return E_General.Output_Too_Short;
}
}
} else {
if length > 0 {
- e := repl_bytes(z, cb, length, distance);
+ e := repl_bytes(z, length, distance);
if e != .None {
return E_General.Output_Too_Short;
}
@@ -397,25 +421,17 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^
}
@(optimization_mode="speed")
-inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+inflate_from_context :: proc(using ctx: ^compress.Context_Memory_Input, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
/*
- ctx.input must be an io.Stream backed by an implementation that supports:
- - read
- - size
-
- ctx.output must be an io.Stream backed by an implementation that supports:
- - write
+ ctx.output must be a bytes.Buffer for now. We'll add a separate implementation that writes to a stream.
raw determines whether the ZLIB header is processed, or we're inflating a raw
DEFLATE stream.
*/
- code_buffer := Code_Buffer{};
- cb := &code_buffer;
-
if !raw {
- data_size := io.size(ctx.input);
- if data_size < 6 {
+ size, size_err := compress.input_size(ctx);
+ if size < 6 || size_err != nil {
return E_General.Stream_Too_Short;
}
@@ -430,8 +446,6 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
if cinfo > 7 {
return E_ZLIB.Unsupported_Window_Size;
}
- cb.window_mask = i64((1 << (cinfo + 8) - 1));
-
flg, _ := compress.read_u8(ctx);
fcheck := flg & 0x1f;
@@ -456,40 +470,61 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
at the end to compare checksums.
*/
- // Seed the Adler32 rolling checksum.
- ctx.rolling_hash = 1;
}
// Parse ZLIB stream without header.
- err = inflate_raw(ctx, cb);
+ err = inflate_raw(z=ctx, expected_output_size=expected_output_size);
if err != nil {
return err;
}
if !raw {
- compress.discard_to_next_byte_lsb(cb);
- adler32 := compress.read_bits_lsb(ctx, cb, 8) << 24 | compress.read_bits_lsb(ctx, cb, 8) << 16 | compress.read_bits_lsb(ctx, cb, 8) << 8 | compress.read_bits_lsb(ctx, cb, 8);
+ compress.discard_to_next_byte_lsb(ctx);
- when !INLINE_ADLER {
- buf := (^bytes.Buffer)(ctx.output.stream_data).buf[:];
- ctx.rolling_hash = hash.adler32(buf);
+ adler_b: [4]u8;
+ for _, i in adler_b {
+ adler_b[i], _ = compress.read_u8_prefer_code_buffer_lsb(ctx);
}
+ adler := transmute(u32be)adler_b;
- if ctx.rolling_hash != u32(adler32) {
+ output_hash := hash.adler32(ctx.output.buf[:]);
+
+ if output_hash != u32(adler) {
return E_General.Checksum_Failed;
}
}
return nil;
}
+// TODO: Check alignment of reserve/resize.
+
@(optimization_mode="speed")
-inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := context.allocator) -> (err: Error) #no_bounds_check {
- when #config(TRACY_ENABLE, false) { tracy.ZoneN("Inflate Raw"); }
- final := u32(0);
- type := u32(0);
+inflate_raw :: proc(z: ^$C, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+ expected_output_size := expected_output_size;
+
+ if expected_output_size <= 0 {
+ /*
+ Always set up a minimum allocation size.
+ */
+ expected_output_size = compress.COMPRESS_OUTPUT_ALLOCATE_MIN;
+ }
+
+ // fmt.printf("\nZLIB: Expected Payload Size: %v\n\n", expected_output_size);
+
+ if expected_output_size > 0 && expected_output_size <= compress.COMPRESS_OUTPUT_ALLOCATE_MAX {
+ /*
+ Try to pre-allocate the output buffer.
+ */
+ reserve(&z.output.buf, expected_output_size);
+ resize (&z.output.buf, expected_output_size);
+ };
+
+ if len(z.output.buf) != expected_output_size {
+ return .Resize_Failed;
+ }
- cb.num_bits = 0;
- cb.code_buffer = 0;
+ z.num_bits = 0;
+ z.code_buffer = 0;
z_repeat: ^Huffman_Table;
z_offset: ^Huffman_Table;
@@ -511,30 +546,24 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := cont
defer free(z_offset);
defer free(codelength_ht);
- if cb.window_mask == 0 {
- cb.window_mask = DEFLATE_MAX_DISTANCE - 1;
- }
-
- // Allocate rolling window buffer.
- cb.last = mem.make_dynamic_array_len_cap([dynamic]u8, cb.window_mask + 1, cb.window_mask + 1, allocator);
- defer delete(cb.last);
+ final := u32(0);
+ type := u32(0);
for {
- final = compress.read_bits_lsb(z, cb, 1);
- type = compress.read_bits_lsb(z, cb, 2);
+ final = compress.read_bits_lsb(z, 1);
+ type = compress.read_bits_lsb(z, 2);
// fmt.printf("Final: %v | Type: %v\n", final, type);
switch type {
case 0:
- when #config(TRACY_ENABLE, false) { tracy.ZoneN("Literal Block"); }
// Uncompressed block
// Discard bits until next byte boundary
- compress.discard_to_next_byte_lsb(cb);
+ compress.discard_to_next_byte_lsb(z);
- uncompressed_len := i16(compress.read_bits_lsb(z, cb, 16));
- length_check := i16(compress.read_bits_lsb(z, cb, 16));
+ uncompressed_len := i16(compress.read_bits_lsb(z, 16));
+ length_check := i16(compress.read_bits_lsb(z, 16));
// fmt.printf("LEN: %v, ~LEN: %v, NLEN: %v, ~NLEN: %v\n", uncompressed_len, ~uncompressed_len, length_check, ~length_check);
@@ -548,15 +577,14 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := cont
and a single Adler32 update after.
*/
#no_bounds_check for uncompressed_len > 0 {
- compress.refill_lsb(z, cb);
- lit := compress.read_bits_lsb(z, cb, 8);
- write_byte(z, cb, u8(lit));
+ compress.refill_lsb(z);
+ lit := compress.read_bits_lsb(z, 8);
+ write_byte(z, u8(lit));
uncompressed_len -= 1;
}
case 3:
return E_Deflate.BType_3;
case:
- when #config(TRACY_ENABLE, false) { tracy.ZoneN("Huffman Block"); }
// log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
if type == 1 {
// Use fixed code lengths.
@@ -575,14 +603,14 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := cont
//i: u32;
n: u32;
- compress.refill_lsb(z, cb, 14);
- hlit := compress.read_bits_no_refill_lsb(z, cb, 5) + 257;
- hdist := compress.read_bits_no_refill_lsb(z, cb, 5) + 1;
- hclen := compress.read_bits_no_refill_lsb(z, cb, 4) + 4;
+ compress.refill_lsb(z, 14);
+ hlit := compress.read_bits_no_refill_lsb(z, 5) + 257;
+ hdist := compress.read_bits_no_refill_lsb(z, 5) + 1;
+ hclen := compress.read_bits_no_refill_lsb(z, 4) + 4;
ntot := hlit + hdist;
#no_bounds_check for i in 0..<hclen {
- s := compress.read_bits_lsb(z, cb, 3);
+ s := compress.read_bits_lsb(z, 3);
codelength_sizes[Z_LENGTH_DEZIGZAG[i]] = u8(s);
}
err = build_huffman(codelength_ht, codelength_sizes[:]);
@@ -594,7 +622,7 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := cont
c: u16;
for n < ntot {
- c, err = decode_huffman(z, cb, codelength_ht);
+ c, err = decode_huffman(z, codelength_ht);
if err != nil {
return err;
}
@@ -607,18 +635,18 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := cont
n += 1;
} else {
fill := u8(0);
- compress.refill_lsb(z, cb, 7);
+ compress.refill_lsb(z, 7);
switch c {
case 16:
- c = u16(compress.read_bits_no_refill_lsb(z, cb, 2) + 3);
+ c = u16(compress.read_bits_no_refill_lsb(z, 2) + 3);
if n == 0 {
return E_Deflate.Huffman_Bad_Code_Lengths;
}
fill = lencodes[n - 1];
case 17:
- c = u16(compress.read_bits_no_refill_lsb(z, cb, 3) + 3);
+ c = u16(compress.read_bits_no_refill_lsb(z, 3) + 3);
case 18:
- c = u16(compress.read_bits_no_refill_lsb(z, cb, 7) + 11);
+ c = u16(compress.read_bits_no_refill_lsb(z, 7) + 11);
case:
return E_Deflate.Huffman_Bad_Code_Lengths;
}
@@ -648,7 +676,7 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := cont
return err;
}
}
- err = parse_huffman_block(z, cb, z_repeat, z_offset);
+ err = parse_huffman_block(z, z_repeat, z_offset);
// log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
if err != nil {
return err;
@@ -659,44 +687,31 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := cont
}
}
+ if int(z.bytes_written) != len(z.output.buf) {
+ resize(&z.output.buf, int(z.bytes_written));
+ }
+
return nil;
}
-inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false) -> (err: Error) {
- ctx := Context{};
+inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
+ ctx := compress.Context_Memory_Input{};
- r := bytes.Reader{};
- bytes.reader_init(&r, input);
- rs := bytes.reader_to_stream(&r);
- ctx.input = rs;
ctx.input_data = input;
- ctx.input_fully_in_memory = true;
-
- buf := buf;
- ws := bytes.buffer_to_stream(buf);
- ctx.output = ws;
+ ctx.output = buf;
- err = inflate_from_stream(&ctx, raw);
+ err = inflate_from_context(ctx=&ctx, raw=raw, expected_output_size=expected_output_size);
return err;
}
-inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_Buffer, raw := false) -> (err: Error) {
- ctx := Context{};
+inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, expected_output_size := -1) -> (err: Error) {
+ ctx := compress.Context_Memory_Input{};
- r := bytes.Reader{};
- bytes.reader_init(&r, input);
- rs := bytes.reader_to_stream(&r);
- ctx.input = rs;
ctx.input_data = input;
- ctx.input_fully_in_memory = true;
-
- buf := buf;
- ws := bytes.buffer_to_stream(buf);
- ctx.output = ws;
+ ctx.output = buf;
- return inflate_from_stream_raw(&ctx, cb);
+ return inflate_raw(z=&ctx, expected_output_size=expected_output_size);
}
-inflate :: proc{inflate_from_stream, inflate_from_byte_array};
-inflate_raw :: proc{inflate_from_stream_raw, inflate_from_byte_array_raw};
inflate :: proc{inflate_from_context, inflate_from_byte_array};
\ No newline at end of file
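
The raw-DEFLATE path through the rewritten entry points, as a sketch; `deflate_payload` is a placeholder:

package main

import "core:bytes"
import "core:compress/zlib"

main :: proc() {
	deflate_payload: []u8;	// Placeholder: a raw DEFLATE stream, no ZLIB wrapper.
	buf: bytes.Buffer;
	defer bytes.buffer_destroy(&buf);

	// `raw = true` skips the ZLIB header and Adler-32 checksum handling.
	// With no `expected_output_size`, the output buffer starts at
	// COMPRESS_OUTPUT_ALLOCATE_MIN and doubles via `grow_buffer` as needed.
	err := zlib.inflate(input=deflate_payload, buf=&buf, raw=true);
	_ = err;
}
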
diff --git a/core/image/png/example.odin b/core/image/png/example.odin
index 8fca684ab..b84876ac8 100644
--- a/core/image/png/example.odin
+++ b/core/image/png/example.odin
@@ -41,7 +41,7 @@ main :: proc() {
demo :: proc() {
file: string;
- options := image.Options{.return_metadata};
+ options := image.Options{}; // {.return_metadata};
err: compress.Error;
img: ^image.Image;
@@ -56,9 +56,9 @@ demo :: proc() {
v: ^Info;
fmt.printf("Image: %vx%vx%v, %v-bit.\n", img.width, img.height, img.channels, img.depth);
-
if img.metadata_ptr != nil && img.metadata_type == Info {
v = (^Info)(img.metadata_ptr);
+
// Handle ancillary chunks as you wish.
// We provide helper functions for a few types.
for c in v.chunks {
diff --git a/core/image/png/png.odin b/core/image/png/png.odin
index afb71ca54..e3a36c2fe 100644
--- a/core/image/png/png.odin
+++ b/core/image/png/png.odin
@@ -245,7 +245,7 @@ ADAM7_Y_SPACING := []int{ 8,8,8,4,4,2,2 };
// Implementation starts here
-read_chunk :: proc(ctx: ^compress.Context) -> (chunk: Chunk, err: Error) {
+read_chunk :: proc(ctx: ^$C) -> (chunk: Chunk, err: Error) {
ch, e := compress.read_data(ctx, Chunk_Header);
if e != .None {
return {}, E_General.Stream_Too_Short;
@@ -274,7 +274,7 @@ read_chunk :: proc(ctx: ^compress.Context) -> (chunk: Chunk, err: Error) {
return chunk, nil;
}
-read_header :: proc(ctx: ^compress.Context) -> (IHDR, Error) {
+read_header :: proc(ctx: ^$C) -> (IHDR, Error) {
c, e := read_chunk(ctx);
if e != nil {
return {}, e;
@@ -353,14 +353,8 @@ chunk_type_to_name :: proc(type: ^Chunk_Type) -> string {
}
load_from_slice :: proc(slice: []u8, options := Options{}, allocator := context.allocator) -> (img: ^Image, err: Error) {
- r := bytes.Reader{};
- bytes.reader_init(&r, slice);
- stream := bytes.reader_to_stream(&r);
-
- ctx := &compress.Context{
- input = stream,
+ ctx := &compress.Context_Memory_Input{
input_data = slice,
- input_fully_in_memory = true,
};
/*
@@ -368,7 +362,7 @@ load_from_slice :: proc(slice: []u8, options := Options{}, allocator := context.
This way the stream reader could avoid the copy into the temp memory returned by it,
and instead return a slice into the original memory that's already owned by the caller.
*/
- img, err = load_from_stream(ctx, options, allocator);
+ img, err = load_from_context(ctx, options, allocator);
return img, err;
}
@@ -386,7 +380,7 @@ load_from_file :: proc(filename: string, options := Options{}, allocator := cont
}
}
-load_from_stream :: proc(ctx: ^compress.Context, options := Options{}, allocator := context.allocator) -> (img: ^Image, err: Error) {
+load_from_context :: proc(ctx: ^$C, options := Options{}, allocator := context.allocator) -> (img: ^Image, err: Error) {
options := options;
if .info in options {
options |= {.return_metadata, .do_not_decompress_image};
@@ -674,39 +668,41 @@ load_from_stream :: proc(ctx: ^compress.Context, options := Options{}, allocator
return img, E_PNG.IDAT_Missing;
}
- buf: bytes.Buffer;
- zlib_error := zlib.inflate(idat, &buf);
- defer bytes.buffer_destroy(&buf);
+ /*
+ Calculate the expected output size, to help `inflate` make better decisions about the output buffer.
+		We'll also use it to check that the returned buffer size is what we expected it to be.
- if zlib_error != nil {
- return {}, zlib_error;
+		Let's calculate the expected size of the IDAT based on its dimensions, and whether or not it's interlaced.
+ */
+ expected_size: int;
+
+ if header.interlace_method != .Adam7 {
+ expected_size = compute_buffer_size(int(header.width), int(header.height), int(img.channels), int(header.bit_depth), 1);
} else {
/*
- Let's calcalate the expected size of the IDAT based on its dimensions,
- and whether or not it's interlaced
+ Because Adam7 divides the image up into sub-images, and each scanline must start
+ with a filter byte, Adam7 interlaced images can have a larger raw size.
*/
- expected_size: int;
- buf_len := len(buf.buf);
-
- if header.interlace_method != .Adam7 {
- expected_size = compute_buffer_size(int(header.width), int(header.height), int(img.channels), int(header.bit_depth), 1);
- } else {
- /*
- Because Adam7 divides the image up into sub-images, and each scanline must start
- with a filter byte, Adam7 interlaced images can have a larger raw size.
- */
- for p := 0; p < 7; p += 1 {
- x := (int(header.width) - ADAM7_X_ORIG[p] + ADAM7_X_SPACING[p] - 1) / ADAM7_X_SPACING[p];
- y := (int(header.height) - ADAM7_Y_ORIG[p] + ADAM7_Y_SPACING[p] - 1) / ADAM7_Y_SPACING[p];
- if x > 0 && y > 0 {
- expected_size += compute_buffer_size(int(x), int(y), int(img.channels), int(header.bit_depth), 1);
- }
+ for p := 0; p < 7; p += 1 {
+ x := (int(header.width) - ADAM7_X_ORIG[p] + ADAM7_X_SPACING[p] - 1) / ADAM7_X_SPACING[p];
+ y := (int(header.height) - ADAM7_Y_ORIG[p] + ADAM7_Y_SPACING[p] - 1) / ADAM7_Y_SPACING[p];
+ if x > 0 && y > 0 {
+ expected_size += compute_buffer_size(int(x), int(y), int(img.channels), int(header.bit_depth), 1);
}
}
+ }
- if expected_size != buf_len {
- return {}, E_PNG.IDAT_Corrupt;
- }
+ buf: bytes.Buffer;
+ zlib_error := zlib.inflate(idat, &buf, false, expected_size);
+ defer bytes.buffer_destroy(&buf);
+
+ if zlib_error != nil {
+ return {}, zlib_error;
+ }
+
+ buf_len := len(buf.buf);
+ if expected_size != buf_len {
+ return {}, E_PNG.IDAT_Corrupt;
}
/*
@@ -1657,4 +1653,4 @@ defilter :: proc(img: ^Image, filter_bytes: ^bytes.Buffer, header: ^IHDR, option
return nil;
}
-load :: proc{load_from_file, load_from_slice, load_from_stream};
+load :: proc{load_from_file, load_from_slice, load_from_context};
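
Finally, a sketch of the renamed PNG loader in use; `test.png` is a placeholder path:

package main

import "core:fmt"
import "core:image"
import "core:image/png"

main :: proc() {
	// `load` now resolves to load_from_file, load_from_slice, or load_from_context.
	img, err := png.load("test.png", image.Options{.return_metadata});
	if err != nil {
		fmt.printf("PNG load error: %v\n", err);
		return;
	}
	fmt.printf("Image: %vx%vx%v, %v-bit.\n", img.width, img.height, img.channels, img.depth);
}
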