diff options
| field | value | date |
|---|---|---|
| author | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-06-26 13:17:14 +0200 |
| committer | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-06-26 13:17:14 +0200 |
| commit | 40a12cca53dcecd73740bb4cb704cee25189cfc1 (patch) | |
| tree | 4cccf0656e8ee1cbaef1bd4f6ed189eb1c9d612c /core | |
| parent | ab12ca69af785267324bbe9e5d87f9f11a9ab901 (diff) | |
ZLIB: If output size is known, reserve that much.
Diffstat (limited to 'core')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | core/compress/common.odin | 54 |
| -rw-r--r-- | core/compress/gzip/example.odin | 2 |
| -rw-r--r-- | core/compress/gzip/gzip.odin | 80 |
| -rw-r--r-- | core/compress/zlib/example.odin | 6 |
| -rw-r--r-- | core/compress/zlib/zlib.odin | 52 |
5 files changed, 166 insertions, 28 deletions
diff --git a/core/compress/common.odin b/core/compress/common.odin index df798e751..a6ae230e5 100644 --- a/core/compress/common.odin +++ b/core/compress/common.odin @@ -11,6 +11,39 @@ package compress import "core:io" import "core:image" +/* + These settings bound how much compression algorithms will allocate for their output buffer. + If streaming their output, these are unnecessary and will be ignored. + +*/ + +/* + When a decompression routine doesn't stream its output, but writes to a buffer, + we pre-allocate an output buffer to speed up decompression. The default is 1 MiB. +*/ +COMPRESS_OUTPUT_ALLOCATE_MIN :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MIN, 1 << 20)); + +/* + This bounds the maximum a buffer will resize to as needed, or the maximum we'll + pre-allocate if you inform the decompression routine you know the payload size. + + For reference, the largest payload size of a GZIP file is 4 GiB. + +*/ +when size_of(uintptr) == 8 { + /* + For 64-bit platforms, we set the default max buffer size to 4 GiB, + which is GZIP and PKZIP's max payload size. + */ + COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 32)); +} else { + /* + For 32-bit platforms, we set the default max buffer size to 512 MiB. + */ + COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 29)); +} + + // when #config(TRACY_ENABLE, false) { import tracy "shared:odin-tracy" } Error :: union { @@ -46,6 +79,20 @@ GZIP_Error :: enum { Comment_Too_Long, Payload_Length_Invalid, Payload_CRC_Invalid, + + /* + GZIP's payload can be a maximum of max(u32le), or 4 GiB. + If you tell it you expect it to contain more, that's obviously an error. + */ + Payload_Size_Exceeds_Max_Payload, + /* + For buffered instead of streamed output, the payload size can't exceed + the max set by the `COMPRESS_OUTPUT_ALLOCATE_MAX` switch in compress/common.odin. 
+ + You can tweak this setting using `-define:COMPRESS_OUTPUT_ALLOCATE_MAX=size_in_bytes` + */ + Output_Exceeds_COMPRESS_OUTPUT_ALLOCATE_MAX, + } ZIP_Error :: enum { @@ -79,7 +126,7 @@ Context :: struct #packed { input_data: []u8, output: io.Stream, - output_buf: [dynamic]u8, + output_buf: ^[dynamic]u8, bytes_written: i64, /* @@ -103,9 +150,10 @@ Context :: struct #packed { */ input_fully_in_memory: b8, input_refills_from_stream: b8, - reserved_flags: [2]b8, + output_to_stream: b8, + reserved_flag: b8, } -#assert(size_of(Context) == 128); +// #assert(size_of(Context) == 128); /* Compression algorithm context diff --git a/core/compress/gzip/example.odin b/core/compress/gzip/example.odin index b4fc50ade..bfb4267b8 100644 --- a/core/compress/gzip/example.odin +++ b/core/compress/gzip/example.odin @@ -45,7 +45,7 @@ main :: proc() { if len(args) < 2 { stderr("No input file specified.\n"); - err := load(TEST, &buf); + err := load(slice=TEST, buf=&buf, known_gzip_size=len(TEST)); if err == nil { stdout("Displaying test vector: "); stdout(bytes.buffer_to_string(&buf)); diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin index a9f833ae4..4d185fa6e 100644 --- a/core/compress/gzip/gzip.odin +++ b/core/compress/gzip/gzip.odin @@ -21,6 +21,8 @@ import "core:io" import "core:bytes" import "core:hash" +// import "core:fmt" + Magic :: enum u16le { GZIP = 0x8b << 8 | 0x1f, } @@ -99,7 +101,9 @@ E_GZIP :: compress.GZIP_Error; E_ZLIB :: compress.ZLIB_Error; E_Deflate :: compress.Deflate_Error; -load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) { +GZIP_MAX_PAYLOAD_SIZE :: int(max(u32le)); + +load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) { r := bytes.Reader{}; bytes.reader_init(&r, slice); @@ -111,33 +115,47 @@ load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, allocator := context.al 
input_fully_in_memory = true, input_refills_from_stream = true, }; - err = load_from_stream(ctx, buf, allocator); + + err = load_from_stream(ctx, buf, known_gzip_size, expected_output_size, allocator); return err; } -load_from_file :: proc(filename: string, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) { +load_from_file :: proc(filename: string, buf: ^bytes.Buffer, expected_output_size := -1, allocator := context.allocator) -> (err: Error) { data, ok := os.read_entire_file(filename, allocator); defer delete(data); err = E_General.File_Not_Found; if ok { - err = load_from_slice(data, buf, allocator); + err = load_from_slice(data, buf, len(data), expected_output_size, allocator); } return; } -load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) { +load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) { buf := buf; + expected_output_size := expected_output_size; + + input_data_consumed := 0; + ws := bytes.buffer_to_stream(buf); ctx.output = ws; + if expected_output_size > GZIP_MAX_PAYLOAD_SIZE { + return E_GZIP.Payload_Size_Exceeds_Max_Payload; + } + + if expected_output_size > compress.COMPRESS_OUTPUT_ALLOCATE_MAX { + return E_GZIP.Output_Exceeds_COMPRESS_OUTPUT_ALLOCATE_MAX; + } + b: []u8; header, e := compress.read_data(ctx, Header); if e != .None { return E_General.File_Too_Short; } + input_data_consumed += size_of(Header); if header.magic != .GZIP { return E_GZIP.Invalid_GZIP_Signature; @@ -163,6 +181,8 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator : if .extra in header.flags { xlen, e_extra := compress.read_data(ctx, u16le); + input_data_consumed += 2; + if e_extra != .None { return E_General.Stream_Too_Short; } @@ -184,6 +204,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator : return 
E_General.Stream_Too_Short; } xlen -= 2; + input_data_consumed += 2; field_length, field_error = compress.read_data(ctx, u16le); if field_error != .None { @@ -191,6 +212,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator : return E_General.Stream_Too_Short; } xlen -= 2; + input_data_consumed += 2; if xlen <= 0 { // We're not going to try and recover by scanning for a ZLIB header. @@ -206,6 +228,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator : return E_General.Stream_Too_Short; } xlen -= field_length; + input_data_consumed += int(field_length); // printf("%v\n", string(field_data)); } @@ -227,6 +250,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator : if name_error != .None { return E_General.Stream_Too_Short; } + input_data_consumed += 1; if b[0] == 0 { break; } @@ -250,6 +274,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator : if comment_error != .None { return E_General.Stream_Too_Short; } + input_data_consumed += 1; if b[0] == 0 { break; } @@ -265,6 +290,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator : if .header_crc in header.flags { crc_error: io.Error; _, crc_error = compress.read_slice(ctx, 2); + input_data_consumed += 2; if crc_error != .None { return E_General.Stream_Too_Short; } @@ -280,7 +306,43 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator : code_buffer := compress.Code_Buffer{}; cb := &code_buffer; - zlib_error := zlib.inflate_raw(ctx, &code_buffer); + payload_u32le: u32le; + + // fmt.printf("known_gzip_size: %v | expected_output_size: %v\n", known_gzip_size, expected_output_size); + + if expected_output_size > -1 { + /* + We already checked that it's not larger than the output buffer max, + or GZIP length field's max. 
+ + We'll just pass it on to `zlib.inflate_raw`; + */ + } else { + /* + If we know the size of the GZIP file *and* it is fully in memory, + then we can peek at the unpacked size at the end. + + We'll still want to ensure there's capacity left in the output buffer when we write, of course. + + */ + if ctx.input_fully_in_memory && known_gzip_size > -1 { + offset := known_gzip_size - input_data_consumed - 4; + if len(ctx.input_data) >= offset + 4 { + length_bytes := ctx.input_data[offset:][:4]; + payload_u32le = (^u32le)(&length_bytes[0])^; + expected_output_size = int(payload_u32le); + } + } else { + /* + TODO(Jeroen): When reading a GZIP from a stream, check if impl_seek is present. + If so, we can seek to the end, grab the size from the footer, and seek back to payload start. + */ + } + } + + // fmt.printf("GZIP: Expected Payload Size: %v\n", expected_output_size); + + zlib_error := zlib.inflate_raw(z=ctx, cb=&code_buffer, expected_output_size=expected_output_size); if zlib_error != nil { return zlib_error; } @@ -300,9 +362,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator : } } payload_crc := transmute(u32le)payload_crc_b; - - payload_len: u32le; - payload_len, footer_error = compress.read_data(ctx, u32le); + payload_u32le, footer_error = compress.read_data(ctx, u32le); payload := bytes.buffer_to_bytes(buf); crc32 := u32le(hash.crc32(payload)); @@ -311,7 +371,7 @@ load_from_stream :: proc(ctx: ^compress.Context, buf: ^bytes.Buffer, allocator : return E_GZIP.Payload_CRC_Invalid; } - if len(payload) != int(payload_len) { + if len(payload) != int(payload_u32le) { return E_GZIP.Payload_Length_Invalid; } return nil; diff --git a/core/compress/zlib/example.odin b/core/compress/zlib/example.odin index 4d951b2f4..cfbbcd717 100644 --- a/core/compress/zlib/example.odin +++ b/core/compress/zlib/example.odin @@ -35,11 +35,13 @@ main :: proc() { 171, 15, 18, 59, 138, 112, 63, 23, 205, 110, 254, 136, 109, 78, 231, 63, 234, 138, 133, 204, }; + 
OUTPUT_SIZE :: 438; + buf: bytes.Buffer; // We can pass ", true" to inflate a raw DEFLATE stream instead of a ZLIB wrapped one. - err := inflate(ODIN_DEMO, &buf); + err := inflate(input=ODIN_DEMO, buf=&buf, expected_output_size=OUTPUT_SIZE); defer bytes.buffer_destroy(&buf); if err != nil { @@ -47,5 +49,5 @@ main :: proc() { } s := bytes.buffer_to_string(&buf); fmt.printf("Input: %v bytes, output (%v bytes):\n%v\n", len(ODIN_DEMO), len(s), s); - assert(len(s) == 438); + assert(len(s) == OUTPUT_SIZE); } diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin index ce15ea147..b29e65007 100644 --- a/core/compress/zlib/zlib.odin +++ b/core/compress/zlib/zlib.odin @@ -16,6 +16,8 @@ import "core:io" import "core:bytes" import "core:hash" +// import "core:fmt" + // when #config(TRACY_ENABLE, false) { import tracy "shared:odin-tracy" } /* @@ -397,7 +399,7 @@ parse_huffman_block :: proc(z: ^Context, cb: ^Code_Buffer, z_repeat, z_offset: ^ } @(optimization_mode="speed") -inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := context.allocator) -> (err: Error) #no_bounds_check { +inflate_from_stream :: proc(using ctx: ^Context, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check { /* ctx.input must be an io.Stream backed by an implementation that supports: - read @@ -461,7 +463,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont } // Parse ZLIB stream without header. 
- err = inflate_raw(ctx, cb); + err = inflate_raw(z=ctx, cb=cb, expected_output_size=expected_output_size); if err != nil { return err; } @@ -483,12 +485,29 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont } @(optimization_mode="speed") -inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := context.allocator) -> (err: Error) #no_bounds_check { +inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check { when #config(TRACY_ENABLE, false) { tracy.ZoneN("Inflate Raw"); } - final := u32(0); - type := u32(0); - cb.num_bits = 0; + buf := (^bytes.Buffer)(z.output.stream_data); + z.output_buf = &buf.buf; + + // fmt.printf("ZLIB: Expected Payload Size: %v\n", expected_output_size); + + if expected_output_size > -1 && expected_output_size <= compress.COMPRESS_OUTPUT_ALLOCATE_MAX { + reserve(z.output_buf, expected_output_size); + // resize (z.output_buf, expected_output_size); + } else { + reserve(z.output_buf, compress.COMPRESS_OUTPUT_ALLOCATE_MIN); + } + + // reserve(&z.output_buf, compress.COMPRESS_OUTPUT_ALLOCATE_MIN); + // resize (&z.output_buf, compress.COMPRESS_OUTPUT_ALLOCATE_MIN); + // fmt.printf("ZLIB: buf: %v\n", buf); + // fmt.printf("ZLIB: output_buf: %v\n", z.output_buf); + // fmt.printf("ZLIB: z.output: %v\n", z.output); + + + cb.num_bits = 0; cb.code_buffer = 0; z_repeat: ^Huffman_Table; @@ -519,6 +538,10 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := cont cb.last = mem.make_dynamic_array_len_cap([dynamic]u8, cb.window_mask + 1, cb.window_mask + 1, allocator); defer delete(cb.last); + + final := u32(0); + type := u32(0); + for { final = compress.read_bits_lsb(z, cb, 1); type = compress.read_bits_lsb(z, cb, 2); @@ -659,10 +682,15 @@ inflate_from_stream_raw :: proc(z: ^Context, cb: ^Code_Buffer, allocator := cont } } + // fmt.printf("ZLIB: Bytes written: %v\n", z.bytes_written); 
+ if int(z.bytes_written) != len(buf.buf) { + resize(&buf.buf, int(z.bytes_written)); + } + return nil; } -inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false) -> (err: Error) { +inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false, expected_output_size := -1) -> (err: Error) { ctx := Context{}; r := bytes.Reader{}; @@ -673,15 +701,15 @@ inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false) - ctx.input_fully_in_memory = true; buf := buf; - ws := bytes.buffer_to_stream(buf); + ws := bytes.buffer_to_stream(buf); ctx.output = ws; - err = inflate_from_stream(&ctx, raw); + err = inflate_from_stream(ctx=&ctx, raw=raw, expected_output_size=expected_output_size); return err; } -inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_Buffer, raw := false) -> (err: Error) { +inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_Buffer, raw := false, expected_output_size := -1) -> (err: Error) { ctx := Context{}; r := bytes.Reader{}; @@ -692,10 +720,10 @@ inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, cb: ^Code_B ctx.input_fully_in_memory = true; buf := buf; - ws := bytes.buffer_to_stream(buf); + ws := bytes.buffer_to_stream(buf); ctx.output = ws; - return inflate_from_stream_raw(&ctx, cb); + return inflate_from_stream_raw(z=&ctx, cb=cb, expected_output_size=expected_output_size); } inflate :: proc{inflate_from_stream, inflate_from_byte_array}; |