diff options
| author | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-06-27 13:19:24 +0200 |
|---|---|---|
| committer | Jeroen van Rijn <Kelimion@users.noreply.github.com> | 2021-06-27 13:19:24 +0200 |
| commit | 02f96681856fa4123746bda0f44324a7bdb98528 (patch) | |
| tree | 9be5b7dcac1919b275c1f4827d51ba7514e3cbc4 /core/compress/gzip | |
| parent | 4689a6b341843e18f712025aac049c2938a0ed21 (diff) | |
ZLIB: Split up input from stream and memory into own code paths.
Diffstat (limited to 'core/compress/gzip')
| -rw-r--r-- | core/compress/gzip/example.odin | 2 | ||||
| -rw-r--r-- | core/compress/gzip/gzip.odin | 270 |
2 files changed, 251 insertions, 21 deletions
diff --git a/core/compress/gzip/example.odin b/core/compress/gzip/example.odin index bfb4267b8..9dfd68f23 100644 --- a/core/compress/gzip/example.odin +++ b/core/compress/gzip/example.odin @@ -65,7 +65,7 @@ main :: proc() { if file == "-" { // Read from stdin s := os.stream_from_handle(os.stdin); - ctx := &compress.Context{ + ctx := &compress.Context_Stream_Input{ input = s, }; err = load(ctx, &buf); diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin index ac3940763..23446d80b 100644 --- a/core/compress/gzip/gzip.odin +++ b/core/compress/gzip/gzip.odin @@ -103,23 +103,7 @@ E_Deflate :: compress.Deflate_Error; GZIP_MAX_PAYLOAD_SIZE :: int(max(u32le)); -load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) { - - r := bytes.Reader{}; - bytes.reader_init(&r, slice); - stream := bytes.reader_to_stream(&r); - - ctx := &compress.Context{ - input = stream, - input_data = slice, - input_fully_in_memory = true, - output = buf, - }; - - err = load_from_stream(ctx, buf, known_gzip_size, expected_output_size, allocator); - - return err; -} +load :: proc{load_from_slice, load_from_stream, load_from_file}; load_from_file :: proc(filename: string, buf: ^bytes.Buffer, expected_output_size := -1, allocator := context.allocator) -> (err: Error) { data, ok := os.read_entire_file(filename, allocator); @@ -132,7 +116,255 @@ load_from_file :: proc(filename: string, buf: ^bytes.Buffer, expected_output_siz return; } -load_from_stream :: proc(z: ^compress.Context, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) { +load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) { + buf := buf; + + z := &compress.Context_Memory_Input{ + input_data = slice, + output = buf, + }; + + expected_output_size := expected_output_size; + input_data_consumed := 0; + z.output = buf; + + if expected_output_size > GZIP_MAX_PAYLOAD_SIZE { + return E_GZIP.Payload_Size_Exceeds_Max_Payload; + } + + if expected_output_size > compress.COMPRESS_OUTPUT_ALLOCATE_MAX { + return E_GZIP.Output_Exceeds_COMPRESS_OUTPUT_ALLOCATE_MAX; + } + + b: []u8; + + header, e := compress.read_data(z, Header); + if e != .None { + return E_General.File_Too_Short; + } + input_data_consumed += size_of(Header); + + if header.magic != .GZIP { + return E_GZIP.Invalid_GZIP_Signature; + } + if header.compression_method != .DEFLATE { + return E_General.Unknown_Compression_Method; + } + + if header.os >= ._Unknown { + header.os = .Unknown; + } + + if .reserved_1 in header.flags || .reserved_2 in header.flags || .reserved_3 in header.flags { + return E_GZIP.Reserved_Flag_Set; + } + + // printf("signature: %v\n", header.magic); + // printf("compression: %v\n", header.compression_method); + // printf("flags: %v\n", header.flags); + // printf("modification time: %v\n", time.unix(i64(header.modification_time), 0)); + // printf("xfl: %v (%v)\n", header.xfl, int(header.xfl)); + // printf("os: %v\n", OS_Name[header.os]); + + if .extra in header.flags { + xlen, e_extra := compress.read_data(z, u16le); + input_data_consumed += 2; + + if e_extra != .None { + return E_General.Stream_Too_Short; + } + // printf("Extra data present (%v bytes)\n", xlen); + if xlen < 4 { + // Minimum length is 2 for ID + 2 for a field length, if set to zero. + return E_GZIP.Invalid_Extra_Data; + } + + field_id: [2]u8; + field_length: u16le; + field_error: io.Error; + + for xlen >= 4 { + // println("Parsing Extra field(s)."); + field_id, field_error = compress.read_data(z, [2]u8); + if field_error != .None { + // printf("Parsing Extra returned: %v\n", field_error); + return E_General.Stream_Too_Short; + } + xlen -= 2; + input_data_consumed += 2; + + field_length, field_error = compress.read_data(z, u16le); + if field_error != .None { + // printf("Parsing Extra returned: %v\n", field_error); + return E_General.Stream_Too_Short; + } + xlen -= 2; + input_data_consumed += 2; + + if xlen <= 0 { + // We're not going to try and recover by scanning for a ZLIB header. + // Who knows what else is wrong with this file. + return E_GZIP.Invalid_Extra_Data; + } + + // printf(" Field \"%v\" of length %v found: ", string(field_id[:]), field_length); + if field_length > 0 { + b, field_error = compress.read_slice(z, int(field_length)); + if field_error != .None { + // printf("Parsing Extra returned: %v\n", field_error); + return E_General.Stream_Too_Short; + } + xlen -= field_length; + input_data_consumed += int(field_length); + + // printf("%v\n", string(field_data)); + } + + if xlen != 0 { + return E_GZIP.Invalid_Extra_Data; + } + } + } + + if .name in header.flags { + // Should be enough. + name: [1024]u8; + i := 0; + name_error: io.Error; + + for i < len(name) { + b, name_error = compress.read_slice(z, 1); + if name_error != .None { + return E_General.Stream_Too_Short; + } + input_data_consumed += 1; + if b[0] == 0 { + break; + } + name[i] = b[0]; + i += 1; + if i >= len(name) { + return E_GZIP.Original_Name_Too_Long; + } + } + // printf("Original filename: %v\n", string(name[:i])); + } + + if .comment in header.flags { + // Should be enough. + comment: [1024]u8; + i := 0; + comment_error: io.Error; + + for i < len(comment) { + b, comment_error = compress.read_slice(z, 1); + if comment_error != .None { + return E_General.Stream_Too_Short; + } + input_data_consumed += 1; + if b[0] == 0 { + break; + } + comment[i] = b[0]; + i += 1; + if i >= len(comment) { + return E_GZIP.Comment_Too_Long; + } + } + // printf("Comment: %v\n", string(comment[:i])); + } + + if .header_crc in header.flags { + crc_error: io.Error; + _, crc_error = compress.read_slice(z, 2); + input_data_consumed += 2; + if crc_error != .None { + return E_General.Stream_Too_Short; + } + /* + We don't actually check the CRC16 (lower 2 bytes of CRC32 of header data until the CRC field). + If we find a gzip file in the wild that sets this field, we can add proper support for it. + */ + } + + /* + We should have arrived at the ZLIB payload. + */ + payload_u32le: u32le; + + // fmt.printf("known_gzip_size: %v | expected_output_size: %v\n", known_gzip_size, expected_output_size); + + if expected_output_size > -1 { + /* + We already checked that it's not larger than the output buffer max, + or GZIP length field's max. + + We'll just pass it on to `zlib.inflate_raw`; + */ + } else { + /* + If we know the size of the GZIP file *and* it is fully in memory, + then we can peek at the unpacked size at the end. + + We'll still want to ensure there's capacity left in the output buffer when we write, of course. + + */ + if known_gzip_size > -1 { + offset := known_gzip_size - input_data_consumed - 4; + if len(z.input_data) >= offset + 4 { + length_bytes := z.input_data[offset:][:4]; + payload_u32le = (^u32le)(&length_bytes[0])^; + expected_output_size = int(payload_u32le); + } + } else { + /* + TODO(Jeroen): When reading a GZIP from a stream, check if impl_seek is present. + If so, we can seek to the end, grab the size from the footer, and seek back to payload start. + */ + } + } + + // fmt.printf("GZIP: Expected Payload Size: %v\n", expected_output_size); + + zlib_error := zlib.inflate_raw(z=z, expected_output_size=expected_output_size); + if zlib_error != nil { + return zlib_error; + } + /* + Read CRC32 using the ctx bit reader because zlib may leave bytes in there. + */ + compress.discard_to_next_byte_lsb(z); + + footer_error: io.Error; + + payload_crc_b: [4]u8; + for _, i in payload_crc_b { + if z.num_bits >= 8 { + payload_crc_b[i] = u8(compress.read_bits_lsb(z, 8)); + } else { + payload_crc_b[i], footer_error = compress.read_u8(z); + } + } + payload_crc := transmute(u32le)payload_crc_b; + payload_u32le, footer_error = compress.read_data(z, u32le); + + payload := bytes.buffer_to_bytes(buf); + + // fmt.printf("GZIP payload: %v\n", string(payload)); + + crc32 := u32le(hash.crc32(payload)); + + if crc32 != payload_crc { + return E_GZIP.Payload_CRC_Invalid; + } + + if len(payload) != int(payload_u32le) { + return E_GZIP.Payload_Length_Invalid; + } + return nil; +} + +load_from_stream :: proc(z: ^compress.Context_Stream_Input, buf: ^bytes.Buffer, known_gzip_size := -1, expected_output_size := -1, allocator := context.allocator) -> (err: Error) { buf := buf; expected_output_size := expected_output_size; @@ -375,5 +607,3 @@ load_from_stream :: proc(z: ^compress.Context, buf: ^bytes.Buffer, known_gzip_si } return nil; } - -load :: proc{load_from_file, load_from_slice, load_from_stream}; |