aboutsummaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
author Jeroen van Rijn <Kelimion@users.noreply.github.com> 2021-06-21 21:05:52 +0200
committer Jeroen van Rijn <Kelimion@users.noreply.github.com> 2021-06-21 21:05:52 +0200
commit 352494cbb4ad2ddb650b59ce8102da3ea0942e79 (patch)
tree 26e0d545c17b6bc64a40550e493a2ba1da648070 /core
parent 797c41950a90f75a279a48195baf733903e23ca3 (diff)
ZLIB: Start optimization.
Diffstat (limited to 'core')
-rw-r--r-- core/bytes/util.odin 14
-rw-r--r-- core/compress/common.odin 31
-rw-r--r-- core/compress/gzip/example.odin 11
-rw-r--r-- core/compress/gzip/gzip.odin 21
-rw-r--r-- core/compress/zlib/example.odin 10
-rw-r--r-- core/compress/zlib/zlib.odin 110
-rw-r--r-- core/image/common.odin 17
-rw-r--r-- core/image/png/example.odin 11
-rw-r--r-- core/image/png/helpers.odin 15
-rw-r--r-- core/image/png/png.odin 9
10 files changed, 211 insertions, 38 deletions
diff --git a/core/bytes/util.odin b/core/bytes/util.odin
index 1749230db..a93e3e479 100644
--- a/core/bytes/util.odin
+++ b/core/bytes/util.odin
@@ -1,12 +1,18 @@
package bytes
-import "core:intrinsics"
-import "core:mem"
-
/*
- Buffer type helpers
+ Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-2 license.
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation.
+
+ `bytes.Buffer` type conversion helpers.
*/
+import "core:intrinsics"
+import "core:mem"
+
need_endian_conversion :: proc($FT: typeid, $TT: typeid) -> (res: bool) {
// true if platform endian
diff --git a/core/compress/common.odin b/core/compress/common.odin
index a0e092643..5a5b9edc6 100644
--- a/core/compress/common.odin
+++ b/core/compress/common.odin
@@ -1,8 +1,20 @@
package compress
+/*
+ Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-2 license.
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation, optimization.
+*/
+
import "core:io"
import "core:image"
+when #config(TRACY_ENABLE, false) {
+ import tracy "shared:odin-tracy"
+}
+
Error :: union {
General_Error,
Deflate_Error,
@@ -71,15 +83,24 @@ Context :: struct {
*/
eof: b8,
- input: io.Stream,
+ input: io.Stream,
output: io.Stream,
bytes_written: i64,
- // Used to update hash as we write instead of all at once
+ /*
+ Used to update hash as we write instead of all at once.
+ */
rolling_hash: u32,
// Sliding window buffer. Size must be a power of two.
window_size: i64,
+ window_mask: i64,
last: ^[dynamic]byte,
+
+ /*
+ If we know the raw data size, we can optimize the reads.
+ */
+ uncompressed_size: i64,
+ input_data: []u8,
}
// Stream helpers
@@ -93,6 +114,7 @@ Context :: struct {
*/
read_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+ when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read Data"); }
b := make([]u8, size_of(T), context.temp_allocator);
r, e1 := io.to_reader(c.input);
_, e2 := io.read(r, b);
@@ -105,10 +127,12 @@ read_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Err
}
read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
+ when #config(TRACY_ENABLE, false) { tracy.ZoneN("Read u8"); }
return read_data(z, u8);
}
peek_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+ when #config(TRACY_ENABLE, false) { tracy.ZoneN("Peek Data"); }
// Get current position to read from.
curr, e1 := c.input->impl_seek(0, .Current);
if e1 != .None {
@@ -136,6 +160,7 @@ peek_back_byte :: proc(c: ^Context, offset: i64) -> (res: u8, err: io.Error) {
// Generalized bit reader LSB
refill_lsb :: proc(z: ^Context, width := i8(24)) {
+ when #config(TRACY_ENABLE, false) { tracy.ZoneN("Refill LSB"); }
for {
if z.num_bits > width {
break;
@@ -146,7 +171,7 @@ refill_lsb :: proc(z: ^Context, width := i8(24)) {
if z.code_buffer >= 1 << uint(z.num_bits) {
// Code buffer is malformed.
z.num_bits = -100;
- return;
+ return;
}
c, err := read_u8(z);
if err != .None {
diff --git a/core/compress/gzip/example.odin b/core/compress/gzip/example.odin
index 1ab899e00..81935a43a 100644
--- a/core/compress/gzip/example.odin
+++ b/core/compress/gzip/example.odin
@@ -1,6 +1,17 @@
//+ignore
package gzip
+/*
+ Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-2 license.
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation.
+ Ginger Bill: Cosmetic changes.
+
+ A small GZIP implementation as an example.
+*/
+
import "core:compress/gzip"
import "core:bytes"
import "core:os"
diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin
index 82488a5a8..55e00198a 100644
--- a/core/compress/gzip/gzip.odin
+++ b/core/compress/gzip/gzip.odin
@@ -1,5 +1,19 @@
package gzip
+/*
+ Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-2 license.
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation.
+
+ This package implements support for the GZIP file format v4.3,
+ as specified in RFC 1952.
+
+ It is implemented in such a way that it lends itself naturally
+ to be the input to a complementary TAR implementation.
+*/
+
import "core:compress/zlib"
import "core:compress"
import "core:os"
@@ -9,11 +23,6 @@ import "core:hash"
/*
- This package implements support for the GZIP file format v4.3,
- as specified in RFC 1952.
-
- It is implemented in such a way that it lends itself naturally
- to be the input to a complementary TAR implementation.
*/
@@ -200,7 +209,7 @@ load_from_stream :: proc(stream: io.Stream, buf: ^bytes.Buffer, allocator := con
xlen -= field_length;
// printf("%v\n", string(field_data));
- }
+ }
if xlen != 0 {
return E_GZIP.Invalid_Extra_Data;
diff --git a/core/compress/zlib/example.odin b/core/compress/zlib/example.odin
index 9af61e4b3..7c538b7af 100644
--- a/core/compress/zlib/example.odin
+++ b/core/compress/zlib/example.odin
@@ -1,6 +1,16 @@
//+ignore
package zlib
+/*
+ Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-2 license.
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation.
+
+ An example of how to use `zlib.inflate`.
+*/
+
import "core:compress/zlib"
import "core:bytes"
import "core:fmt"
diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin
index d0e99d820..956ddaca1 100644
--- a/core/compress/zlib/zlib.odin
+++ b/core/compress/zlib/zlib.odin
@@ -1,11 +1,23 @@
package zlib
+/*
+ Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-2 license.
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation, optimization.
+ Ginger Bill: Cosmetic changes.
+*/
+
import "core:compress"
import "core:mem"
import "core:io"
import "core:bytes"
import "core:hash"
+
+when #config(TRACY_ENABLE, false) { import tracy "shared:odin-tracy" }
+
/*
zlib.inflate decompresses a ZLIB stream passed in as a []u8 or io.Stream.
Returns: Error.
@@ -118,6 +130,7 @@ z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
}
write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_bounds_check {
+ when #config(TRACY_ENABLE, false) { tracy.ZoneN("Write Byte"); }
c := c;
buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1};
z.rolling_hash = hash.adler32(buf, z.rolling_hash);
@@ -126,17 +139,67 @@ write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_boun
if e != .None {
return e;
}
- z.last[z.bytes_written % z.window_size] = c;
+ z.last[z.bytes_written & z.window_mask] = c;
z.bytes_written += 1;
return .None;
}
+repl_byte :: proc(z: ^Context, count: u16, c: u8) -> (err: io.Error) { // Emits `count` copies of byte `c`: mirrors them into the window `z.last`, folds them into the rolling hash, and writes them to `z.output` in one call. Returns the write error, or .None.
+ when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Byte"); } // Profiling zone; only compiled in when TRACY_ENABLE is set.
+ /*
+ TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
+ without having to worry about wrapping, so no need for a temp allocation to give to
+ the output stream, just give it _that_ slice.
+ */
+ buf := make([]u8, count, context.temp_allocator); // Scratch buffer holding the whole run, taken from the temp allocator.
+ #no_bounds_check for i in 0..<count {
+ buf[i] = c;
+ z.last[z.bytes_written & z.window_mask] = c; // Masked index wraps inside the window; NOTE(review): relies on window_mask == window_size - 1 with a power-of-two window_size.
+ z.bytes_written += 1; // Total bytes produced so far; doubles as the window write cursor.
+ }
+ z.rolling_hash = hash.adler32(buf, z.rolling_hash); // Update the running Adler-32 with the whole run in one call.
+
+ _, e := z.output->impl_write(buf); // One write for the whole run; the byte count returned by the stream is ignored.
+ if e != .None {
+ return e; // Pass the stream's error through unchanged. Window, counter and hash have already been advanced at this point.
+ }
+ return .None;
+}
+
+repl_bytes :: proc(z: ^Context, count: u16, distance: u16) -> (err: io.Error) { // Copies `count` bytes that start `distance` bytes back in the window `z.last` to `z.output`, mirroring them into the window and the rolling hash. Returns the write error, or .None.
+ when #config(TRACY_ENABLE, false) { tracy.ZoneN("Repl Bytes"); } // Profiling zone; only compiled in when TRACY_ENABLE is set.
+ /*
+ TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
+ without having to worry about wrapping, so no need for a temp allocation to give to
+ the output stream, just give it _that_ slice.
+ */
+ buf := make([]u8, count, context.temp_allocator); // Scratch buffer holding the whole run, taken from the temp allocator.
+
+ offset := z.bytes_written - i64(distance); // Read cursor, `distance` bytes behind the write cursor. NOTE(review): nothing here checks distance <= bytes_written; a negative offset is silently wrapped by the mask below — presumably the caller validates this, confirm.
+ #no_bounds_check for i in 0..<count {
+ c := z.last[offset & z.window_mask]; // Read first: when distance < count the read cursor reaches bytes written earlier in this same loop, so the order of read and write matters.
+
+ z.last[z.bytes_written & z.window_mask] = c; // Masked index wraps inside the window; NOTE(review): relies on window_mask == window_size - 1 with a power-of-two window_size.
+ buf[i] = c;
+ z.bytes_written += 1; offset += 1; // Advance write and read cursors in lockstep so they stay `distance` apart.
+ }
+ z.rolling_hash = hash.adler32(buf, z.rolling_hash); // Update the running Adler-32 with the whole run in one call.
+
+ _, e := z.output->impl_write(buf); // One write for the whole run; the byte count returned by the stream is ignored.
+ if e != .None {
+ return e; // Pass the stream's error through unchanged. Window, counter and hash have already been advanced at this point.
+ }
+ return .None;
+}
+
+
allocate_huffman_table :: proc(allocator := context.allocator) -> (z: ^Huffman_Table, err: Error) {
return new(Huffman_Table, allocator), nil;
}
build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
+ when #config(TRACY_ENABLE, false) { tracy.ZoneN("Build Huffman Table"); }
sizes: [HUFFMAN_MAX_BITS+1]int;
next_code: [HUFFMAN_MAX_BITS]int;
@@ -195,6 +258,7 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
}
decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+ when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman Slow"); }
code := u16(compress.peek_bits_lsb(z, 16));
k := int(z_bit_reverse(code, 16));
@@ -225,6 +289,7 @@ decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err:
}
decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+ when #config(TRACY_ENABLE, false) { tracy.ZoneN("Decode Huffman"); }
if z.num_bits < 16 {
if z.num_bits == -100 {
return 0, E_ZLIB.Code_Buffer_Malformed;
@@ -244,6 +309,7 @@ decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #
}
parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
+ when #config(TRACY_ENABLE, false) { tracy.ZoneN("Parse Huffman Block"); }
#no_bounds_check for {
value, e := decode_huffman(z, z_repeat);
if e != nil {
@@ -256,8 +322,8 @@ parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) ->
}
} else {
if value == 256 {
- // End of block
- return nil;
+ // End of block
+ return nil;
}
value -= 257;
@@ -294,24 +360,30 @@ parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) ->
Replicate the last outputted byte, length times.
*/
if length > 0 {
- b, e := compress.peek_back_byte(z, offset);
- if e != .None {
+ if offset >= 0 && offset < z.window_size {
+ c := z.last[offset];
+ e := repl_byte(z, length, c);
+ if e != .None {
+ return E_General.Output_Too_Short;
+ }
+ } else {
return E_General.Output_Too_Short;
}
- #no_bounds_check for _ in 0..<length {
- write_byte(z, b);
- }
}
} else {
if length > 0 {
- #no_bounds_check for _ in 0..<length {
- b, e := compress.peek_back_byte(z, offset);
- if e != .None {
- return E_General.Output_Too_Short;
- }
- write_byte(z, b);
- offset += 1;
+ e := repl_bytes(z, length, distance);
+ if e != .None {
+ return E_General.Output_Too_Short;
}
+ // #no_bounds_check for _ in 0..<length {
+ // b, e := compress.peek_back_byte(z, offset);
+ // if e != .None {
+ // return E_General.Output_Too_Short;
+ // }
+ // write_byte(z, b);
+ // offset += 1;
+ // }
}
}
}
@@ -378,7 +450,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
ctx.rolling_hash = 1;
}
- // Parse ZLIB stream without header.
+ // Parse ZLIB stream without header.
err = inflate_raw(ctx);
if err != nil {
return err;
@@ -397,6 +469,7 @@ inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := cont
// @(optimization_mode="speed")
inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+ when #config(TRACY_ENABLE, false) { tracy.ZoneN("Inflate Raw"); }
final := u32(0);
type := u32(0);
@@ -426,6 +499,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
if z.window_size == 0 {
z.window_size = DEFLATE_MAX_DISTANCE;
}
+ z.window_mask = z.window_size - 1;
// Allocate rolling window buffer.
last_b := mem.make_dynamic_array_len_cap([dynamic]u8, z.window_size, z.window_size, allocator);
@@ -440,6 +514,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
switch type {
case 0:
+ when #config(TRACY_ENABLE, false) { tracy.ZoneN("Literal Block"); }
// Uncompressed block
// Discard bits until next byte boundary
@@ -468,6 +543,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
case 3:
return E_Deflate.BType_3;
case:
+ when #config(TRACY_ENABLE, false) { tracy.ZoneN("Huffman Block"); }
// log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
if type == 1 {
// Use fixed code lengths.
@@ -531,7 +607,7 @@ inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) ->
case 18:
c = u16(compress.read_bits_no_refill_lsb(z, 7) + 11);
case:
- return E_Deflate.Huffman_Bad_Code_Lengths;
+ return E_Deflate.Huffman_Bad_Code_Lengths;
}
if ntot - n < u32(c) {
diff --git a/core/image/common.odin b/core/image/common.odin
index 8443a2d22..7a678f5b0 100644
--- a/core/image/common.odin
+++ b/core/image/common.odin
@@ -1,5 +1,14 @@
package image
+/*
+ Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-2 license.
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation, optimization.
+ Ginger Bill: Cosmetic changes.
+*/
+
import "core:bytes"
import "core:mem"
@@ -66,10 +75,10 @@ Image_Option:
If the image has an alpha channel, drop it.
You may want to use `.alpha_premultiply` in this case.
- NOTE: For PNG, this also skips handling of the tRNS chunk, if present,
- unless you select `alpha_premultiply`.
- In this case it'll premultiply the specified pixels in question only,
- as the others are implicitly fully opaque.
+ NOTE: For PNG, this also skips handling of the tRNS chunk, if present,
+ unless you select `alpha_premultiply`.
+ In this case it'll premultiply the specified pixels in question only,
+ as the others are implicitly fully opaque.
`.alpha_premultiply`
If the image has an alpha channel, returns image data as follows:
diff --git a/core/image/png/example.odin b/core/image/png/example.odin
index 3dd4af2ff..3891a88e5 100644
--- a/core/image/png/example.odin
+++ b/core/image/png/example.odin
@@ -1,6 +1,17 @@
//+ignore
package png
+/*
+ Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-2 license.
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation.
+ Ginger Bill: Cosmetic changes.
+
+ An example of how to use `png.load`.
+*/
+
import "core:compress"
import "core:image"
import "core:image/png"
diff --git a/core/image/png/helpers.odin b/core/image/png/helpers.odin
index 3a811f5c9..b28e4aead 100644
--- a/core/image/png/helpers.odin
+++ b/core/image/png/helpers.odin
@@ -1,5 +1,16 @@
package png
+/*
+ Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-2 license.
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation.
+ Ginger Bill: Cosmetic changes.
+
+ These are a few useful utility functions to work with PNG images.
+*/
+
import "core:image"
import "core:compress/zlib"
import coretime "core:time"
@@ -8,10 +19,6 @@ import "core:bytes"
import "core:mem"
/*
- These are a few useful utility functions to work with PNG images.
-*/
-
-/*
Cleanup of image-specific data.
There are other helpers for cleanup of PNG-specific data.
Those are named *_destroy, where * is the name of the helper.
diff --git a/core/image/png/png.odin b/core/image/png/png.odin
index 18295793d..b4f25201f 100644
--- a/core/image/png/png.odin
+++ b/core/image/png/png.odin
@@ -1,5 +1,14 @@
package png
+/*
+ Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-2 license.
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation.
+ Ginger Bill: Cosmetic changes.
+*/
+
import "core:compress"
import "core:compress/zlib"
import "core:image"