47 files changed, 4724 insertions, 1253 deletions
diff --git a/core/compress/common.odin b/core/compress/common.odin
new file mode 100644
index 000000000..a0e092643
--- /dev/null
+++ b/core/compress/common.odin
@@ -0,0 +1,196 @@
+package compress
+
+import "core:io"
+import "core:image"
+
+Error :: union {
+	General_Error,
+	Deflate_Error,
+	ZLIB_Error,
+	GZIP_Error,
+	ZIP_Error,
+	/*
+		This is here because png.load will return this type of error union,
+		as it may involve an I/O error, a Deflate error, etc.
+	*/
+	image.Error,
+}
+
+General_Error :: enum {
+	File_Not_Found,
+	Cannot_Open_File,
+	File_Too_Short,
+	Stream_Too_Short,
+	Output_Too_Short,
+	Unknown_Compression_Method,
+	Checksum_Failed,
+	Incompatible_Options,
+	Unimplemented,
+}
+
+GZIP_Error :: enum {
+	Invalid_GZIP_Signature,
+	Reserved_Flag_Set,
+	Invalid_Extra_Data,
+	Original_Name_Too_Long,
+	Comment_Too_Long,
+	Payload_Length_Invalid,
+	Payload_CRC_Invalid,
+}
+
+ZIP_Error :: enum {
+	Invalid_ZIP_File_Signature,
+	Unexpected_Signature,
+	Insert_Next_Disk,
+	Expected_End_of_Central_Directory_Record,
+}
+
+ZLIB_Error :: enum {
+	Unsupported_Window_Size,
+	FDICT_Unsupported,
+	Unsupported_Compression_Level,
+	Code_Buffer_Malformed,
+}
+
+Deflate_Error :: enum {
+	Huffman_Bad_Sizes,
+	Huffman_Bad_Code_Lengths,
+	Inflate_Error,
+	Bad_Distance,
+	Bad_Huffman_Code,
+	Len_Nlen_Mismatch,
+	BType_3,
+}
+
+// General context for ZLIB, LZW, etc.
+Context :: struct {
+	code_buffer: u32,
+	num_bits: i8,
+	/*
+		refill_lsb sets num_bits to -100 if the code buffer is malformed, and to -42 (with eof = true) when a read fails.
+	*/
+	eof: b8,
+
+	input: io.Stream,
+	output: io.Stream,
+	bytes_written: i64,
+	// Used to update hash as we write instead of all at once
+	rolling_hash: u32,
+
+	// Sliding window buffer. Size must be a power of two.
+	window_size: i64,
+	last: ^[dynamic]byte,
+}
+
+// Stream helpers
+/*
+	TODO: These need to be optimized.
+
+	Streams should really only check if a certain method is available once, perhaps even during setup.
+
+	Bit and byte readers may be merged so that reading bytes will grab them from the bit buffer first.
+	This simplifies end-of-stream handling where bits may be left in the bit buffer.
+*/
+
+read_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+	b := make([]u8, size_of(T), context.temp_allocator);
+	r, e1 := io.to_reader(c.input);
+	n, e2 := io.read(r, b);
+	if !e1 || e2 != .None {
+		return T{}, e2;
+	}
+	// A short read leaves the tail of `b` zeroed; report it instead of returning a half-filled T.
+	if n != size_of(T) { return T{}, .Unexpected_EOF; }
+	return (^T)(raw_data(b))^, .None;
+}
+
+read_u8 :: #force_inline proc(z: ^Context) -> (res: u8, err: io.Error) {
+	return read_data(z, u8);
+}
+
+peek_data :: #force_inline proc(c: ^Context, $T: typeid) -> (res: T, err: io.Error) {
+	// Get current position to read from.
+	curr, e1 := c.input->impl_seek(0, .Current);
+	if e1 != .None {
+		return T{}, e1;
+	}
+	r, e2 := io.to_reader_at(c.input);
+	if !e2 {
+		return T{}, .Empty;
+	}
+	b := make([]u8, size_of(T), context.temp_allocator);
+	_, e3 := io.read_at(r, b, curr);
+	if e3 != .None {
+		return T{}, .Empty;
+	}
+
+	res = (^T)(raw_data(b))^;
+	return res, .None;
+}
+
+// Sliding window read back
+peek_back_byte :: proc(c: ^Context, offset: i64) -> (res: u8, err: io.Error) {
+	// Look back into the sliding window.
+	return c.last[offset % c.window_size], .None;
+}
+
+// Generalized bit reader LSB
+refill_lsb :: proc(z: ^Context, width := i8(24)) {
+	for {
+		if z.num_bits > width {
+			break;
+		}
+		if z.code_buffer == 0 && z.num_bits == -1 {
+			z.num_bits = 0;
+		}
+		if z.code_buffer >= 1 << uint(z.num_bits) {
+			// Code buffer is malformed.
+			z.num_bits = -100;
+        	return;
+		}
+		c, err := read_u8(z);
+		if err != .None {
+			// This is fine at the end of the file.
+			z.num_bits = -42;
+			z.eof = true;
+			return;
+		}
+		z.code_buffer |= (u32(c) << u8(z.num_bits));
+		z.num_bits += 8;
+	}
+}
+
+consume_bits_lsb :: #force_inline proc(z: ^Context, width: u8) {
+	z.code_buffer >>= width;
+	z.num_bits -= i8(width);
+}
+
+peek_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	if z.num_bits < i8(width) {
+		refill_lsb(z);
+	}
+	// assert(z.num_bits >= i8(width));
+	return z.code_buffer & ~(~u32(0) << width);
+}
+
+peek_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	assert(z.num_bits >= i8(width));
+	return z.code_buffer & ~(~u32(0) << width);
+}
+
+read_bits_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	k := peek_bits_lsb(z, width);
+	consume_bits_lsb(z, width);
+	return k;
+}
+
+read_bits_no_refill_lsb :: #force_inline proc(z: ^Context, width: u8) -> u32 {
+	k := peek_bits_no_refill_lsb(z, width);
+	consume_bits_lsb(z, width);
+	return k;
+}
+
+discard_to_next_byte_lsb :: proc(z: ^Context) {
+	discard := u8(z.num_bits & 7);
+	consume_bits_lsb(z, discard);
+}
diff --git a/core/compress/gzip/example.odin b/core/compress/gzip/example.odin
new file mode 100644
index 000000000..54576c380
--- /dev/null
+++ b/core/compress/gzip/example.odin
@@ -0,0 +1,70 @@
+//+ignore
+package gzip
+
+import "core:compress/gzip"
+import "core:bytes"
+import "core:os"
+
+// Small GZIP file with fextra, fname and fcomment present.
+@private
+TEST: []u8 = {
+	0x1f, 0x8b, 0x08, 0x1c, 0xcb, 0x3b, 0x3a, 0x5a,
+	0x02, 0x03, 0x07, 0x00, 0x61, 0x62, 0x03, 0x00,
+	0x63, 0x64, 0x65, 0x66, 0x69, 0x6c, 0x65, 0x6e,
+	0x61, 0x6d, 0x65, 0x00, 0x54, 0x68, 0x69, 0x73,
+	0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x6f,
+	0x6d, 0x6d, 0x65, 0x6e, 0x74, 0x00, 0x2b, 0x48,
+	0xac, 0xcc, 0xc9, 0x4f, 0x4c, 0x01, 0x00, 0x15,
+	0x6a, 0x2c, 0x42, 0x07, 0x00, 0x00, 0x00,
+};
+
+main :: proc() {
+	// Set up output buffer.
+	buf: bytes.Buffer;
+	defer bytes.buffer_destroy(&buf);
+
+	stdout :: proc(s: string) {
+		os.write_string(os.stdout, s);
+	}
+	stderr :: proc(s: string) {
+		os.write_string(os.stderr, s);
+	}
+
+	args := os.args;
+
+	if len(args) < 2 {
+		stderr("No input file specified.\n");
+		err := gzip.load(TEST, &buf);
+		if err == nil { // Only show the decompressed test vector when decompression succeeded.
+			stdout("Displaying test vector: ");
+			stdout(bytes.buffer_to_string(&buf));
+			stdout("\n");
+		}
+	}
+
+	// The rest are all files.
+	args = args[1:];
+	err: gzip.Error;
+
+	for file in args {
+		if file == "-" {
+			// Read from stdin
+			s := os.stream_from_handle(os.stdin);
+			err = gzip.load(s, &buf);
+		} else {
+			err = gzip.load(file, &buf);
+		}
+		if err != nil {
+			if err == E_General.File_Not_Found { // Exit code 1 is reserved for a missing input file.
+				stderr("File not found: ");
+				stderr(file);
+				stderr("\n");
+				os.exit(1);
+			}
+			stderr("GZIP returned an error.\n");
+			os.exit(2);
+		}
+		stdout(bytes.buffer_to_string(&buf));
+	}
+	os.exit(0);
+}
diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin
new file mode 100644
index 000000000..2b5e513c7
--- /dev/null
+++ b/core/compress/gzip/gzip.odin
@@ -0,0 +1,313 @@
+package gzip
+
+import "core:compress/zlib"
+import "core:compress"
+import "core:os"
+import "core:io"
+import "core:bytes"
+import "core:hash"
+
+/*
+
+	This package implements support for the GZIP file format v4.3,
+	as specified in RFC 1952.
+
+	It is implemented in such a way that it lends itself naturally
+	to be the input to a complementary TAR implementation.
+
+*/
+
+Magic :: enum u16le {
+	GZIP = 0x8b << 8 | 0x1f,
+}
+
+Header :: struct #packed {
+	magic: Magic,
+	compression_method: Compression,
+	flags: Header_Flags,
+	modification_time: u32le,
+	xfl: Compression_Flags,
+	os: OS,
+}
+#assert(size_of(Header) == 10);
+
+Header_Flag :: enum u8 {
+	// Order is important
+	text       = 0,
+	header_crc = 1,
+	extra      = 2,
+	name       = 3,
+	comment    = 4,
+	reserved_1 = 5,
+	reserved_2 = 6,
+	reserved_3 = 7,
+}
+Header_Flags :: distinct bit_set[Header_Flag; u8];
+
+OS :: enum u8 {
+	FAT          = 0,
+	Amiga        = 1,
+	VMS          = 2,
+	Unix         = 3,
+	VM_CMS       = 4,
+	Atari_TOS    = 5,
+	HPFS         = 6,
+	Macintosh    = 7,
+	Z_System     = 8,
+	CP_M         = 9,
+	TOPS_20      = 10,
+	NTFS         = 11,
+	QDOS         = 12,
+	Acorn_RISCOS = 13,
+	_Unknown     = 14,
+	Unknown      = 255,
+}
+OS_Name :: #partial [OS]string{
+	.FAT          = "FAT",
+	.Amiga        = "Amiga",
+	.VMS          = "VMS/OpenVMS",
+	.Unix         = "Unix",
+	.VM_CMS       = "VM/CMS",
+	.Atari_TOS    = "Atari TOS",
+	.HPFS         = "HPFS",
+	.Macintosh    = "Macintosh",
+	.Z_System     = "Z-System",
+	.CP_M         = "CP/M",
+	.TOPS_20      = "TOPS-20",
+	.NTFS         = "NTFS",
+	.QDOS         = "QDOS",
+	.Acorn_RISCOS = "Acorn RISCOS",
+	.Unknown      = "Unknown",
+};
+
+Compression :: enum u8 {
+	DEFLATE = 8,
+}
+
+Compression_Flags :: enum u8 {
+	Maximum_Compression = 2,
+	Fastest_Compression = 4,
+}
+
+Error     :: compress.Error;
+E_General :: compress.General_Error;
+E_GZIP    :: compress.GZIP_Error;
+E_ZLIB    :: compress.ZLIB_Error;
+E_Deflate :: compress.Deflate_Error;
+
+load_from_slice :: proc(slice: []u8, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
+
+	r := bytes.Reader{};
+	bytes.reader_init(&r, slice);
+	stream := bytes.reader_to_stream(&r);
+
+	err = load_from_stream(stream, buf, allocator);
+
+	return err;
+}
+
+load_from_file :: proc(filename: string, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
+	data, ok := os.read_entire_file(filename, allocator);
+	defer delete(data, allocator); // Free with the allocator that produced `data`, not context.allocator.
+
+	err = E_General.File_Not_Found;
+	if ok {
+		err = load_from_slice(data, buf, allocator);
+	}
+	return;
+}
+
+load_from_stream :: proc(stream: io.Stream, buf: ^bytes.Buffer, allocator := context.allocator) -> (err: Error) {
+	ctx := compress.Context{
+		input  = stream,
+	};
+	buf := buf;
+	ws := bytes.buffer_to_stream(buf);
+	ctx.output = ws;
+
+	header, e := compress.read_data(&ctx, Header);
+	if e != .None {
+		return E_General.File_Too_Short;
+	}
+
+	if header.magic != .GZIP {
+		return E_GZIP.Invalid_GZIP_Signature;
+	}
+	if header.compression_method != .DEFLATE {
+		return E_General.Unknown_Compression_Method;
+	}
+
+	if header.os >= ._Unknown {
+		header.os = .Unknown;
+	}
+
+	if .reserved_1 in header.flags || .reserved_2 in header.flags || .reserved_3 in header.flags {
+		return E_GZIP.Reserved_Flag_Set;
+	}
+
+	// printf("signature: %v\n", header.magic);
+	// printf("compression: %v\n", header.compression_method);
+	// printf("flags: %v\n", header.flags);
+	// printf("modification time: %v\n", time.unix(i64(header.modification_time), 0));
+	// printf("xfl: %v (%v)\n", header.xfl, int(header.xfl));
+	// printf("os: %v\n", OS_Name[header.os]);
+
+	if .extra in header.flags {
+		xlen, e_extra := compress.read_data(&ctx, u16le);
+		if e_extra != .None {
+			return E_General.Stream_Too_Short;
+		}
+		// printf("Extra data present (%v bytes)\n", xlen);
+		if xlen < 4 {
+			// Minimum length is 2 for ID + 2 for a field length, if set to zero.
+			return E_GZIP.Invalid_Extra_Data;
+		}
+
+		field_id:     [2]u8;
+		field_length: u16le;
+		field_error: io.Error;
+
+		for xlen >= 4 {
+			// println("Parsing Extra field(s).");
+			field_id, field_error = compress.read_data(&ctx, [2]u8);
+			if field_error != .None {
+				// printf("Parsing Extra returned: %v\n", field_error);
+				return E_General.Stream_Too_Short;
+			}
+			xlen -= 2;
+
+			field_length, field_error = compress.read_data(&ctx, u16le);
+			if field_error != .None {
+				// printf("Parsing Extra returned: %v\n", field_error);
+				return E_General.Stream_Too_Short;
+			}
+			xlen -= 2;
+
+			if field_length > xlen {
+				// The subfield claims more data than the Extra block has left (xlen is unsigned, so check before subtracting).
+				// We're not going to try and recover by scanning for a ZLIB header.
+				return E_GZIP.Invalid_Extra_Data;
+			}
+
+			// printf("    Field \"%v\" of length %v found: ", string(field_id[:]), field_length);
+			if field_length > 0 {
+				field_data := make([]u8, field_length, context.temp_allocator);
+				_, field_error = ctx.input->impl_read(field_data);
+				if field_error != .None {
+					// printf("Parsing Extra returned: %v\n", field_error);
+					return E_General.Stream_Too_Short;
+				}
+				xlen -= field_length;
+
+				// printf("%v\n", string(field_data));
+			}
+		}
+		// All subfields consumed; 1-3 leftover bytes cannot form another subfield header.
+		if xlen != 0 {
+			return E_GZIP.Invalid_Extra_Data;
+		}
+	}
+
+	if .name in header.flags {
+		// Should be enough.
+		name: [1024]u8;
+		b: [1]u8;
+		i := 0;
+		name_error: io.Error;
+
+		for i < len(name) {
+			_, name_error = ctx.input->impl_read(b[:]);
+			if name_error != .None {
+				return E_General.Stream_Too_Short;
+			}
+			if b == 0 {
+				break;
+			}
+			name[i] = b[0];
+			i += 1;
+			if i >= len(name) {
+				return E_GZIP.Original_Name_Too_Long;
+			}
+		}
+		// printf("Original filename: %v\n", string(name[:i]));
+	}
+
+	if .comment in header.flags {
+		// Should be enough.
+		comment: [1024]u8;
+		b: [1]u8;
+		i := 0;
+		comment_error: io.Error;
+
+		for i < len(comment) {
+			_, comment_error = ctx.input->impl_read(b[:]);
+			if comment_error != .None {
+				return E_General.Stream_Too_Short;
+			}
+			if b == 0 {
+				break;
+			}
+			comment[i] = b[0];
+			i += 1;
+			if i >= len(comment) {
+				return E_GZIP.Comment_Too_Long;
+			}
+		}
+		// printf("Comment: %v\n", string(comment[:i]));
+	}
+
+	if .header_crc in header.flags {
+		crc16: [2]u8;
+		crc_error: io.Error;
+		_, crc_error = ctx.input->impl_read(crc16[:]);
+		if crc_error != .None {
+			return E_General.Stream_Too_Short;
+		}
+		/*
+			We don't actually check the CRC16 (lower 2 bytes of CRC32 of header data until the CRC field).
+			If we find a gzip file in the wild that sets this field, we can add proper support for it.
+		*/
+	}
+
+	/*
+		We should have arrived at the ZLIB payload.
+	*/
+
+	zlib_error := zlib.inflate_raw(&ctx, allocator);
+
+	// fmt.printf("ZLIB returned: %v\n", zlib_error);
+
+	if zlib_error != nil {
+		return zlib_error;
+	}
+
+	/*
+		Read CRC32 using the ctx bit reader because zlib may leave bytes in there.
+	*/
+	compress.discard_to_next_byte_lsb(&ctx);
+
+	payload_crc_b: [4]u8;
+	payload_len_b: [4]u8;
+	for i in 0..3 {
+		payload_crc_b[i] = u8(compress.read_bits_lsb(&ctx, 8));
+	}
+	payload_crc := transmute(u32le)payload_crc_b;
+	for i in 0..3 {
+		payload_len_b[i] = u8(compress.read_bits_lsb(&ctx, 8));
+	}
+	payload_len := int(transmute(u32le)payload_len_b);
+
+	payload := bytes.buffer_to_bytes(buf);
+	crc32 := u32le(hash.crc32(payload));
+
+	if crc32 != payload_crc {
+		return E_GZIP.Payload_CRC_Invalid;
+	}
+
+	if len(payload) != payload_len {
+		return E_GZIP.Payload_Length_Invalid;
+	}
+	return nil;
+}
+
+load :: proc{load_from_file, load_from_slice, load_from_stream};
diff --git a/core/compress/zlib/example.odin b/core/compress/zlib/example.odin
new file mode 100644
index 000000000..9af61e4b3
--- /dev/null
+++ b/core/compress/zlib/example.odin
@@ -0,0 +1,42 @@
+//+ignore
+package zlib
+
+import "core:compress/zlib"
+import "core:bytes"
+import "core:fmt"
+
+main :: proc() {
+
+	ODIN_DEMO := []u8{
+		120, 156, 101, 144,  77, 110, 131,  48,  16, 133, 215, 204,  41, 158,  44,
+		 69,  73,  32, 148, 182,  75,  35,  14, 208, 125,  47,  96, 185, 195, 143,
+		130,  13,  50,  38,  81,  84, 101, 213,  75, 116, 215,  43, 246,   8,  53,
+		 82, 126,   8, 181, 188, 152, 153, 111, 222, 147, 159, 123, 165, 247, 170,
+		 98,  24, 213,  88, 162, 198, 244, 157, 243,  16, 186, 115,  44,  75, 227,
+		  5,  77, 115,  72, 137, 222, 117, 122, 179, 197,  39,  69, 161, 170, 156,
+		 50, 144,   5,  68, 130,   4,  49, 126, 127, 190, 191, 144,  34,  19,  57,
+		 69,  74, 235, 209, 140, 173, 242, 157, 155,  54, 158, 115, 162, 168,  12,
+		181, 239, 246, 108,  17, 188, 174, 242, 224,  20,  13, 199, 198, 235, 250,
+		194, 166, 129,  86,   3,  99, 157, 172,  37, 230,  62,  73, 129, 151, 252,
+		 70, 211,   5,  77,  31, 104, 188, 160, 113, 129, 215,  59, 205,  22,  52,
+		123, 160,  83, 142, 255, 242,  89, 123,  93, 149, 200,  50, 188,  85,  54,
+		252,  18, 248, 192, 238, 228, 235, 198,  86, 224, 118, 224, 176, 113, 166,
+		112,  67, 106, 227, 159, 122, 215,  88,  95, 110, 196, 123, 205, 183, 224,
+		 98,  53,   8, 104, 213, 234, 201, 147,   7, 248, 192,  14, 170,  29,  25,
+		171,  15,  18,  59, 138, 112,  63,  23, 205, 110, 254, 136, 109,  78, 231,
+		 63, 234, 138, 133, 204,
+	};
+
+	buf: bytes.Buffer;
+
+	// We can pass ", true" to inflate a raw DEFLATE stream instead of a ZLIB wrapped one.
+	err := zlib.inflate(ODIN_DEMO, &buf);
+	defer bytes.buffer_destroy(&buf);
+
+	if err != nil {
+		fmt.printf("\nError: %v\n", err);
+	}
+	s := bytes.buffer_to_string(&buf);
+	fmt.printf("Input: %v bytes, output (%v bytes):\n%v\n", len(ODIN_DEMO), len(s), s);
+	assert(len(s) == 438);
+}
diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin
new file mode 100644
index 000000000..bc19c37ef
--- /dev/null
+++ b/core/compress/zlib/zlib.odin
@@ -0,0 +1,606 @@
+package zlib
+
+import "core:compress"
+
+import "core:mem"
+import "core:io"
+import "core:bytes"
+import "core:hash"
+/*
+	zlib.inflate decompresses a ZLIB stream passed in as a []u8 or io.Stream.
+	Returns: Error.
+*/
+
+Context :: compress.Context;
+
+Compression_Method :: enum u8 {
+	DEFLATE  = 8,
+	Reserved = 15,
+}
+
+Compression_Level :: enum u8 {
+	Fastest = 0,
+	Fast    = 1,
+	Default = 2,
+	Maximum = 3,
+}
+
+Options :: struct {
+	window_size: u16,
+	level: u8,
+}
+
+Error     :: compress.Error;
+E_General :: compress.General_Error;
+E_ZLIB    :: compress.ZLIB_Error;
+E_Deflate :: compress.Deflate_Error;
+
+DEFLATE_MAX_CHUNK_SIZE   :: 65535;
+DEFLATE_MAX_LITERAL_SIZE :: 65535;
+DEFLATE_MAX_DISTANCE     :: 32768;
+DEFLATE_MAX_LENGTH       :: 258;
+
+HUFFMAN_MAX_BITS  :: 16;
+HUFFMAN_FAST_BITS :: 9;
+HUFFMAN_FAST_MASK :: ((1 << HUFFMAN_FAST_BITS) - 1);
+
+Z_LENGTH_BASE := [31]u16{
+	3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,
+	67,83,99,115,131,163,195,227,258,0,0,
+};
+
+Z_LENGTH_EXTRA := [31]u8{
+	0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0,
+};
+
+Z_DIST_BASE := [32]u16{
+	1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
+	257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0,
+};
+
+Z_DIST_EXTRA := [32]u8{
+	0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,0,0,
+};
+
+Z_LENGTH_DEZIGZAG := []u8{
+	16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
+};
+
+Z_FIXED_LENGTH := [288]u8{
+	8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+	8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+	8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+	8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+	8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+	9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+	9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+	9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+	7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,
+};
+
+Z_FIXED_DIST := [32]u8{
+	5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+};
+
+/*
+	Accelerate all cases in default tables.
+*/
+ZFAST_BITS :: 9;
+ZFAST_MASK :: ((1 << ZFAST_BITS) - 1);
+
+/*
+	ZLIB-style Huffman encoding.
+	JPEG packs from left, ZLIB from right. We can't share code.
+*/
+Huffman_Table :: struct {
+	fast:        [1 << ZFAST_BITS]u16,
+	firstcode:   [16]u16,
+	maxcode:     [17]int,
+	firstsymbol: [16]u16,
+	size:        [288]u8,
+	value:       [288]u16,
+};
+
+// Implementation starts here
+
+z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
+	assert(bits <= 16);
+	// NOTE: Can optimize with llvm.bitreverse.i64 or some bit twiddling
+	// by reversing all of the bits and masking out the unneeded ones.
+	r = n;
+	r = ((r & 0xAAAA) >>  1) | ((r & 0x5555) << 1);
+	r = ((r & 0xCCCC) >>  2) | ((r & 0x3333) << 2);
+	r = ((r & 0xF0F0) >>  4) | ((r & 0x0F0F) << 4);
+	r = ((r & 0xFF00) >>  8) | ((r & 0x00FF) << 8);
+
+	r >>= (16 - bits);
+	return;
+}
+
+write_byte :: #force_inline proc(z: ^Context, c: u8) -> (err: io.Error) #no_bounds_check {
+	c := c;
+	buf := transmute([]u8)mem.Raw_Slice{data=&c, len=1};
+	z.rolling_hash = hash.adler32(buf, z.rolling_hash);
+
+	_, e := z.output->impl_write(buf);
+	if e != .None {
+		return e;
+	}
+	z.last[z.bytes_written % z.window_size] = c;
+
+	z.bytes_written += 1;
+	return .None;
+}
+
+allocate_huffman_table :: proc(allocator := context.allocator) -> (z: ^Huffman_Table, err: Error) {
+
+	z = new(Huffman_Table, allocator);
+	return z, nil;
+}
+
+build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
+	sizes:     [HUFFMAN_MAX_BITS+1]int;
+	next_code: [HUFFMAN_MAX_BITS]int;
+
+	k := int(0);
+
+	mem.zero_slice(sizes[:]);
+	mem.zero_slice(z.fast[:]);
+
+	for v, _ in code_lengths {
+		sizes[v] += 1;
+	}
+	sizes[0] = 0;
+
+	for i in 1..16 {
+		if sizes[i] > (1 << uint(i)) {
+			return E_Deflate.Huffman_Bad_Sizes;
+		}
+	}
+	code := int(0);
+
+	for i in 1..<16 {
+		next_code[i]     = code;
+		z.firstcode[i]   = u16(code);
+		z.firstsymbol[i] = u16(k);
+		code = code + sizes[i];
+		if sizes[i] != 0 {
+			if (code - 1 >= (1 << u16(i))) {
+				return E_Deflate.Huffman_Bad_Code_Lengths;
+			}
+		}
+		z.maxcode[i] = code << (16 - uint(i));
+		code <<= 1;
+		k += int(sizes[i]);
+	}
+
+	z.maxcode[16] = 0x10000; // Sentinel
+	c: int;
+
+	for v, ci in code_lengths {
+		if v != 0 {
+			c = next_code[v] - int(z.firstcode[v]) + int(z.firstsymbol[v]);
+			fastv := u16((u16(v) << 9) | u16(ci));
+			z.size[c]  = u8(v);
+			z.value[c] = u16(ci);
+			if (v <= ZFAST_BITS) {
+				j := z_bit_reverse(u16(next_code[v]), v);
+				for j < (1 << ZFAST_BITS) {
+					z.fast[j] = fastv;
+					j += (1 << v);
+				}
+			}
+			next_code[v] += 1;
+		}
+	}
+	return nil;
+}
+
+decode_huffman_slowpath :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+
+	r   = 0;
+	err = nil;
+
+	k: int;
+	s: u8;
+
+	code := u16(compress.peek_bits_lsb(z, 16));
+
+	k = int(z_bit_reverse(code, 16));
+
+	#no_bounds_check for s = HUFFMAN_FAST_BITS+1; ; {
+		if k < t.maxcode[s] {
+			break;
+		}
+		s += 1;
+	}
+	if (s >= 16) {
+		return 0, E_Deflate.Bad_Huffman_Code;
+	}
+	// code size is s, so:
+	b := (k >> (16-s)) - int(t.firstcode[s]) + int(t.firstsymbol[s]);
+	if b >= size_of(t.size) {
+		return 0, E_Deflate.Bad_Huffman_Code;
+	}
+	if t.size[b] != s {
+		return 0, E_Deflate.Bad_Huffman_Code;
+	}
+
+	compress.consume_bits_lsb(z, s);
+
+	r = t.value[b];
+	return r, nil;
+}
+
+decode_huffman :: proc(z: ^Context, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
+
+	if z.num_bits < 16 {
+		if z.num_bits == -100 {
+			return 0, E_ZLIB.Code_Buffer_Malformed;
+		}
+		compress.refill_lsb(z);
+		if z.eof {
+			return 0, E_General.Stream_Too_Short;
+		}
+	}
+	#no_bounds_check b := t.fast[z.code_buffer & ZFAST_MASK];
+	if b != 0 {
+		s := u8(b >> ZFAST_BITS);
+		compress.consume_bits_lsb(z, s);
+		return b & 511, nil;
+	}
+	return decode_huffman_slowpath(z, t);
+}
+
+parse_huffman_block :: proc(z: ^Context, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
+	#no_bounds_check for {
+		value, e := decode_huffman(z, z_repeat);
+		if e != nil {
+			return e; // Propagate the decode error; the named result `err` is still nil here.
+		}
+		if value < 256 {
+			e := write_byte(z, u8(value));
+			if e != .None {
+				return E_General.Output_Too_Short;
+			}
+		} else {
+			if value == 256 {
+				// End of block
+				return nil;
+			}
+
+			value -= 257;
+			length := Z_LENGTH_BASE[value];
+			if Z_LENGTH_EXTRA[value] > 0 {
+				length += u16(compress.read_bits_lsb(z, Z_LENGTH_EXTRA[value]));
+			}
+
+			value, e = decode_huffman(z, z_offset);
+			if e != nil {
+				return E_Deflate.Bad_Huffman_Code;
+			}
+
+			distance := Z_DIST_BASE[value];
+			if Z_DIST_EXTRA[value] > 0 {
+				distance += u16(compress.read_bits_lsb(z, Z_DIST_EXTRA[value]));
+			}
+
+			if distance == 0 || z.bytes_written < i64(distance) {
+				// Invalid distance code (30/31 have base 0), or distance is longer than we've decoded so far.
+				return E_Deflate.Bad_Distance;
+			}
+
+			offset := i64(z.bytes_written - i64(distance));
+			/*
+				These might be sped up with a repl_byte call that copies
+				from the already written output more directly, and that
+				update the Adler checksum once after.
+
+				That way we'd suffer less Stream vtable overhead.
+			*/
+			if distance == 1 {
+				/*
+					Replicate the last outputted byte, length times.
+				*/
+				if length > 0 {
+					b, e := compress.peek_back_byte(z, offset);
+					if e != .None {
+						return E_General.Output_Too_Short;
+					}
+					#no_bounds_check for _ in 0..<length {
+						if write_byte(z, b) != .None { return E_General.Output_Too_Short; }
+					}
+				}
+			} else {
+				if length > 0 {
+					#no_bounds_check for _ in 0..<length {
+						b, e := compress.peek_back_byte(z, offset);
+						if e != .None {
+							return E_General.Output_Too_Short;
+						}
+						if write_byte(z, b) != .None { return E_General.Output_Too_Short; }
+						offset += 1;
+					}
+				}
+			}
+		}
+	}
+}
+
+inflate_from_stream :: proc(using ctx: ^Context, raw := false, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+	/*
+		ctx.input must be an io.Stream backed by an implementation that supports:
+		- read
+		- size
+
+		ctx.output must be an io.Stream backed by an implementation that supports:
+		- write
+
+		raw determines whether the ZLIB header is processed, or we're inflating a raw
+		DEFLATE stream.
+	*/
+
+	if !raw {
+		data_size := io.size(ctx.input);
+		if data_size < 6 {
+			return E_General.Stream_Too_Short;
+		}
+
+		cmf, _ := compress.read_u8(ctx);
+
+		method := Compression_Method(cmf & 0xf);
+		if method != .DEFLATE {
+			return E_General.Unknown_Compression_Method;
+		}
+
+		cinfo  := (cmf >> 4) & 0xf;
+		if cinfo > 7 {
+			return E_ZLIB.Unsupported_Window_Size;
+		}
+		ctx.window_size = 1 << (cinfo + 8);
+
+		flg, _ := compress.read_u8(ctx);
+
+		// RFC 1950: FCHECK makes (CMF*256 + FLG) a multiple of 31. Widen first; `cmf << 8` is always 0 in u8.
+		fcheck_computed := (u16(cmf) << 8 | u16(flg)) % 31;
+		if fcheck_computed != 0 {
+			return E_General.Checksum_Failed;
+		}
+
+		fdict   := (flg >> 5) & 1;
+		/*
+			We don't handle built-in dictionaries for now.
+			They're application specific and PNG doesn't use them.
+		*/
+		if fdict != 0 {
+			return E_ZLIB.FDICT_Unsupported;
+		}
+
+		// flevel  := Compression_Level((flg >> 6) & 3);
+		/*
+			Inflate can consume bits belonging to the Adler checksum.
+			We pass the entire stream to Inflate and will unget bytes if we need to
+			at the end to compare checksums.
+		*/
+
+		// Seed the Adler32 rolling checksum.
+		ctx.rolling_hash = 1;
+	}
+
+	// Parse ZLIB stream without header, honouring the caller's allocator.
+	err = inflate_raw(ctx, allocator);
+	if err != nil {
+		return err;
+	}
+
+	if !raw {
+		compress.discard_to_next_byte_lsb(ctx);
+
+		adler32 := compress.read_bits_lsb(ctx, 8) << 24 | compress.read_bits_lsb(ctx, 8) << 16 | compress.read_bits_lsb(ctx, 8) << 8 | compress.read_bits_lsb(ctx, 8);
+		if ctx.rolling_hash != u32(adler32) {
+			return E_General.Checksum_Failed;
+		}
+	}
+	return nil;
+}
+
+// @(optimization_mode="speed")
+inflate_from_stream_raw :: proc(z: ^Context, allocator := context.allocator) -> (err: Error) #no_bounds_check {
+	final := u32(0);
+	type := u32(0);
+
+	z.num_bits = 0;
+	z.code_buffer = 0;
+
+	z_repeat:      ^Huffman_Table;
+	z_offset:      ^Huffman_Table;
+	codelength_ht: ^Huffman_Table;
+
+	z_repeat, err = allocate_huffman_table(allocator=allocator);
+	if err != nil {
+		return err;
+	}
+	z_offset, err = allocate_huffman_table(allocator=allocator);
+	if err != nil {
+		return err;
+	}
+	codelength_ht, err = allocate_huffman_table(allocator=allocator);
+	if err != nil {
+		return err;
+	}
+	defer free(z_repeat, allocator);
+	defer free(z_offset, allocator);
+	defer free(codelength_ht, allocator);
+
+	if z.window_size == 0 {
+		z.window_size = DEFLATE_MAX_DISTANCE;
+	}
+
+	// Allocate rolling window buffer.
+	last_b := mem.make_dynamic_array_len_cap([dynamic]u8, z.window_size, z.window_size, allocator);
+	z.last = &last_b;
+	defer delete(last_b);
+
+	for {
+		final = compress.read_bits_lsb(z, 1);
+		type  = compress.read_bits_lsb(z, 2);
+
+		// fmt.printf("Final: %v | Type: %v\n", final, type);
+
+		switch type {
+		case 0:
+			// Uncompressed block
+
+			// Discard bits until next byte boundary
+			compress.discard_to_next_byte_lsb(z);
+			// LEN/NLEN are unsigned 16-bit values; a signed type would skip blocks larger than 32767 bytes.
+			uncompressed_len  := u16(compress.read_bits_lsb(z, 16));
+			length_check      := u16(compress.read_bits_lsb(z, 16));
+
+			// fmt.printf("LEN: %v, ~LEN: %v, NLEN: %v, ~NLEN: %v\n", uncompressed_len, ~uncompressed_len, length_check, ~length_check);
+
+
+			if ~uncompressed_len != length_check {
+				return E_Deflate.Len_Nlen_Mismatch;
+			}
+
+			/*
+				TODO: Maybe speed this up with a stream-to-stream copy (read_from)
+				and a single Adler32 update after.
+			*/
+			#no_bounds_check for uncompressed_len > 0 {
+				compress.refill_lsb(z);
+				lit := compress.read_bits_lsb(z, 8);
+				if write_byte(z, u8(lit)) != .None { return E_General.Output_Too_Short; }
+				uncompressed_len -= 1;
+			}
+		case 3:
+			return E_Deflate.BType_3;
+		case:
+			// log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
+			if type == 1 {
+				// Use fixed code lengths.
+				err = build_huffman(z_repeat, Z_FIXED_LENGTH[:]);
+				if err != nil {
+					return err;
+				}
+				err = build_huffman(z_offset, Z_FIXED_DIST[:]);
+				if err != nil {
+					return err;
+				}
+			} else {
+				lencodes: [286+32+137]u8;
+				codelength_sizes: [19]u8;
+
+				//i: u32;
+				n: u32;
+
+				compress.refill_lsb(z, 14);
+				hlit  := compress.read_bits_no_refill_lsb(z, 5) + 257;
+				hdist := compress.read_bits_no_refill_lsb(z, 5) + 1;
+				hclen := compress.read_bits_no_refill_lsb(z, 4) + 4;
+				ntot  := hlit + hdist;
+
+				#no_bounds_check for i in 0..<hclen {
+					s := compress.read_bits_lsb(z, 3);
+					codelength_sizes[Z_LENGTH_DEZIGZAG[i]] = u8(s);
+				}
+				err = build_huffman(codelength_ht, codelength_sizes[:]);
+				if err != nil {
+					return err;
+				}
+
+				n = 0;
+				c: u16;
+
+				for n < ntot {
+					c, err = decode_huffman(z, codelength_ht);
+					if err != nil {
+						return err;
+					}
+
+					if c < 0 || c >= 19 {
+						return E_Deflate.Huffman_Bad_Code_Lengths;
+					}
+					if c < 16 {
+						lencodes[n] = u8(c);
+						n += 1;
+					} else {
+						fill := u8(0);
+						compress.refill_lsb(z, 7);
+						switch c {
+						case 16:
+							c = u16(compress.read_bits_no_refill_lsb(z, 2) + 3);
+							if n == 0 {
+								return E_Deflate.Huffman_Bad_Code_Lengths;
+							}
+							fill = lencodes[n - 1];
+						case 17:
+							c = u16(compress.read_bits_no_refill_lsb(z, 3) + 3);
+						case 18:
+							c = u16(compress.read_bits_no_refill_lsb(z, 7) + 11);
+						case:
+							return E_Deflate.Huffman_Bad_Code_Lengths;
+						}
+
+						if ntot - n < u32(c) {
+							return E_Deflate.Huffman_Bad_Code_Lengths;
+						}
+
+						nc := n + u32(c);
+						#no_bounds_check for ; n < nc; n += 1 {
+							lencodes[n] = fill;
+						}
+					}
+				}
+
+				if n != ntot {
+					return E_Deflate.Huffman_Bad_Code_Lengths;
+				}
+
+				err = build_huffman(z_repeat, lencodes[:hlit]);
+				if err != nil {
+					return err;
+				}
+
+				err = build_huffman(z_offset, lencodes[hlit:ntot]);
+				if err != nil {
+					return err;
+				}
+			}
+			err = parse_huffman_block(z, z_repeat, z_offset);
+			// log.debugf("Err: %v | Final: %v | Type: %v\n", err, final, type);
+			if err != nil {
+				return err;
+			}
+		}
+		if final == 1 {
+			break;
+		}
+	}
+	return nil;
+}
+
+inflate_from_byte_array :: proc(input: []u8, buf: ^bytes.Buffer, raw := false) -> (err: Error) {
+	ctx := Context{};
+
+	r := bytes.Reader{};
+	bytes.reader_init(&r, input);
+	rs := bytes.reader_to_stream(&r);
+	ctx.input = rs;
+
+	buf := buf;
+	ws := bytes.buffer_to_stream(buf);
+	ctx.output = ws;
+
+	err = inflate_from_stream(&ctx, raw);
+
+	return err;
+}
+
+inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, raw := false) -> (err: Error) {
+	return inflate_from_byte_array(input, buf, true);
+}
+
+inflate     :: proc{inflate_from_stream, inflate_from_byte_array};
+inflate_raw :: proc{inflate_from_stream_raw, inflate_from_byte_array_raw};
diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin
index 6de6b0245..3b3716a15 100644
--- a/core/fmt/fmt.odin
+++ b/core/fmt/fmt.odin
@@ -641,9 +641,9 @@ fmt_write_padding :: proc(fi: ^Info, width: int) {
 		return;
 	}
 
-	pad_byte: byte = '0';
-	if fi.space {
-		pad_byte = ' ';
+	pad_byte: byte = ' ';
+	if !fi.space {
+		pad_byte = '0';
 	}
 
 	for i := 0; i < width; i += 1 {
@@ -1908,17 +1908,6 @@ fmt_value :: proc(fi: ^Info, v: any, verb: rune) {
 		}
 
 	}
-
-	handle_relative_pointer :: proc(ptr: ^$T) -> rawptr where intrinsics.type_is_integer(T) {
-		if ptr^ == 0 {
-			return nil;
-		}
-		when intrinsics.type_is_unsigned(T) {
-			return rawptr(uintptr(ptr) + uintptr(ptr^));
-		} else {
-			return rawptr(uintptr(ptr) + uintptr(i64(ptr^)));
-		}
-	}
 }
 
 fmt_complex :: proc(fi: ^Info, c: complex128, bits: int, verb: rune) {
diff --git a/core/image/common.odin b/core/image/common.odin
new file mode 100644
index 000000000..9024ec769
--- /dev/null
+++ b/core/image/common.odin
@@ -0,0 +1,204 @@
+package image
+
+import "core:bytes"
+import "core:mem"
+
+/*
+	Image description plus pixel buffer, as returned by image loaders such as `png.load`.
+*/
+Image :: struct {
+	width:      int,
+	height:     int,
+	// Number of channels per pixel.
+	channels:   int,
+	// `return_single_channel` handles 8 and 16 here; presumably bits per channel. TODO confirm.
+	depth:      u8,
+	// Pixel data.
+	pixels:     bytes.Buffer,
+	/*
+		Some image loaders/writers can return/take an optional background color.
+		For convenience, we return them as u16 so we don't need to switch on the type
+		in our viewer, and can just test against nil.
+	*/
+	background: Maybe([3]u16),
+	// Loader-specific extra data. The PNG example type-asserts it to `png.Info`.
+	sidecar:    any,
+}
+
+/*
+	IMPORTANT: `.do_not_expand_*` options currently skip handling of the `alpha_*` options,
+		therefore Gray+Alpha will be returned as such even if you add `.alpha_drop_if_present`,
+		and `.alpha_add_if_missing` and keyed transparency will likewise be ignored.
+
+		The same goes for indexed images. This will be remedied in a near future update.
+*/
+
+/*
+Image_Option:
+	`.info`
+		This option behaves as `.return_header` and `.do_not_decompress_image` and can be used
+		to gather an image's dimensions and color information.
+
+	`.return_header`
+		Fill out img.sidecar.header with the image's format-specific header struct.
+		If we only care about the image specs, we can set `.return_header` +
+		`.do_not_decompress_image`, or `.info`, which works as if both of these were set.
+
+	`.return_metadata`
+		Returns all chunks not needed to decode the data.
+		It also returns the header as if `.return_header` was set.
+
+	`.do_not_decompress_image`
+		Skip decompressing IDAT chunk, defiltering and the rest.
+
+	`.do_not_expand_grayscale`
+		Do not turn grayscale (+ Alpha) images into RGB(A).
+		Returns just the 1 or 2 channels present, although 1, 2 and 4 bit are still scaled to 8-bit.
+
+	`.do_not_expand_indexed`
+		Do not turn indexed (+ Alpha) images into RGB(A).
+		Returns just the 1 or 2 (with `tRNS`) channels present.
+		Make sure to use `return_metadata` to also return the palette chunk so you can recolor it yourself.
+
+	`.do_not_expand_channels`
+		Applies both `.do_not_expand_grayscale` and `.do_not_expand_indexed`.
+
+	`.alpha_add_if_missing`
+		If the image has no alpha channel, it'll add one set to max(type).
+		Turns RGB into RGBA and Gray into Gray+Alpha
+
+	`.alpha_drop_if_present`
+		If the image has an alpha channel, drop it.
+		You may want to use `.alpha_premultiply` in this case.
+
+		NOTE: For PNG, this also skips handling of the tRNS chunk, if present,
+		unless you select `.alpha_premultiply`.
+		In this case it'll premultiply the specified pixels in question only,
+		as the others are implicitly fully opaque.
+
+	`.alpha_premultiply`
+		If the image has an alpha channel, returns image data as follows:
+			RGB *= A, Gray *= A
+
+	`.blend_background`
+		If a bKGD chunk is present in a PNG, we normally just set `img.background`
+		with its value and leave it up to the application to decide how to display the image,
+		as per the PNG specification.
+
+		With `.blend_background` selected, we blend the image against the background
+		color. As this negates the use for an alpha channel, we'll drop it _unless_
+		you also specify `.alpha_add_if_missing`.
+
+	Options that don't apply to an image format will be ignored by their loader.
+*/
+
+/*
+	Loader options. Combine them in an `Options` bit set.
+	Each option is described in the `Image_Option` comment above.
+*/
+Option :: enum {
+	info = 0,
+	do_not_decompress_image,
+	return_header,
+	return_metadata,
+	alpha_add_if_missing,
+	alpha_drop_if_present,
+	alpha_premultiply,
+	blend_background,
+	// Unimplemented
+	do_not_expand_grayscale,
+	do_not_expand_indexed,
+	do_not_expand_channels,
+}
+// Set of `Option` values, as passed to a loader (e.g. `png.load(file, options)`).
+Options :: distinct bit_set[Option];
+
+/*
+	Image loading errors.
+	This enum is a variant of the `compress.Error` union, so a loader can return
+	these alongside I/O and decompression errors.
+*/
+Error :: enum {
+	Invalid_PNG_Signature,
+	IHDR_Not_First_Chunk,
+	IHDR_Corrupt,
+	IDAT_Missing,
+	IDAT_Must_Be_Contiguous,
+	IDAT_Corrupt,
+	PNG_Does_Not_Adhere_to_Spec,
+	PLTE_Encountered_Unexpectedly,
+	PLTE_Invalid_Length,
+	TRNS_Encountered_Unexpectedly,
+	BKGD_Invalid_Length,
+	Invalid_Image_Dimensions,
+	Unknown_Color_Type,
+	Invalid_Color_Bit_Depth_Combo,
+	Unknown_Filter_Method,
+	Unknown_Interlace_Method,
+	Requested_Channel_Not_Present,
+	Post_Processing_Error,
+}
+
+/*
+	Functions to help with image buffer calculations
+*/
+
+/*
+	Returns the number of bytes needed for an image of `width` x `height` pixels with
+	`channels` channels of `depth` bits each.
+
+	Each row is rounded up to a whole number of bytes, after which `extra_row_bytes`
+	is added to every row.
+*/
+compute_buffer_size :: proc(width, height, channels, depth: int, extra_row_bytes := int(0)) -> (size: int) {
+	bits_per_row  := channels * width * depth;
+	bytes_per_row := (bits_per_row + 7) >> 3;
+
+	return (bytes_per_row + extra_row_bytes) * height;
+}
+
+/*
+	For when you have an RGB(A) image, but want a particular channel.
+*/
+
+// Channel selector for `return_single_channel`. Note that the values are 1-based.
+Channel :: enum u8 {
+	R = 1,
+	G = 2,
+	B = 3,
+	A = 4,
+}
+
+/*
+	Extracts one channel of `img` into a newly allocated single channel image.
+
+	Inputs:
+		img:     Source image. Only 8 and 16 bit depths are supported.
+		channel: Channel to extract. For a two channel image, `.A` selects the second channel.
+
+	Returns:
+		res: Image allocated with `new`, with its own pixel buffer. `background` and `sidecar`
+		     are assigned from `img` as-is.
+		ok:  false (with `res == nil`) if `img` is nil, the channel isn't present
+		     or the bit depth is unsupported.
+*/
+return_single_channel :: proc(img: ^Image, channel: Channel) -> (res: ^Image, ok: bool) {
+	if img == nil || img.channels < 1 {
+		return nil, false;
+	}
+
+	// `Channel` values are 1-based (R = 1 .. A = 4).
+	idx := int(channel);
+
+	if img.channels == 2 && idx == 4 {
+		// Alpha requested, which in a two channel image is the second channel.
+		idx = 2;
+	}
+
+	if idx < 1 || idx > img.channels {
+		return nil, false;
+	}
+
+	// Convert to the 0-based position of the channel within a pixel.
+	offset := idx - 1;
+
+	t: bytes.Buffer;
+
+	switch img.depth {
+	case 8:
+		buffer_size := compute_buffer_size(img.width, img.height, 1, 8);
+		resize(&t.buf, buffer_size);
+
+		i := bytes.buffer_to_bytes(&img.pixels);
+		o := bytes.buffer_to_bytes(&t);
+
+		// Stop at a partial trailing pixel or a full output buffer instead of panicking.
+		for len(i) >= img.channels && len(o) > 0 {
+			o[0] = i[offset];
+			i = i[img.channels:];
+			o = o[1:];
+		}
+	case 16:
+		buffer_size := compute_buffer_size(img.width, img.height, 1, 16);
+		resize(&t.buf, buffer_size);
+
+		i := mem.slice_data_cast([]u16, img.pixels.buf[:]);
+		o := mem.slice_data_cast([]u16, t.buf[:]);
+
+		for len(i) >= img.channels && len(o) > 0 {
+			o[0] = i[offset];
+			i = i[img.channels:];
+			o = o[1:];
+		}
+	case:
+		// We shouldn't see 1, 2 or 4 bit, as the loader already turns these into 8-bit.
+		// Any other depth is unsupported as well.
+		return nil, false;
+	}
+
+	res = new(Image);
+	res.width      = img.width;
+	res.height     = img.height;
+	res.channels   = 1;
+	res.depth      = img.depth;
+	res.pixels     = t;
+	res.background = img.background;
+	res.sidecar    = img.sidecar;
+
+	return res, true;
+}
diff --git a/core/image/png/example.odin b/core/image/png/example.odin
new file mode 100644
index 000000000..59a4cfd42
--- /dev/null
+++ b/core/image/png/example.odin
@@ -0,0 +1,327 @@
+//+ignore
+package png
+
+import "core:compress"
+import "core:image"
+import "core:image/png"
+import "core:bytes"
+import "core:fmt"
+
+// For PPM writer
+import "core:mem"
+import "core:os"
+
+/*
+	Example: loads a PNG, prints the chunks found in the image's sidecar using the
+	chunk helpers, and then saves the decoded pixels to `out.ppm`.
+*/
+main :: proc() {
+	file: string;
+
+	// NOTE(review): no options are set here. Per the `Option` docs, metadata chunks are only
+	// returned with `.return_metadata`, so the chunk loop below may have nothing to print.
+	options := image.Options{};
+	err:       compress.Error;
+	img:      ^image.Image;
+
+	file = "../../../misc/logo-slim.png";
+
+	img, err = png.load(file, options);
+	// `png.destroy` returns early when given nil, so this is fine if the load failed.
+	defer png.destroy(img);
+
+	if err != nil {
+		fmt.printf("Trying to read PNG file %v returned %v\n", file, err);
+	} else {
+		v:  png.Info;
+		ok: bool;
+
+		fmt.printf("Image: %vx%vx%v, %v-bit.\n", img.width, img.height, img.channels, img.depth);
+
+		if v, ok = img.sidecar.(png.Info); ok {
+			// Handle ancillary chunks as you wish.
+			// We provide helper functions for a few types.
+			for c in v.chunks {
+				#partial switch (c.header.type) {
+					case .tIME:
+						t, _ := png.core_time(c);
+						fmt.printf("[tIME]: %v\n", t);
+					case .gAMA:
+						fmt.printf("[gAMA]: %v\n", png.gamma(c));
+					case .pHYs:
+						phys := png.phys(c);
+						if phys.unit == .Meter {
+							xm    := f32(img.width)  / f32(phys.ppu_x);
+							ym    := f32(img.height) / f32(phys.ppu_y);
+							dpi_x, dpi_y := png.phys_to_dpi(phys);
+							fmt.printf("[pHYs] Image resolution is %v x %v pixels per meter.\n", phys.ppu_x, phys.ppu_y);
+							fmt.printf("[pHYs] Image resolution is %v x %v DPI.\n", dpi_x, dpi_y);
+							fmt.printf("[pHYs] Image dimensions are %v x %v meters.\n", xm, ym);
+						} else {
+							fmt.printf("[pHYs] x: %v, y: %v pixels per unknown unit.\n", phys.ppu_x, phys.ppu_y);
+						}
+					case .iTXt, .zTXt, .tEXt:
+						res, ok_text := png.text(c);
+						if ok_text {
+							if c.header.type == .iTXt {
+								fmt.printf("[iTXt] %v (%v:%v): %v\n", res.keyword, res.language, res.keyword_localized, res.text);
+							} else {
+								fmt.printf("[tEXt/zTXt] %v: %v\n", res.keyword, res.text);
+							}
+						}
+						// Called whether or not the helper succeeded.
+						defer png.text_destroy(res);
+					case .bKGD:
+						fmt.printf("[bKGD] %v\n", img.background);
+					case .eXIf:
+						res, ok_exif := png.exif(c);
+						if ok_exif {
+							/*
+								Other than checking the signature and byte order, we don't handle Exif data.
+								If you wish to interpret it, pass it to an Exif parser.
+							*/
+							fmt.printf("[eXIf] %v\n", res);
+						}
+					case .PLTE:
+						plte, plte_ok := png.plte(c);
+						if plte_ok {
+							fmt.printf("[PLTE] %v\n", plte);
+						} else {
+							fmt.printf("[PLTE] Error\n");
+						}
+					case .hIST:
+						res, ok_hist := png.hist(c);
+						if ok_hist {
+							fmt.printf("[hIST] %v\n", res);
+						}
+					case .cHRM:
+						res, ok_chrm := png.chrm(c);
+						if ok_chrm {
+							fmt.printf("[cHRM] %v\n", res);
+						}
+					case .sPLT:
+						res, ok_splt := png.splt(c);
+						if ok_splt {
+							fmt.printf("[sPLT] %v\n", res);
+						}
+						png.splt_destroy(res);
+					case .sBIT:
+						if res, ok_sbit := png.sbit(c); ok_sbit {
+							fmt.printf("[sBIT] %v\n", res);
+						}
+					case .iCCP:
+						res, ok_iccp := png.iccp(c);
+						if ok_iccp {
+							fmt.printf("[iCCP] %v\n", res);
+						}
+						png.iccp_destroy(res);
+					case .sRGB:
+						if res, ok_srgb := png.srgb(c); ok_srgb {
+							fmt.printf("[sRGB] Rendering intent: %v\n", res);
+						}
+					case:
+						// No helper for this chunk type: print its name and raw data.
+						type := c.header.type;
+						name := png.chunk_type_to_name(&type);
+						fmt.printf("[%v]: %v\n", name, c.data);
+				}
+			}
+		}
+	}
+
+	// Only try to save pixels if the options asked the loader to decode them.
+	if err == nil && .do_not_decompress_image not_in options && .info not_in options {
+		if ok := write_image_as_ppm("out.ppm", img); ok {
+			fmt.println("Saved decoded image.");
+		} else {
+			fmt.println("Error saving out.ppm.");
+			fmt.println(img);
+		}
+	}
+}
+
+// Crappy PPM writer used during testing. Don't use in production.
+/*
+	Writes `image` to `filename` as a binary PPM ("P6") file.
+
+	- 3 channel images are written as-is.
+	- 1 channel images have their value replicated to R, G and B.
+	- 2 channel 8-bit images are multiplied by their alpha.
+	- 2 channel 16-bit images are blended against the first background component, or 0 if there is none.
+	- 4 channel images are blended against the background, or a fallback color if there is none.
+
+	Returns false if the pixel buffer is smaller than the image dimensions require,
+	the file can't be opened, or the channel count is not 1, 2, 3 or 4.
+	The results of the individual writes are not checked.
+
+	NOTE: On little endian targets, 16-bit pixel data is byte swapped IN PLACE in `image.pixels`.
+*/
+write_image_as_ppm :: proc(filename: string, image: ^image.Image) -> (success: bool) {
+
+	// Background for pixel (x, y): the image's own background if present, otherwise a fallback.
+	// `high` selects 16-bit component values.
+	_bg :: proc(bg: Maybe([3]u16), x, y: int, high := true) -> (res: [3]u16) {
+		if v, ok := bg.?; ok {
+			res = v;
+		} else {
+			if high {
+				l := u16(30 * 256 + 30);
+
+				if (x & 4 == 0) ~ (y & 4 == 0) {
+					res = [3]u16{l, 0, l};
+				} else {
+					res = [3]u16{l >> 1, 0, l >> 1};
+				}
+			} else {
+				if (x & 4 == 0) ~ (y & 4 == 0) {
+					res = [3]u16{30, 30, 30};
+				} else {
+					res = [3]u16{15, 15, 15};
+				}
+			}
+		}
+		return;
+	}
+
+	// profiler.timed_proc();
+	using image;
+	using os;
+
+	flags: int = O_WRONLY|O_CREATE|O_TRUNC;
+
+	img := image;
+
+	// Bytes per pixel component.
+	bpp := depth == 16 ? 2 : 1;
+
+	pix := bytes.buffer_to_bytes(&img.pixels);
+
+	// Validate before touching the pixel data, taking the bit depth into account.
+	expected := width * height * channels * bpp;
+	if expected <= 0 || len(pix) < expected {
+		return false;
+	}
+	// Ignore trailing bytes so the loops below can't run past the output buffer.
+	pix = pix[:expected];
+
+	// PBM 16-bit images are big endian
+	when ODIN_ENDIAN == "little" {
+		if img.depth == 16 {
+			// Convert the pixel components from machine order to big endian, in place.
+			input  := mem.slice_data_cast([]u16,   img.pixels.buf[:]);
+			output := mem.slice_data_cast([]u16be, img.pixels.buf[:]);
+			#no_bounds_check for v, i in input {
+				output[i] = u16be(v);
+			}
+		}
+	}
+
+	mode: int = 0;
+	when ODIN_OS == "linux" || ODIN_OS == "darwin" {
+		// NOTE(justasd): 644 (owner read, write; group read; others read)
+		mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+	}
+
+	fd, err := open(filename, flags, mode);
+	if err != 0 {
+		return false;
+	}
+	defer close(fd);
+
+	write_string(fd,
+		fmt.tprintf("P6\n%v %v\n%v\n", width, height, (1 << depth -1)),
+	);
+
+	if channels == 3 {
+		// We don't handle transparency here...
+		write_ptr(fd, raw_data(pix), len(pix));
+	} else {
+		bytes_needed := width * height * 3 * bpp;
+
+		op := bytes.Buffer{};
+		bytes.buffer_init_allocator(&op, bytes_needed, bytes_needed);
+		defer bytes.buffer_destroy(&op);
+
+		if channels == 1 {
+			if depth == 16 {
+				assert(len(pix) == width * height * 2);
+				// Values are copied verbatim, so their byte order doesn't matter here.
+				p16 := mem.slice_data_cast([]u16, pix);
+				o16 := mem.slice_data_cast([]u16, op.buf[:]);
+				#no_bounds_check for len(p16) != 0 {
+					r := u16(p16[0]);
+					o16[0] = r;
+					o16[1] = r;
+					o16[2] = r;
+					p16 = p16[1:];
+					o16 = o16[3:];
+				}
+			} else {
+				o := 0;
+				for i := 0; i < len(pix); i += 1 {
+					r := pix[i];
+					op.buf[o  ] = r;
+					op.buf[o+1] = r;
+					op.buf[o+2] = r;
+					o += 3;
+				}
+			}
+			write_ptr(fd, raw_data(op.buf), len(op.buf));
+		} else if channels == 2 {
+			if depth == 16 {
+				// The pixel data is big endian at this point, so read and write it as such.
+				p16 := mem.slice_data_cast([]u16be, pix);
+				o16 := mem.slice_data_cast([]u16be, op.buf[:]);
+
+				// Background gray level, or 0 if the image has no background.
+				bg: f64;
+				if v, has_bg := img.background.?; has_bg {
+					bg = f64(v[0]);
+				}
+
+				#no_bounds_check for len(p16) != 0 {
+					r  := f64(u16(p16[0]));
+					a  := f64(u16(p16[1])) / 65535.0;
+					l  := (a * r) + (1 - a) * bg;
+
+					o16[0] = u16be(u16(l));
+					o16[1] = u16be(u16(l));
+					o16[2] = u16be(u16(l));
+
+					p16 = p16[2:];
+					o16 = o16[3:];
+				}
+			} else {
+				o := 0;
+				for i := 0; i < len(pix); i += 2 {
+					r := pix[i]; a := pix[i+1]; a1 := f32(a) / 255.0;
+					c := u8(f32(r) * a1);
+					op.buf[o  ] = c;
+					op.buf[o+1] = c;
+					op.buf[o+2] = c;
+					o += 3;
+				}
+			}
+			write_ptr(fd, raw_data(op.buf), len(op.buf));
+		} else if channels == 4 {
+			if depth == 16 {
+				p16 := mem.slice_data_cast([]u16be, pix);
+				o16 := mem.slice_data_cast([]u16be, op.buf[:]);
+
+				// The same background is used for every pixel, so look it up once.
+				bg := _bg(img.background, 0, 0);
+
+				#no_bounds_check for len(p16) != 0 {
+					r     := f32(p16[0]);
+					g     := f32(p16[1]);
+					b     := f32(p16[2]);
+					a     := f32(p16[3]) / 65535.0;
+
+					lr  := (a * r) + (1 - a) * f32(bg[0]);
+					lg  := (a * g) + (1 - a) * f32(bg[1]);
+					lb  := (a * b) + (1 - a) * f32(bg[2]);
+
+					o16[0] = u16be(lr);
+					o16[1] = u16be(lg);
+					o16[2] = u16be(lb);
+
+					p16 = p16[4:];
+					o16 = o16[3:];
+				}
+			} else {
+				o := 0;
+
+				for i := 0; i < len(pix); i += 4 {
+
+					x := (i / 4)  % width;
+					y := i / width / 4;
+
+					_b := _bg(img.background, x, y, false);
+					bgcol := [3]u8{u8(_b[0]), u8(_b[1]), u8(_b[2])};
+
+					r := f32(pix[i]);
+					g := f32(pix[i+1]);
+					b := f32(pix[i+2]);
+					a := f32(pix[i+3]) / 255.0;
+
+					lr := u8(f32(r) * a + (1 - a) * f32(bgcol[0]));
+					lg := u8(f32(g) * a + (1 - a) * f32(bgcol[1]));
+					lb := u8(f32(b) * a + (1 - a) * f32(bgcol[2]));
+					op.buf[o  ] = lr;
+					op.buf[o+1] = lg;
+					op.buf[o+2] = lb;
+					o += 3;
+				}
+			}
+			write_ptr(fd, raw_data(op.buf), len(op.buf));
+		} else {
+			return false;
+		}
+	}
+	return true;
+}
diff --git a/core/image/png/helpers.odin b/core/image/png/helpers.odin
new file mode 100644
index 000000000..0975d1d87
--- /dev/null
+++ b/core/image/png/helpers.odin
@@ -0,0 +1,516 @@
+package png
+
+import "core:image"
+import "core:compress/zlib"
+import coretime "core:time"
+import "core:strings"
+import "core:bytes"
+import "core:mem"
+
+/*
+	These are a few useful utility functions to work with PNG images.
+*/
+
+/*
+	Cleanup of image-specific data.
+	There are other helpers for cleanup of PNG-specific data.
+	Those are named *_destroy, where * is the name of the helper.
+*/
+
+/*
+	Releases the pixel buffer of `img` and then `img` itself.
+	Does nothing for a nil image, which is what you have if load returned with an error.
+*/
+destroy :: proc(img: ^Image) {
+	if img != nil {
+		bytes.buffer_destroy(&img.pixels);
+
+		/*
+			We don't need to do anything for the individual chunks.
+			They're allocated on the temp allocator, as is info.chunks
+
+			See read_chunk.
+		*/
+		free(img);
+	}
+}
+
+/*
+	Chunk helpers
+*/
+
+/*
+	Returns the gamma stored in a gAMA chunk, which holds it multiplied by 100,000.
+	Asserts that `c` is a gAMA chunk.
+*/
+gamma :: proc(c: Chunk) -> f32 {
+	assert(c.header.type == .gAMA);
+	payload := (^gAMA)(raw_data(c.data));
+
+	// Returns the wrong result on old backend, where going through
+	// `u32(gamma_100k)` first was needed. Fixed for -llvm-api
+	return f32(payload.gamma_100k) / 100_000.0;
+}
+
+// 1000 mm per meter divided by 25.4 mm per inch.
+INCHES_PER_METER :: 1000.0 / 25.4;
+
+/*
+	Returns the payload of a pHYs chunk as a `pHYs` struct.
+	Asserts that `c` is a pHYs chunk.
+*/
+phys :: proc(c: Chunk) -> pHYs {
+	assert(c.header.type == .pHYs);
+	return (^pHYs)(raw_data(c.data))^;
+}
+
+/*
+	Converts the pixels per unit of `p` to dots per inch by dividing by `INCHES_PER_METER`.
+	The result is only meaningful if the unit of `p` is the meter; this isn't checked here.
+*/
+phys_to_dpi :: proc(p: pHYs) -> (x_dpi, y_dpi: f32) {
+	x_dpi = f32(p.ppu_x) / INCHES_PER_METER;
+	y_dpi = f32(p.ppu_y) / INCHES_PER_METER;
+	return;
+}
+
+/*
+	Returns the payload of a tIME chunk as a `tIME` struct.
+	Asserts that `c` is a tIME chunk.
+*/
+time :: proc(c: Chunk) -> tIME {
+	assert(c.header.type == .tIME);
+	return (^tIME)(raw_data(c.data))^;
+}
+
+/*
+	Converts a tIME chunk to a `core:time` Time using `datetime_to_time`.
+	`ok` is whatever that conversion reports.
+*/
+core_time :: proc(c: Chunk) -> (t: coretime.Time, ok: bool) {
+	stamp := time(c);
+
+	t, ok = coretime.datetime_to_time(
+		int(stamp.year), int(stamp.month),  int(stamp.day),
+		int(stamp.hour), int(stamp.minute), int(stamp.second),
+	);
+	return;
+}
+
+/*
+	Decodes a tEXt, zTXt or iTXt chunk into a `Text`.
+
+	The strings in `res` are clones owned by the caller. Release them with `text_destroy`.
+	On failure `ok` is false and `res` is returned empty, with nothing left allocated in it.
+	Any other chunk type fails.
+*/
+text :: proc(c: Chunk) -> (res: Text, ok: bool) {
+	#partial switch c.header.type {
+	case .tEXt:
+		// Parsed as: keyword, \0, text.
+		fields := bytes.split(s=c.data, sep=[]u8{0}, allocator=context.temp_allocator);
+		if len(fields) != 2 {
+			return {}, false;
+		}
+
+		res.keyword = strings.clone(string(fields[0]));
+		res.text    = strings.clone(string(fields[1]));
+		return res, true;
+	case .zTXt:
+		// Parsed as: keyword, \0, compression method, compressed text.
+		fields := bytes.split_n(s=c.data, sep=[]u8{0}, n=3, allocator=context.temp_allocator);
+		if len(fields) != 3 || len(fields[1]) != 0 {
+			// Compression method must be 0=Deflate, which thanks to the split above turns
+			// into an empty slice
+			return {}, false;
+		}
+
+		// Set up ZLIB context and decompress text payload.
+		buf: bytes.Buffer;
+		defer bytes.buffer_destroy(&buf);
+		zlib_error := zlib.inflate_from_byte_array(fields[2], &buf);
+		if zlib_error != nil {
+			return {}, false;
+		}
+
+		res.keyword = strings.clone(string(fields[0]));
+		res.text    = strings.clone(bytes.buffer_to_string(&buf));
+		return res, true;
+	case .iTXt:
+		// Parsed as: keyword, \0, compression flag, compression method,
+		//            language, \0, localized keyword, \0, text.
+		nul := strings.index_byte(string(c.data), 0);
+		if nul == -1 {
+			return {}, false;
+		}
+		if len(c.data) < nul + 5 {
+			// At a minimum, including the \0 following the keyword, we require 5 more bytes.
+			return {}, false;
+		}
+		keyword := c.data[:nul];
+		rest    := c.data[nul+1:];
+
+		compression_flag := rest[0];
+		if compression_flag > 1 {
+			return {}, false;
+		}
+		compression_method := rest[1];
+		if compression_flag == 1 && compression_method > 0 {
+			// Only Deflate is supported
+			return {}, false;
+		}
+		rest = rest[2:];
+
+		// We now expect an optional language keyword and translated keyword, both followed by a \0
+		nul = strings.index_byte(string(rest), 0);
+		if nul == -1 {
+			return {}, false;
+		}
+		language := rest[:nul];
+		rest = rest[nul+1:];
+
+		nul = strings.index_byte(string(rest), 0);
+		if nul == -1 {
+			return {}, false;
+		}
+		keyword_localized := rest[:nul];
+		rest = rest[nul+1:];
+
+		if compression_flag == 0 {
+			res.text = strings.clone(string(rest));
+		} else {
+			// Set up ZLIB context and decompress text payload.
+			buf: bytes.Buffer;
+			defer bytes.buffer_destroy(&buf);
+			zlib_error := zlib.inflate_from_byte_array(rest, &buf);
+			if zlib_error != nil {
+				return {}, false;
+			}
+
+			res.text = strings.clone(bytes.buffer_to_string(&buf));
+		}
+
+		// The other fields are cloned only now that the whole chunk has been validated,
+		// so that the failure paths above don't leave allocations behind.
+		res.keyword           = strings.clone(string(keyword));
+		res.language          = strings.clone(string(language));
+		res.keyword_localized = strings.clone(string(keyword_localized));
+		return res, true;
+	case:
+		// PNG text helper called with an unrecognized chunk type.
+		return {}, false;
+	}
+}
+
+// Frees the four strings of a `Text` returned by `text`.
+text_destroy :: proc(text: Text) {
+	delete(text.text);
+	delete(text.language);
+	delete(text.keyword_localized);
+	delete(text.keyword);
+}
+
+/*
+	Decodes an iCCP chunk: a profile name, a compression method byte and a
+	ZLIB compressed profile.
+
+	On success, `res.name` is a clone and `res.profile` is the decompressed profile.
+	Release both with `iccp_destroy`.
+	Fails if `c` is not an iCCP chunk, the chunk is malformed or decompression fails.
+*/
+iccp :: proc(c: Chunk) -> (res: iCCP, ok: bool) {
+	if c.header.type != .iCCP {
+		return {}, false;
+	}
+
+	fields := bytes.split_n(s=c.data, sep=[]u8{0}, n=3, allocator=context.temp_allocator);
+	if len(fields) != 3 {
+		// The name terminator and/or compression method byte are missing.
+		return {}, false;
+	}
+
+	if len(fields[0]) < 1 || len(fields[0]) > 79 {
+		// Invalid profile name
+		return {}, false;
+	}
+
+	if len(fields[1]) != 0 {
+		// Compression method should be a zero, which the split turned into an empty slice.
+		return {}, false;
+	}
+
+	// Set up ZLIB context and decompress iCCP payload
+	buf: bytes.Buffer;
+	zlib_error := zlib.inflate_from_byte_array(fields[2], &buf);
+	if zlib_error != nil {
+		bytes.buffer_destroy(&buf);
+		return {}, false;
+	}
+
+	res.name = strings.clone(string(fields[0]));
+	// The buffer's memory is handed to the caller here, so it isn't destroyed.
+	res.profile = bytes.buffer_to_bytes(&buf);
+
+	return res, true;
+}
+
+// Frees the name and profile of an `iCCP` returned by `iccp`.
+iccp_destroy :: proc(i: iCCP) {
+	delete(i.name);
+	delete(i.profile);
+}
+
+/*
+	Decodes an sRGB chunk, whose single byte is the rendering intent.
+	Fails if `c` is not an sRGB chunk, isn't exactly one byte long,
+	or the intent is greater than the largest known `sRGB_Rendering_Intent`.
+*/
+srgb :: proc(c: Chunk) -> (res: sRGB, ok: bool) {
+	if c.header.type != .sRGB || len(c.data) != 1 {
+		return {}, false;
+	}
+
+	res.intent = sRGB_Rendering_Intent(c.data[0]);
+	ok = res.intent <= max(sRGB_Rendering_Intent);
+	return;
+}
+
+/*
+	Decodes a PLTE chunk into palette entries of three bytes each.
+	`res.used` is the number of entries read.
+
+	Fails if `c` is not a PLTE chunk, its data isn't a whole number of entries,
+	or it holds more entries than `res.entries` has room for.
+*/
+plte :: proc(c: Chunk) -> (res: PLTE, ok: bool) {
+	if c.header.type != .PLTE {
+		return {}, false;
+	}
+
+	// Validate up front so the loop can't index past the data or the entry table.
+	if len(c.data) % 3 != 0 || len(c.data) / 3 > len(res.entries) {
+		return {}, false;
+	}
+
+	i := 0;
+	for j := 0; j < len(c.data); j += 3 {
+		res.entries[i] = {c.data[j], c.data[j+1], c.data[j+2]};
+		i += 1;
+	}
+	res.used = u16(i);
+	return res, true;
+}
+
+/*
+	Decodes an sPLT (suggested palette) chunk: a name, a \0, a sample depth byte
+	and the palette entries.
+
+	Entries take 6 bytes each at depth 8 and 10 bytes each at depth 16. At most 256 are accepted.
+	`res.name` is a clone, to be released with `splt_destroy`.
+	`res.entries` is a view into the chunk's data, not a copy.
+
+	Fails if `c` is not an sPLT chunk or is malformed.
+*/
+splt :: proc(c: Chunk) -> (res: sPLT, ok: bool) {
+	if c.header.type != .sPLT {
+		return {}, false;
+	}
+
+	fields := bytes.split_n(s=c.data, sep=[]u8{0}, n=2, allocator=context.temp_allocator);
+	if len(fields) != 2 {
+		return {}, false;
+	}
+
+	if len(fields[1]) < 1 {
+		// Nothing follows the name, so there's no sample depth byte to read.
+		return {}, false;
+	}
+
+	res.depth = fields[1][0];
+	if res.depth != 8 && res.depth != 16 {
+		return {}, false;
+	}
+
+	data := fields[1][1:];
+	count: int;
+
+	if res.depth == 8 {
+		if len(data) % 6 != 0 {
+			return {}, false;
+		}
+		count = len(data) / 6;
+		if count > 256 {
+			return {}, false;
+		}
+
+		res.entries = mem.slice_data_cast([][4]u8, data);
+	} else { // res.depth == 16
+		if len(data) % 10 != 0 {
+			return {}, false;
+		}
+		count = len(data) / 10;
+		if count > 256 {
+			return {}, false;
+		}
+
+		res.entries = mem.slice_data_cast([][4]u16, data);
+	}
+
+	res.name = strings.clone(string(fields[0]));
+	res.used = u16(count);
+
+	return res, true;
+}
+
+// Frees the name of an `sPLT` returned by `splt`. The entries are not freed here.
+splt_destroy :: proc(s: sPLT) {
+	delete(s.name);
+}
+
+/*
+	Returns [4]u8 with the significant bits in each channel.
+	A channel will contain zero if not applicable to the PNG color type.
+
+	Fails if the chunk's data isn't 1 to 4 bytes long.
+*/
+sbit :: proc(c: Chunk) -> (res: [4]u8, ok: bool) {
+	n := len(c.data);
+	if n < 1 || n > 4 {
+		return;
+	}
+
+	for bits, i in c.data {
+		res[i] = bits;
+	}
+	return res, true;
+}
+
+/*
+	Decodes a hIST chunk. `res.used` is the number of entries read.
+
+	Fails if `c` is not a hIST chunk, or if its length is odd or greater than 512.
+*/
+hist :: proc(c: Chunk) -> (res: hIST, ok: bool) {
+	if c.header.type != .hIST {
+		return {}, false;
+	}
+
+	// The entries are u16be, so the length must be even.
+	// At most 256 entries must be present
+	length := c.header.length;
+	if length & 1 == 1 || length > 512 {
+		return {}, false;
+	}
+
+	entries := mem.slice_data_cast([]u16be, c.data);
+	for entry, i in entries {
+		// HIST entries are u16be, we unpack them to machine format
+		res.entries[i] = u16(entry);
+	}
+	res.used = u16(len(entries));
+	return res, true;
+}
+
+/*
+	Decodes a cHRM chunk: x and y for w, r, g and b, each stored multiplied by 100,000.
+
+	Fails if the chunk length isn't the size of `cHRM_Raw`.
+*/
+chrm :: proc(c: Chunk) -> (res: cHRM, ok: bool) {
+	if c.header.length != size_of(cHRM_Raw) {
+		return {}, false;
+	}
+
+	SCALE :: 100_000.0;
+	raw := (^cHRM_Raw)(raw_data(c.data));
+
+	res.w.x = f32(raw.w.x) / SCALE;
+	res.w.y = f32(raw.w.y) / SCALE;
+
+	res.r.x = f32(raw.r.x) / SCALE;
+	res.r.y = f32(raw.r.y) / SCALE;
+
+	res.g.x = f32(raw.g.x) / SCALE;
+	res.g.y = f32(raw.g.y) / SCALE;
+
+	res.b.x = f32(raw.b.x) / SCALE;
+	res.b.y = f32(raw.b.y) / SCALE;
+
+	return res, true;
+}
+
+/*
+	Checks the first four bytes of an Exif payload and determines its byte order:
+	"MM" followed by 0, 42 is big endian, "II" followed by 42, 0 is little endian.
+
+	On success, `res.data` is the chunk's data itself, not a copy.
+	Fails if the data is shorter than 4 bytes or doesn't start with either sequence.
+*/
+exif :: proc(c: Chunk) -> (res: Exif, ok: bool) {
+	d := c.data;
+
+	if len(d) < 4 {
+		return;
+	}
+
+	switch {
+	case d[0] == 'M' && d[1] == 'M':
+		res.byte_order = .big_endian;
+		if d[2] != 0 || d[3] != 42 {
+			return;
+		}
+	case d[0] == 'I' && d[1] == 'I':
+		res.byte_order = .little_endian;
+		if d[2] != 42 || d[3] != 0 {
+			return;
+		}
+	case:
+		return;
+	}
+
+	res.data = c.data;
+	ok = true;
+	return;
+}
+
+/*
+	General helper functions
+*/
+
+// Alias, so the `png` package can call `image.compute_buffer_size` unqualified.
+compute_buffer_size :: image.compute_buffer_size;
+
+/*
+	PNG save helpers
+*/
+
+/*
+	Everything inside this `when false` block is disabled.
+
+	NOTE(review): it refers to `hash`, `os` and `profiler`, none of which are imported
+	by this file, so it needs work before it can be enabled.
+*/
+when false {
+
+	// Builds a chunk of type `t` from `c`: a `[]u8` is used as the data directly,
+	// anything else is viewed as bytes. The CRC covers the chunk type followed by the data.
+	make_chunk :: proc(c: any, t: Chunk_Type) -> (res: Chunk) {
+
+		data: []u8;
+		if v, ok := c.([]u8); ok {
+			data = v;
+		} else {
+			data = mem.any_to_bytes(c);
+		}
+
+		res.header.length = u32be(len(data));
+		res.header.type   = t;
+		res.data   = data;
+
+		// CRC the type
+		crc    := hash.crc32(mem.any_to_bytes(res.header.type));
+		// Extend the CRC with the data
+		res.crc = u32be(hash.crc32(data, crc));
+		return;
+	}
+
+	// Writes a chunk to `fd`: the 8 byte header, the data and the 4 byte CRC.
+	// The results of the writes are not checked.
+	write_chunk :: proc(fd: os.Handle, chunk: Chunk) {
+		c := chunk;
+		// Write length + type
+		os.write_ptr(fd, &c.header, 8);
+		// Write data
+		os.write_ptr(fd, mem.raw_data(c.data), int(c.header.length));
+		// Write CRC32
+		os.write_ptr(fd, &c.crc, 4);
+	}
+
+	// Writes `image` as a PNG: signature, IHDR, a single IDAT with filter type 0
+	// on every row, and IEND.
+	write_image_as_png :: proc(filename: string, image: Image) -> (err: Error) {
+		profiler.timed_proc();
+		using image;
+		using os;
+		flags: int = O_WRONLY|O_CREATE|O_TRUNC;
+
+		// NOTE(review): `Image.pixels` is a `bytes.Buffer` in core:image, so taking its `len`
+		// here (and indexing it further down) looks out of date. Confirm before enabling.
+		if len(image.pixels) == 0 || len(image.pixels) < image.width * image.height * int(image.channels) {
+			return E_PNG.Invalid_Image_Dimensions;
+		}
+
+		mode: int = 0;
+		when ODIN_OS == "linux" || ODIN_OS == "darwin" {
+			// NOTE(justasd): 644 (owner read, write; group read; others read)
+			mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+		}
+
+		fd, fderr := open(filename, flags, mode);
+		if fderr != 0 {
+			return E_General.Cannot_Open_File;
+		}
+		defer close(fd);
+
+		magic := Signature;
+
+		write_ptr(fd, &magic, 8);
+
+		ihdr := IHDR{
+			width              = u32be(width),
+			height             = u32be(height),
+			bit_depth          = depth,
+			compression_method = 0,
+			filter_method      = 0,
+			interlace_method   = .None,
+		};
+
+		// Derive the color type from the channel count.
+		switch channels {
+		case 1: ihdr.color_type = Color_Type{};
+		case 2: ihdr.color_type = Color_Type{.Alpha};
+		case 3: ihdr.color_type = Color_Type{.Color};
+		case 4: ihdr.color_type = Color_Type{.Color, .Alpha};
+		case:// Unhandled
+			return E_PNG.Unknown_Color_Type;
+		}
+		h := make_chunk(ihdr, .IHDR);
+		write_chunk(fd, h);
+
+		// One byte per pixel component plus one filter byte per row.
+		bytes_needed := width * height * int(channels) + height;
+		filter_bytes := mem.make_dynamic_array_len_cap([dynamic]u8, bytes_needed, bytes_needed, context.allocator);
+		defer delete(filter_bytes);
+
+		i := 0; j := 0;
+		// Add a filter byte 0 per pixel row
+		for y := 0; y < height; y += 1 {
+			filter_bytes[j] = 0; j += 1;
+			for x := 0; x < width; x += 1 {
+				for z := 0; z < channels; z += 1 {
+					filter_bytes[j+z] = image.pixels[i+z];
+				}
+				i += channels; j += channels;
+			}
+		}
+		assert(j == bytes_needed);
+
+		a: []u8 = filter_bytes[:];
+
+		// NOTE(review): `out_buf` is never assigned before it is put into the context below.
+		// Confirm who is expected to allocate it.
+		out_buf: ^[dynamic]u8;
+		defer free(out_buf);
+
+		ctx := zlib.ZLIB_Context{
+			in_buf  = &a,
+			out_buf = out_buf,
+		};
+		err = zlib.write_zlib_stream_from_memory(&ctx);
+
+		b: []u8;
+		if err == nil {
+			b = ctx.out_buf[:];
+		} else {
+			return err;
+		}
+
+		idat := make_chunk(b, .IDAT);
+
+		write_chunk(fd, idat);
+
+		iend := make_chunk([]u8{}, .IEND);
+		write_chunk(fd, iend);
+
+		return nil;
+	}
+}
diff --git a/core/image/png/png.odin b/core/image/png/png.odin
new file mode 100644
index 000000000..7762f0106
--- /dev/null
+++ b/core/image/png/png.odin
@@ -0,0 +1,1657 @@
+package png
+
+import "core:compress"
+import "core:compress/zlib"
+import "core:image"
+
+import "core:os"
+import "core:strings"
+import "core:hash"
+import "core:bytes"
+import "core:io"
+import "core:mem"
+import "core:intrinsics"
+
+// Short local aliases for the error types shared with `core:compress` and `core:image`.
+// `Error` is the union returned by every loader in this package.
+Error     :: compress.Error;
+E_General :: compress.General_Error;
+E_PNG     :: image.Error;
+E_Deflate :: compress.Deflate_Error;
+
+// The image container and the loader option set are defined in `core:image`.
+Image     :: image.Image;
+Options   :: image.Options;
+
+// The fixed 8-byte magic number every PNG stream starts with, stored big-endian
+// so it can be compared directly against the first 8 bytes read from the stream.
+Signature :: enum u64be {
+	// 0x89504e470d0a1a0a
+	PNG = 0x89 << 56 | 'P' << 48 | 'N' << 40 | 'G' << 32 | '\r' << 24 | '\n' << 16 | 0x1a << 8 | '\n',
+}
+
+// Metadata gathered while loading. `load_from_stream` stores it in `img.sidecar`
+// when header or metadata return is requested.
+Info :: struct {
+	// Copy of the validated IHDR chunk.
+	header: IHDR,
+	// Chunks collected when `.return_metadata` is set.
+	chunks: [dynamic]Chunk,
+}
+
+// The 8 bytes that precede every chunk payload. Packed and big-endian so it can be
+// read straight from the stream.
+Chunk_Header :: struct #packed {
+	// Payload length in bytes (excludes this header and the trailing CRC).
+	length: u32be,
+	// Four-character chunk type code.
+	type:   Chunk_Type,
+}
+
+// One chunk as returned by `read_chunk`.
+Chunk :: struct #packed {
+	header: Chunk_Header,
+	// Payload bytes. `read_chunk` allocates these with `context.temp_allocator`.
+	data:   []byte,
+	// CRC read from the stream; `read_chunk` verifies it against type + payload.
+	crc:    u32be,
+}
+
+// Four-character chunk type codes, encoded big-endian exactly as they appear in the
+// stream so a `Chunk_Header` read from disk can be switched on directly.
+Chunk_Type :: enum u32be {
+	// IHDR must come first in a file
+	IHDR = 'I' << 24 | 'H' << 16 | 'D' << 8 | 'R',
+	// PLTE must precede the first IDAT chunk
+	PLTE = 'P' << 24 | 'L' << 16 | 'T' << 8 | 'E',
+	bKGD = 'b' << 24 | 'K' << 16 | 'G' << 8 | 'D',
+	tRNS = 't' << 24 | 'R' << 16 | 'N' << 8 | 'S',
+	IDAT = 'I' << 24 | 'D' << 16 | 'A' << 8 | 'T',
+
+	// Textual metadata
+	iTXt = 'i' << 24 | 'T' << 16 | 'X' << 8 | 't',
+	tEXt = 't' << 24 | 'E' << 16 | 'X' << 8 | 't',
+	zTXt = 'z' << 24 | 'T' << 16 | 'X' << 8 | 't',
+
+	iCCP = 'i' << 24 | 'C' << 16 | 'C' << 8 | 'P',
+	pHYs = 'p' << 24 | 'H' << 16 | 'Y' << 8 | 's',
+	gAMA = 'g' << 24 | 'A' << 16 | 'M' << 8 | 'A',
+	tIME = 't' << 24 | 'I' << 16 | 'M' << 8 | 'E',
+
+	sPLT = 's' << 24 | 'P' << 16 | 'L' << 8 | 'T',
+	sRGB = 's' << 24 | 'R' << 16 | 'G' << 8 | 'B',
+	hIST = 'h' << 24 | 'I' << 16 | 'S' << 8 | 'T',
+	cHRM = 'c' << 24 | 'H' << 16 | 'R' << 8 | 'M',
+	sBIT = 's' << 24 | 'B' << 16 | 'I' << 8 | 'T',
+
+	/*
+		eXIf tags are not part of the core spec, but have been ratified
+		in v1.5.0 of the PNG Ext register.
+
+		We will provide unprocessed chunks to the caller if `.return_metadata` is set.
+		Applications are free to implement an Exif decoder.
+	*/
+	eXIf = 'e' << 24 | 'X' << 16 | 'I' << 8 | 'f',
+
+	// PNG files must end with IEND
+	IEND = 'I' << 24 | 'E' << 16 | 'N' << 8 | 'D',
+
+	/*
+		XCode sometimes produces "PNG" files that don't adhere to the PNG spec.
+		We recognize them only in order to avoid doing further work on them.
+
+		Some tools like PNG Defry may be able to repair them, but we're not
+		going to reward Apple for producing proprietary broken files purporting
+		to be PNGs by supporting them.
+
+	*/
+	iDOT = 'i' << 24 | 'D' << 16 | 'O' << 8 | 'T',
+	/*
+		Apple's chunk is spelled "CgBI" on disk. The identifier below is misspelled
+		but kept as-is because other code refers to `.CbGI`; the encoded value is
+		what has to match the stream, and it previously encoded "CbHI", which never
+		occurs in any file.
+	*/
+	CbGI = 'C' << 24 | 'g' << 16 | 'B' << 8 | 'I',
+}
+
+// Payload of the IHDR chunk. Packed so its in-memory layout matches the 13 bytes
+// stored in the file; `read_header` reinterprets the raw chunk data as this struct.
+IHDR :: struct #packed {
+	width: u32be,
+	height: u32be,
+	// Bits per sample (or per palette index); validated against color_type in `read_header`.
+	bit_depth: u8,
+	color_type: Color_Type,
+	// Only 0 is accepted by `read_header`.
+	compression_method: u8,
+	// Only 0 is accepted by `read_header`.
+	filter_method: u8,
+	interlace_method: Interlace_Method,
+}
+IHDR_SIZE :: size_of(IHDR);
+// Guards against accidental padding: the on-disk IHDR payload is exactly 13 bytes.
+#assert (IHDR_SIZE == 13);
+
+// Bit positions of the flags that make up the IHDR color type byte.
+Color_Value :: enum u8 {
+	Paletted = 0, // 1 << 0 = 1
+	Color    = 1, // 1 << 1 = 2
+	Alpha    = 2, // 1 << 2 = 4
+}
+// The color type byte as a bit set, e.g. {.Color, .Alpha} transmutes to 6 (RGBA)
+// and {.Paletted, .Color} to 3 (indexed).
+Color_Type :: distinct bit_set[Color_Value; u8];
+
+// Value of the IHDR interlace byte. `read_header` rejects anything else.
+Interlace_Method :: enum u8 {
+	None  = 0,
+	Adam7 = 1,
+}
+
+// Filter type byte that prefixes every scanline in the inflated IDAT data.
+// The defilter procedures switch on this value.
+Row_Filter :: enum u8 {
+   None    = 0,
+   Sub     = 1,
+   Up      = 2,
+   Average = 3,
+   Paeth   = 4,
+};
+
+// One palette colour as three 8-bit components (accessed as .r/.g/.b by the loader).
+PLTE_Entry    :: [3]u8;
+
+// Decoded palette. The loader rejects PLTE chunks longer than 768 bytes, i.e. 256 entries.
+PLTE :: struct #packed {
+	entries: [256]PLTE_Entry,
+	// Presumably the number of entries actually present in the chunk — confirm against `plte()`.
+	used: u16,
+}
+
+// Decoded form of a hIST chunk: one 16-bit value per palette slot.
+hIST :: struct #packed {
+	entries: [256]u16,
+	// Presumably the number of valid entries — confirm against the decoder that fills this in.
+	used: u16,
+}
+
+// Decoded form of an sPLT chunk.
+sPLT :: struct #packed {
+	name: string,
+	// Sample depth; presumably selects which `entries` variant is populated (8 -> u8, 16 -> u16) — TODO confirm.
+	depth: u8,
+	// Four components per entry, in either 8-bit or 16-bit form.
+	entries: union {
+		[][4]u8,
+		[][4]u16,
+	},
+	used: u16,
+}
+
+// Other chunks
+
+// Payload of a tIME chunk. Packed to match the 7-byte on-disk layout.
+tIME :: struct #packed {
+	year:   u16be,
+	month:  u8,
+	day:    u8,
+	hour:   u8,
+	minute: u8,
+	second: u8,
+};
+#assert(size_of(tIME) == 7);
+
+// A chromaticity coordinate pair as stored in the file (big-endian integers).
+CIE_1931_Raw :: struct #packed {
+	x: u32be,
+	y: u32be,
+}
+
+// The same coordinate pair as floating point values.
+// NOTE(review): the raw-to-float scale factor is applied elsewhere; not visible here.
+CIE_1931 :: struct #packed {
+	x: f32,
+	y: f32,
+}
+
+// Payload of a cHRM chunk as stored on disk: four raw coordinate pairs (w, r, g, b), 32 bytes.
+cHRM_Raw :: struct #packed {
+   w: CIE_1931_Raw,
+   r: CIE_1931_Raw,
+   g: CIE_1931_Raw,
+   b: CIE_1931_Raw,
+}
+#assert(size_of(cHRM_Raw) == 32);
+
+// Floating point counterpart of `cHRM_Raw`, with the same field order and size.
+cHRM :: struct #packed {
+   w: CIE_1931,
+   r: CIE_1931,
+   g: CIE_1931,
+   b: CIE_1931,
+}
+#assert(size_of(cHRM) == 32);
+
+// Payload of a gAMA chunk: a single 4-byte big-endian integer.
+gAMA :: struct {
+	gamma_100k: u32be, // Gamma * 100k
+};
+#assert(size_of(gAMA) == 4);
+
+// Payload of a pHYs chunk. Packed to match the 9-byte on-disk layout.
+pHYs :: struct #packed {
+	// Pixels per unit along each axis.
+	ppu_x: u32be,
+	ppu_y: u32be,
+	unit:  pHYs_Unit,
+};
+#assert(size_of(pHYs) == 9);
+
+// Unit specifier byte of the pHYs chunk.
+pHYs_Unit :: enum u8 {
+	Unknown = 0,
+	Meter   = 1,
+};
+
+// Decoded textual metadata.
+// NOTE(review): presumably shared by the tEXt, zTXt and iTXt decoders, with
+// `keyword_localized` and `language` only filled for iTXt — confirm against the decoder.
+Text :: struct {
+	keyword:           string,
+	keyword_localized: string,
+	language:          string,
+	text:              string,
+};
+
+// Container for an eXIf chunk. The Exif payload itself is not decoded by this package.
+Exif :: struct {
+	// Byte order of the values inside `data`.
+	byte_order: enum {
+		little_endian,
+		big_endian,
+	},
+	// Raw Exif bytes.
+	data: []u8,
+}
+
+// Decoded form of an iCCP chunk: the profile's name and its bytes.
+// NOTE(review): whether `profile` holds compressed or inflated bytes depends on the decoder — confirm.
+iCCP :: struct {
+	name: string,
+	profile: []u8,
+}
+
+// Rendering intent byte carried by the sRGB chunk.
+sRGB_Rendering_Intent :: enum u8 {
+	Perceptual = 0,
+	Relative_colorimetric = 1,
+	Saturation = 2,
+	Absolute_colorimetric = 3,
+}
+
+// Payload of an sRGB chunk: a single rendering intent byte.
+sRGB :: struct #packed {
+	intent: sRGB_Rendering_Intent,
+}
+
+// Adam7 interlacing geometry, indexed by pass number 0..6.
+// Pass p covers pixels (ORIG + k * SPACING) on each axis; `load_from_stream` uses these
+// to compute the size of each pass's sub-image.
+ADAM7_X_ORIG    := []int{ 0,4,0,2,0,1,0 };
+ADAM7_Y_ORIG    := []int{ 0,0,4,0,2,0,1 };
+ADAM7_X_SPACING := []int{ 8,8,4,4,2,2,1 };
+ADAM7_Y_SPACING := []int{ 8,8,8,4,4,2,2 };
+
+// Implementation starts here
+
+/*
+	Reads one complete chunk (header, payload, CRC) from `ctx.input` and verifies its CRC.
+
+	The payload is allocated with `context.temp_allocator`.
+
+	Returns `E_General.Stream_Too_Short` if any part of the chunk can't be read in full,
+	and `E_General.Checksum_Failed` if the stored CRC doesn't match the one computed
+	over the chunk type and payload.
+*/
+read_chunk :: proc(ctx: ^compress.Context) -> (Chunk, Error) {
+
+	chunk := Chunk{};
+
+	ch, e := compress.read_data(ctx, Chunk_Header);
+	if e != .None {
+		return {}, E_General.Stream_Too_Short;
+	}
+	chunk.header = ch;
+
+	data := make([]u8, ch.length, context.temp_allocator);
+	n, e2 := ctx.input->impl_read(data);
+	// A stream may hand back fewer bytes than requested without reporting an error.
+	// Treat that as truncation rather than checksumming a partially filled payload.
+	if e2 != .None || n != len(data) {
+		return {}, E_General.Stream_Too_Short;
+	}
+	chunk.data = data;
+
+	// Compute CRC over chunk type + data
+	type := (^[4]byte)(&ch.type)^;
+	computed_crc := hash.crc32(type[:]);
+	computed_crc =  hash.crc32(data, computed_crc);
+
+	crc, e3 := compress.read_data(ctx, u32be);
+	if e3 != .None {
+		return {}, E_General.Stream_Too_Short;
+	}
+	chunk.crc = crc;
+
+	if chunk.crc != u32be(computed_crc) {
+		return {}, E_General.Checksum_Failed;
+	}
+	return chunk, nil;
+}
+
+/*
+	Reads the next chunk from `ctx`, which must be IHDR, and validates it.
+
+	Returns:
+	- the error from `read_chunk` if the chunk can't be read or fails its CRC,
+	- `E_PNG.IHDR_Not_First_Chunk` if the chunk isn't an IHDR,
+	- `E_PNG.IHDR_Corrupt` if the payload isn't exactly `size_of(IHDR)` bytes,
+	- `E_PNG.Invalid_Image_Dimensions` for a zero width or height,
+	- `E_General.Unknown_Compression_Method`, `E_PNG.Unknown_Filter_Method`,
+	  `E_PNG.Unknown_Interlace_Method` for unsupported method bytes,
+	- `E_PNG.Invalid_Color_Bit_Depth_Combo` / `E_PNG.Unknown_Color_Type` for an
+	  invalid color type or bit depth.
+*/
+read_header :: proc(ctx: ^compress.Context) -> (IHDR, Error) {
+
+	c, e := read_chunk(ctx);
+	if e != nil {
+		return {}, e;
+	}
+
+	if c.header.type != .IHDR {
+		return {}, E_PNG.IHDR_Not_First_Chunk;
+	}
+
+	// The payload is reinterpreted as an IHDR below, so it has to be the right size.
+	// Without this a truncated chunk would be read past its end.
+	if len(c.data) != size_of(IHDR) {
+		return {}, E_PNG.IHDR_Corrupt;
+	}
+
+	header := (^IHDR)(raw_data(c.data))^;
+
+	// Validate IHDR
+	if header.width == 0 || header.height == 0 {
+		return {}, E_PNG.Invalid_Image_Dimensions;
+	}
+
+	if header.compression_method != 0 {
+		return {}, E_General.Unknown_Compression_Method;
+	}
+
+	if header.filter_method != 0 {
+		return {}, E_PNG.Unknown_Filter_Method;
+	}
+
+	if header.interlace_method != .None && header.interlace_method != .Adam7 {
+		return {}, E_PNG.Unknown_Interlace_Method;
+	}
+
+	switch (transmute(u8)header.color_type) {
+		case 0:
+			/*
+				Grayscale.
+				Allowed bit depths: 1, 2, 4, 8 and 16.
+			*/
+			switch header.bit_depth {
+			case 1, 2, 4, 8, 16:
+				// Allowed.
+			case:
+				return {}, E_PNG.Invalid_Color_Bit_Depth_Combo;
+			}
+		case 2, 4, 6:
+			/*
+				RGB, Grayscale+Alpha, RGBA.
+				Allowed bit depths: 8 and 16
+			*/
+			if header.bit_depth != 8 && header.bit_depth != 16 {
+				return {}, E_PNG.Invalid_Color_Bit_Depth_Combo;
+			}
+		case 3:
+			/*
+				Paletted. PLTE chunk must appear.
+				Allowed bit depths: 1, 2, 4 and 8.
+			*/
+			switch header.bit_depth {
+			case 1, 2, 4, 8:
+				// Allowed.
+			case:
+				return {}, E_PNG.Invalid_Color_Bit_Depth_Combo;
+			}
+
+		case:
+			return {}, E_PNG.Unknown_Color_Type;
+	}
+
+	return header, nil;
+}
+
+/*
+	Returns the four bytes of `type` viewed as a string, e.g. "IHDR".
+	The string aliases the memory `type` points at; it is not a copy.
+*/
+chunk_type_to_name :: proc(type: ^Chunk_Type) -> string {
+	return strings.string_from_ptr(transmute(^u8)type, 4);
+}
+
+/*
+	Loads a PNG held in memory by wrapping `slice` in a `bytes.Reader` stream and
+	handing it to `load_from_stream`. Results are passed through unchanged.
+*/
+load_from_slice :: proc(slice: []u8, options := Options{}, allocator := context.allocator) -> (img: ^Image, err: Error) {
+	reader: bytes.Reader;
+	bytes.reader_init(&reader, slice);
+
+	/*
+		TODO: Add a flag to tell the PNG loader that the stream is backed by a slice.
+		This way the stream reader could avoid the copy into the temp memory returned by it,
+		and instead return a slice into the original memory that's already owned by the caller.
+	*/
+	return load_from_stream(bytes.reader_to_stream(&reader), options, allocator);
+}
+
+/*
+	Reads `filename` in full and decodes it with `load_from_slice`.
+
+	If the file can't be read, returns a freshly allocated empty `Image` together with
+	`E_General.File_Not_Found`. Otherwise returns whatever `load_from_slice` returns.
+*/
+load_from_file :: proc(filename: string, options := Options{}, allocator := context.allocator) -> (img: ^Image, err: Error) {
+	data, ok := os.read_entire_file(filename, allocator);
+	// The buffer was allocated with `allocator`, so it has to be released with it too.
+	// A bare `delete(data)` would use `context.allocator`, which may be a different one.
+	defer delete(data, allocator);
+
+	if !ok {
+		img = new(Image);
+		return img, E_General.File_Not_Found;
+	}
+
+	img, err = load_from_slice(data, options, allocator);
+	return;
+}
+
+/*
+	Decodes a PNG from `stream`.
+
+	Steps:
+	1. Normalise `options` and check the 8-byte signature.
+	2. Walk the chunks, enforcing ordering rules, collecting IHDR, PLTE, tRNS, bKGD
+	   and the concatenated IDAT payloads. Other chunks are kept only when
+	   `.return_metadata` is set.
+	3. Unless `.do_not_decompress_image` is set: inflate the IDAT data, check its size
+	   against the header, and defilter it into `img.pixels`.
+	4. Unless told not to expand: convert paletted and grayscale data to RGB(A),
+	   applying tRNS transparency, background blending and premultiplication as
+	   requested by `options`.
+
+	Once the `Image` has been allocated, every error path returns it alongside the
+	error so the caller can release it. Only the `Incompatible_Options` check, which
+	runs before allocation, returns a nil image.
+
+	Chunk payloads are allocated with `context.temp_allocator` (see `read_chunk`).
+*/
+load_from_stream :: proc(stream: io.Stream, options := Options{}, allocator := context.allocator) -> (img: ^Image, err: Error) {
+	options := options;
+	if .info in options {
+		options |= {.return_metadata, .do_not_decompress_image};
+		options -= {.info};
+	}
+
+	if .alpha_drop_if_present in options && .alpha_add_if_missing in options {
+		return {}, E_General.Incompatible_Options;
+	}
+
+	if .do_not_expand_channels in options {
+		options |= {.do_not_expand_grayscale, .do_not_expand_indexed};
+	}
+
+	if img == nil {
+		img = new(Image);
+	}
+
+	img.sidecar = nil;
+
+	ctx := compress.Context{
+		input = stream,
+	};
+
+	signature, io_error := compress.read_data(&ctx, Signature);
+	if io_error != .None || signature != .PNG {
+		return img, E_PNG.Invalid_PNG_Signature;
+	}
+
+	idat: []u8;
+	idat_b: bytes.Buffer;
+	idat_length := u32be(0);
+	defer bytes.buffer_destroy(&idat_b);
+
+	c:      Chunk;
+	ch:     Chunk_Header;
+	e:      io.Error;
+
+	header: IHDR;
+	info:   Info;
+	info.chunks.allocator = context.temp_allocator;
+
+	// State to ensure correct chunk ordering.
+	seen_ihdr := false; first := true;
+	seen_plte := false;
+	seen_bkgd := false;
+	seen_trns := false;
+	seen_idat := false;
+	seen_iend := false;
+
+	_plte := PLTE{};
+	trns := Chunk{};
+
+	final_image_channels := 0;
+
+	read_error: io.Error;
+	// Keep consuming chunks until IEND has been processed.
+	for (read_error == .None && !seen_iend) {
+		// Peek at next chunk's length and type.
+		// TODO: Some streams may not provide seek/read_at
+
+		ch, e = compress.peek_data(&ctx, Chunk_Header);
+		if e != .None {
+			return img, E_General.Stream_Too_Short;
+		}
+		// name := chunk_type_to_name(&ch.type); // Only used for debug prints during development.
+
+		// Every other chunk is interpreted relative to the header, so nothing may precede IHDR.
+		if !seen_ihdr && ch.type != .IHDR {
+			return img, E_PNG.IHDR_Not_First_Chunk;
+		}
+
+		#partial switch(ch.type) {
+			case .IHDR:
+				if seen_ihdr || !first {
+					return img, E_PNG.IHDR_Not_First_Chunk;
+				}
+				seen_ihdr = true;
+
+				header, err = read_header(&ctx);
+				if err != nil {
+					return img, err;
+				}
+
+				if .Paletted in header.color_type {
+					// Color type 3
+					img.channels = 1;
+					final_image_channels = 3;
+					img.depth    = 8;
+				} else if .Color in header.color_type {
+					// Color image without a palette
+					img.channels = 3;
+					final_image_channels = 3;
+					img.depth    = header.bit_depth;
+				} else {
+					// Grayscale
+					img.channels = 1;
+					final_image_channels = 1;
+					img.depth    = header.bit_depth;
+				}
+
+				if .Alpha in header.color_type {
+					img.channels += 1;
+					final_image_channels += 1;
+				}
+
+				if img.channels == 0 || img.depth == 0 {
+					return img, E_PNG.IHDR_Corrupt;
+				}
+
+				img.width  = int(header.width);
+				img.height = int(header.height);
+
+				using header;
+				h := IHDR{
+					width              = width,
+					height             = height,
+					bit_depth          = bit_depth,
+					color_type         = color_type,
+					compression_method = compression_method,
+					filter_method      = filter_method,
+					interlace_method   = interlace_method,
+				};
+				info.header = h;
+			case .PLTE:
+				seen_plte = true;
+				// PLTE must appear before IDAT and can't appear for color types 0, 4.
+				ct := transmute(u8)info.header.color_type;
+				if seen_idat || ct == 0 || ct == 4 {
+					return img, E_PNG.PLTE_Encountered_Unexpectedly;
+				}
+
+				c, err = read_chunk(&ctx);
+				if err != nil {
+					return img, err;
+				}
+
+				if c.header.length % 3 != 0 || c.header.length > 768 {
+					return img, E_PNG.PLTE_Invalid_Length;
+				}
+				plte_ok: bool;
+				_plte, plte_ok = plte(c);
+				if !plte_ok {
+					return img, E_PNG.PLTE_Invalid_Length;
+				}
+
+				if .return_metadata in options {
+					append(&info.chunks, c);
+				}
+			case .IDAT:
+				// If we only want image metadata and don't want the pixel data, we can early out.
+				if .return_metadata not_in options && .do_not_decompress_image in options {
+					img.channels = final_image_channels;
+					img.sidecar = info;
+					return img, nil;
+				}
+				// There must be at least 1 IDAT, contiguous if more.
+				if seen_idat {
+					return img, E_PNG.IDAT_Must_Be_Contiguous;
+				}
+
+				if idat_length > 0 {
+					return img, E_PNG.IDAT_Must_Be_Contiguous;
+				}
+
+				// Consume the whole run of consecutive IDAT chunks in one go.
+				next := ch.type;
+				for next == .IDAT {
+					c, err = read_chunk(&ctx);
+					if err != nil {
+						return img, err;
+					}
+
+					bytes.buffer_write(&idat_b, c.data);
+					idat_length += c.header.length;
+
+					ch, e = compress.peek_data(&ctx, Chunk_Header);
+					if e != .None {
+						return img, E_General.Stream_Too_Short;
+					}
+					next = ch.type;
+				}
+				idat = bytes.buffer_to_bytes(&idat_b);
+				if int(idat_length) != len(idat) {
+					return img, E_PNG.IDAT_Corrupt;
+				}
+				seen_idat = true;
+			case .IEND:
+				c, err = read_chunk(&ctx);
+				if err != nil {
+					return img, err;
+				}
+				seen_iend = true;
+			case .bKGD:
+
+				// TODO: Make sure that 16-bit bKGD + tRNS chunks return u16 instead of u16be
+
+				c, err = read_chunk(&ctx);
+				if err != nil {
+					return img, err;
+				}
+				seen_bkgd = true;
+				if .return_metadata in options {
+					append(&info.chunks, c);
+				}
+
+				ct := transmute(u8)info.header.color_type;
+				switch(ct) {
+					case 3: // Indexed color
+						if c.header.length != 1 {
+							return img, E_PNG.BKGD_Invalid_Length;
+						}
+						col := _plte.entries[c.data[0]];
+						img.background = [3]u16{
+							u16(col[0]) << 8 | u16(col[0]),
+							u16(col[1]) << 8 | u16(col[1]),
+							u16(col[2]) << 8 | u16(col[2]),
+						};
+					case 0, 4: // Grayscale, with and without Alpha
+						if c.header.length != 2 {
+							return img, E_PNG.BKGD_Invalid_Length;
+						}
+						col := u16(mem.slice_data_cast([]u16be, c.data[:])[0]);
+						img.background = [3]u16{col, col, col};
+					case 2, 6: // Color, with and without Alpha
+						if c.header.length != 6 {
+							return img, E_PNG.BKGD_Invalid_Length;
+						}
+						col := mem.slice_data_cast([]u16be, c.data[:]);
+						img.background = [3]u16{u16(col[0]), u16(col[1]), u16(col[2])};
+				}
+			case .tRNS:
+				c, err = read_chunk(&ctx);
+				if err != nil {
+					return img, err;
+				}
+
+				if .Alpha in info.header.color_type {
+					return img, E_PNG.TRNS_Encountered_Unexpectedly;
+				}
+
+				/*
+					The colour key is indexed further down (one u16be for grayscale, three
+					for RGB). Reject a payload that is too short to hold it instead of
+					panicking on an out-of-bounds index later. There is no dedicated
+					"invalid tRNS length" error, so the closest existing one is used.
+				*/
+				switch transmute(u8)info.header.color_type {
+					case 0: // Grayscale: one 16-bit key
+						if len(c.data) < 2 {
+							return img, E_PNG.TRNS_Encountered_Unexpectedly;
+						}
+					case 2: // RGB: three 16-bit keys
+						if len(c.data) < 6 {
+							return img, E_PNG.TRNS_Encountered_Unexpectedly;
+						}
+				}
+
+				if .return_metadata in options {
+					append(&info.chunks, c);
+				}
+
+				/*
+					This makes the image one with transparency, so set it to +1 here,
+					even if we leave img.channels alone for the defilterer's sake.
+					If we return early because the user just cares about metadata,
+					we'll set it to 'final_image_channels'.
+				*/
+
+				final_image_channels += 1;
+
+				seen_trns = true;
+				if info.header.bit_depth < 8 && .Paletted not_in info.header.color_type {
+					// Rescale tRNS data so key matches intensity
+					dsc := depth_scale_table;
+					scale := dsc[info.header.bit_depth];
+					if scale != 1 {
+						key := mem.slice_data_cast([]u16be, c.data)[0] * u16be(scale);
+						c.data = []u8{0, u8(key & 255)};
+					}
+				}
+				trns = c;
+			case .iDOT, .CbGI:
+				/*
+					iPhone PNG bastardization that doesn't adhere to spec with broken IDAT chunk.
+					We're not going to add support for it. If you have the misfortune of coming
+					across one of these files, use a utility to defry it.
+				*/
+				return img, E_PNG.PNG_Does_Not_Adhere_to_Spec;
+			case:
+				// Unhandled type
+				c, err = read_chunk(&ctx);
+				if err != nil {
+					return img, err;
+				}
+				if .return_metadata in options {
+					// NOTE: Chunk data is currently allocated on the temp allocator.
+					append(&info.chunks, c);
+				}
+		}
+
+		// Must run for every chunk, not only unhandled ones, or the IHDR ordering check never fires.
+		first = false;
+	}
+
+	if .return_header in options || .return_metadata in options {
+		img.sidecar = info;
+	}
+	if .do_not_decompress_image in options {
+		img.channels = final_image_channels;
+		return img, nil;
+	}
+
+	if !seen_idat {
+		return img, E_PNG.IDAT_Missing;
+	}
+
+	buf: bytes.Buffer;
+	zlib_error := zlib.inflate(idat, &buf);
+	defer bytes.buffer_destroy(&buf);
+
+	if zlib_error != nil {
+		return img, zlib_error;
+	} else {
+		/*
+			Let's calculate the expected size of the IDAT based on its dimensions,
+			and whether or not it's interlaced
+		*/
+		expected_size: int;
+		buf_len := len(buf.buf);
+
+		if header.interlace_method != .Adam7 {
+			expected_size = compute_buffer_size(int(header.width), int(header.height), int(img.channels), int(header.bit_depth), 1);
+		} else {
+			/*
+				Because Adam7 divides the image up into sub-images, and each scanline must start
+				with a filter byte, Adam7 interlaced images can have a larger raw size.
+			*/
+			for p := 0; p < 7; p += 1 {
+				x := (int(header.width)  - ADAM7_X_ORIG[p] + ADAM7_X_SPACING[p] - 1) / ADAM7_X_SPACING[p];
+				y := (int(header.height) - ADAM7_Y_ORIG[p] + ADAM7_Y_SPACING[p] - 1) / ADAM7_Y_SPACING[p];
+				if (x > 0 && y > 0) {
+					expected_size += compute_buffer_size(int(x), int(y), int(img.channels), int(header.bit_depth), 1);
+				}
+			}
+		}
+
+		if expected_size != buf_len {
+			return img, E_PNG.IDAT_Corrupt;
+		}
+	}
+
+	/*
+		Defilter just cares about the raw number of image channels present.
+		So, we'll save the old value of img.channels we return to the user
+		as metadata, and set it instead to the raw number of channels.
+	*/
+	defilter_error := defilter(img, &buf, &header, options);
+	if defilter_error != nil {
+		bytes.buffer_destroy(&img.pixels);
+		// Clear the handle so the caller can release `img` without freeing the pixels twice.
+		img.pixels = bytes.Buffer{};
+		return img, defilter_error;
+	}
+
+	/*
+		Now we'll handle the recoloring of paletted images, handling of tRNS chunks,
+		and we'll expand grayscale images to RGB(A).
+
+		For the sake of convenience we return only RGB(A) images. In the future we
+		may supply an option to return Gray/Gray+Alpha as-is, in which case RGB(A)
+		will become the default.
+	*/
+
+	if .Paletted in header.color_type && .do_not_expand_indexed in options {
+		return img, nil;
+	}
+	if .Color not_in header.color_type && .do_not_expand_grayscale in options {
+		return img, nil;
+	}
+
+
+	raw_image_channels := img.channels;
+	out_image_channels := 3;
+
+	/*
+		To give ourselves less options to test, we'll knock out
+		`.blend_background` and `seen_bkgd` if we haven't seen both.
+	*/
+	if !(seen_bkgd && .blend_background in options) {
+		options -= {.blend_background};
+		seen_bkgd = false;
+	}
+
+	if seen_trns || .Alpha in info.header.color_type || .alpha_add_if_missing in options {
+		out_image_channels = 4;
+	}
+
+	if .alpha_drop_if_present in options {
+		out_image_channels = 3;
+	}
+
+	if seen_bkgd && .blend_background in options && .alpha_add_if_missing not_in options {
+		out_image_channels = 3;
+	}
+
+	add_alpha   := (seen_trns && .alpha_drop_if_present not_in options) || (.alpha_add_if_missing in options);
+	premultiply := .alpha_premultiply in options || seen_bkgd;
+
+	img.channels = out_image_channels;
+
+	if .Paletted in header.color_type {
+		temp := img.pixels;
+		defer bytes.buffer_destroy(&temp);
+
+		// We need to create a new image buffer
+		dest_raw_size := compute_buffer_size(int(header.width), int(header.height), out_image_channels, 8);
+		t := bytes.Buffer{};
+		resize(&t.buf, dest_raw_size);
+
+		i := 0; j := 0;
+
+		// If we don't have transparency or drop it without applying it, we can do this:
+		if (!seen_trns || (seen_trns && .alpha_drop_if_present in options && .alpha_premultiply not_in options)) && .alpha_add_if_missing not_in options {
+			for h := 0; h < int(img.height); h += 1 {
+				for w := 0; w < int(img.width);  w += 1 {
+					c := _plte.entries[temp.buf[i]];
+					t.buf[j  ] = c.r;
+					t.buf[j+1] = c.g;
+					t.buf[j+2] = c.b;
+					i += 1; j += 3;
+				}
+			}
+		} else if add_alpha || .alpha_drop_if_present in options {
+			bg := [3]f32{0, 0, 0};
+			if premultiply && seen_bkgd {
+				c16 := img.background.([3]u16);
+				bg = [3]f32{f32(c16.r), f32(c16.g), f32(c16.b)};
+			}
+
+			no_alpha := (.alpha_drop_if_present in options || premultiply) && .alpha_add_if_missing not_in options;
+			blend_background := seen_bkgd && .blend_background in options;
+
+			for h := 0; h < int(img.height); h += 1 {
+				for w := 0; w < int(img.width);  w += 1 {
+					index := temp.buf[i];
+
+					c     := _plte.entries[index];
+					// Palette entries beyond the end of the tRNS data are fully opaque.
+					a     := int(index) < len(trns.data) ? trns.data[index] : 255;
+					alpha := f32(a) / 255.0;
+
+					if blend_background {
+						c.r = u8((1.0 - alpha) * bg[0] + f32(c.r) * alpha);
+						c.g = u8((1.0 - alpha) * bg[1] + f32(c.g) * alpha);
+						c.b = u8((1.0 - alpha) * bg[2] + f32(c.b) * alpha);
+						a = 255;
+					} else if premultiply {
+						c.r = u8(f32(c.r) * alpha);
+						c.g = u8(f32(c.g) * alpha);
+						c.b = u8(f32(c.b) * alpha);
+					}
+
+					t.buf[j  ] = c.r;
+					t.buf[j+1] = c.g;
+					t.buf[j+2] = c.b;
+					i += 1;
+
+					if no_alpha {
+						j += 3;
+					} else {
+						t.buf[j+3] = u8(a);
+						j += 4;
+					}
+				}
+			}
+		} else {
+			unreachable();
+		}
+
+		img.pixels = t;
+
+	} else if img.depth == 16 {
+		// Check if we need to do something.
+		if raw_image_channels == out_image_channels {
+			// If we have 3 in and 3 out, or 4 in and 4 out without premultiplication...
+			if raw_image_channels == 4 && .alpha_premultiply not_in options && !seen_bkgd {
+				// Then we're done.
+				return img, nil;
+			}
+		}
+
+		temp := img.pixels;
+		defer bytes.buffer_destroy(&temp);
+
+		// We need to create a new image buffer
+		dest_raw_size := compute_buffer_size(int(header.width), int(header.height), out_image_channels, 16);
+		t := bytes.Buffer{};
+		resize(&t.buf, dest_raw_size);
+
+		p16 := mem.slice_data_cast([]u16, temp.buf[:]);
+		o16 := mem.slice_data_cast([]u16, t.buf[:]);
+
+		switch (raw_image_channels) {
+		case 1:
+			// Gray without Alpha. Might have tRNS alpha.
+			key   := u16(0);
+			if seen_trns {
+				key = mem.slice_data_cast([]u16, trns.data)[0];
+			}
+
+			for len(p16) > 0 {
+				r := p16[0];
+
+				alpha := u16(1); // Default to full opaque
+
+				if seen_trns {
+					if r == key {
+						if seen_bkgd {
+							c := img.background.([3]u16);
+							r = c[0];
+						} else {
+							alpha = 0; // Keyed transparency
+						}
+					}
+				}
+
+				if premultiply {
+					o16[0] = r * alpha;
+					o16[1] = r * alpha;
+					o16[2] = r * alpha;
+				} else {
+					o16[0] = r;
+					o16[1] = r;
+					o16[2] = r;
+				}
+
+				if out_image_channels == 4 {
+					o16[3] = alpha * 65535;
+				}
+
+				p16 = p16[1:];
+				o16 = o16[out_image_channels:];
+			}
+		case 2:
+			// Gray with alpha, we shouldn't have a tRNS chunk.
+			bg := f32(0.0);
+			if seen_bkgd {
+				bg = f32(img.background.([3]u16)[0]);
+			}
+
+			for len(p16) > 0 {
+				r := p16[0];
+				if seen_bkgd {
+					alpha := f32(p16[1]) / f32(65535);
+					c := u16(f32(r) * alpha + (1.0 - alpha) * bg);
+					o16[0] = c;
+					o16[1] = c;
+					o16[2] = c;
+					/*
+						After BG blending, the pixel is now fully opaque.
+						Update the value we'll write to the output alpha.
+					*/
+					p16[1] = 65535;
+				} else if premultiply {
+					alpha := p16[1];
+					c := u16(f32(r) * f32(alpha) / f32(65535));
+					o16[0] = c;
+					o16[1] = c;
+					o16[2] = c;
+				} else {
+					o16[0] = r;
+					o16[1] = r;
+					o16[2] = r;
+				}
+
+				if out_image_channels == 4 {
+					o16[3] = p16[1];
+				}
+
+				p16 = p16[2:];
+				o16 = o16[out_image_channels:];
+			}
+		case 3:
+			/*
+				Color without Alpha.
+				We may still have a tRNS chunk or `.alpha_add_if_missing`.
+			*/
+
+			key: []u16;
+			if seen_trns {
+				key = mem.slice_data_cast([]u16, trns.data);
+			}
+
+			for len(p16) > 0 {
+				r     := p16[0];
+				g     := p16[1];
+				b     := p16[2];
+
+				alpha := u16(1); // Default to full opaque
+
+				if seen_trns {
+					if r == key[0] && g == key[1] && b == key[2] {
+						if seen_bkgd {
+							c := img.background.([3]u16);
+							r = c[0];
+							g = c[1];
+							b = c[2];
+						} else {
+							alpha = 0; // Keyed transparency
+						}
+					}
+				}
+
+				if premultiply {
+					o16[0] = r * alpha;
+					o16[1] = g * alpha;
+					o16[2] = b * alpha;
+				} else {
+					o16[0] = r;
+					o16[1] = g;
+					o16[2] = b;
+				}
+
+				if out_image_channels == 4 {
+					o16[3] = alpha * 65535;
+				}
+
+				p16 = p16[3:];
+				o16 = o16[out_image_channels:];
+			}
+		case 4:
+			// Color with Alpha, can't have tRNS.
+			for len(p16) > 0 {
+				r     := p16[0];
+				g     := p16[1];
+				b     := p16[2];
+				a     := p16[3];
+
+				if seen_bkgd {
+					alpha := f32(a) / 65535.0;
+					c  := img.background.([3]u16);
+					rb := f32(c[0]) * (1.0 - alpha);
+					gb := f32(c[1]) * (1.0 - alpha);
+					bb := f32(c[2]) * (1.0 - alpha);
+
+					o16[0] = u16(f32(r) * alpha + rb);
+					o16[1] = u16(f32(g) * alpha + gb);
+					o16[2] = u16(f32(b) * alpha + bb);
+					/*
+						After BG blending, the pixel is now fully opaque.
+						Update the value we'll write to the output alpha.
+					*/
+					a = 65535;
+				} else if premultiply {
+					alpha := f32(a) / 65535.0;
+					o16[0] = u16(f32(r) * alpha);
+					o16[1] = u16(f32(g) * alpha);
+					o16[2] = u16(f32(b) * alpha);
+				} else {
+					o16[0] = r;
+					o16[1] = g;
+					o16[2] = b;
+				}
+
+				if out_image_channels == 4 {
+					o16[3] = a;
+				}
+
+				p16 = p16[4:];
+				o16 = o16[out_image_channels:];
+			}
+		case:
+			unreachable("We should never seen # channels other than 1-4 inclusive.");
+		}
+
+		img.pixels = t;
+		img.channels = out_image_channels;
+
+	} else if img.depth == 8 {
+		// Check if we need to do something.
+		if raw_image_channels == out_image_channels {
+			// If we have 3 in and 3 out, or 4 in and 4 out without premultiplication...
+			if !premultiply {
+				// Then we're done.
+				return img, nil;
+			}
+		}
+
+		temp := img.pixels;
+		defer bytes.buffer_destroy(&temp);
+
+		// We need to create a new image buffer
+		dest_raw_size := compute_buffer_size(int(header.width), int(header.height), out_image_channels, 8);
+		t := bytes.Buffer{};
+		resize(&t.buf, dest_raw_size);
+
+		p := mem.slice_data_cast([]u8, temp.buf[:]);
+		o := mem.slice_data_cast([]u8, t.buf[:]);
+
+		switch (raw_image_channels) {
+		case 1:
+			// Gray without Alpha. Might have tRNS alpha.
+			key   := u8(0);
+			if seen_trns {
+				key = u8(mem.slice_data_cast([]u16be, trns.data)[0]);
+			}
+
+			for len(p) > 0 {
+				r     := p[0];
+				alpha := u8(1);
+
+				if seen_trns {
+					if r == key {
+						if seen_bkgd {
+							bc := img.background.([3]u16);
+							r = u8(bc[0]);
+						} else {
+							alpha = 0; // Keyed transparency
+						}
+					}
+					if premultiply {
+						r *= alpha;
+					}
+				}
+				o[0] = r;
+				o[1] = r;
+				o[2] = r;
+
+				if out_image_channels == 4 {
+					o[3] = alpha * 255;
+				}
+
+				p = p[1:];
+				o = o[out_image_channels:];
+			}
+		case 2:
+			// Gray with alpha, we shouldn't have a tRNS chunk.
+			bg := f32(0.0);
+			if seen_bkgd {
+				bg = f32(img.background.([3]u16)[0]);
+			}
+
+			for len(p) > 0 {
+				r := p[0];
+				if seen_bkgd {
+					alpha := f32(p[1]) / f32(255);
+					c := u8(f32(r) * alpha + (1.0 - alpha) * bg);
+					o[0] = c;
+					o[1] = c;
+					o[2] = c;
+					/*
+						After BG blending, the pixel is now fully opaque.
+						Update the value we'll write to the output alpha.
+					*/
+					p[1] = 255;
+				} else if .alpha_premultiply in options {
+					alpha := p[1];
+					c := u8(f32(r) * f32(alpha) / f32(255));
+					o[0] = c;
+					o[1] = c;
+					o[2] = c;
+				} else {
+					o[0] = r;
+					o[1] = r;
+					o[2] = r;
+				}
+
+				if out_image_channels == 4 {
+					o[3] = p[1];
+				}
+
+				p = p[2:];
+				o = o[out_image_channels:];
+			}
+		case 3:
+			// Color without Alpha. We may still have a tRNS chunk
+			key: []u8;
+			if seen_trns {
+				/*
+					For 8-bit images, the tRNS chunk still contains a triple in u16be.
+					We use only the low byte in this case.
+				*/
+				key = []u8{trns.data[1], trns.data[3], trns.data[5]};
+			}
+
+			for len(p) > 0 {
+				r     := p[0];
+				g     := p[1];
+				b     := p[2];
+
+				alpha := u8(1); // Default to full opaque
+
+				if seen_trns {
+					if r == key[0] && g == key[1] && b == key[2] {
+						if seen_bkgd {
+							c := img.background.([3]u16);
+							r = u8(c[0]);
+							g = u8(c[1]);
+							b = u8(c[2]);
+						} else {
+							alpha = 0; // Keyed transparency
+						}
+					}
+
+					if premultiply {
+						r *= alpha;
+						g *= alpha;
+						b *= alpha;
+					}
+				}
+
+				o[0] = r;
+				o[1] = g;
+				o[2] = b;
+
+				if out_image_channels == 4 {
+					o[3] = alpha * 255;
+				}
+
+				p = p[3:];
+				o = o[out_image_channels:];
+			}
+		case 4:
+			// Color with Alpha, can't have tRNS.
+			for len(p) > 0 {
+				r     := p[0];
+				g     := p[1];
+				b     := p[2];
+				a     := p[3];
+				if seen_bkgd {
+					alpha := f32(a) / 255.0;
+					c  := img.background.([3]u16);
+					rb := f32(c[0]) * (1.0 - alpha);
+					gb := f32(c[1]) * (1.0 - alpha);
+					bb := f32(c[2]) * (1.0 - alpha);
+
+					o[0] = u8(f32(r) * alpha + rb);
+					o[1] = u8(f32(g) * alpha + gb);
+					o[2] = u8(f32(b) * alpha + bb);
+					/*
+						After BG blending, the pixel is now fully opaque.
+						Update the value we'll write to the output alpha.
+					*/
+					a = 255;
+				} else if premultiply {
+					alpha := f32(a) / 255.0;
+					o[0] = u8(f32(r) * alpha);
+					o[1] = u8(f32(g) * alpha);
+					o[2] = u8(f32(b) * alpha);
+				} else {
+					o[0] = r;
+					o[1] = g;
+					o[2] = b;
+				}
+
+				if out_image_channels == 4 {
+					o[3] = a;
+				}
+
+				p = p[4:];
+				o = o[out_image_channels:];
+			}
+		case:
+			unreachable("We should never seen # channels other than 1-4 inclusive.");
+		}
+
+		img.pixels = t;
+		img.channels = out_image_channels;
+
+	} else {
+		/*
+			This may change if we ever don't expand 1, 2 and 4 bit images. But, those raw
+			returns will likely bypass this processing pipeline.
+		*/
+		unreachable("We should never see bit depths other than 8, 16 and 'Paletted' here.");
+	}
+
+	return img, nil;
+}
+
+
+/*
+	Paeth predictor: returns whichever of `left`, `up` and `up_left` is closest to
+	`left + up - up_left`. Ties are resolved in the order left, up, up_left.
+	The arithmetic is done in i16 so the intermediate sum can't wrap.
+*/
+filter_paeth :: #force_inline proc(left, up, up_left: u8) -> u8 {
+	l  := i16(left);
+	u  := i16(up);
+	ul := i16(up_left);
+
+	estimate := l + u - ul;
+
+	dist_left    := abs(estimate - l);
+	dist_up      := abs(estimate - u);
+	dist_up_left := abs(estimate - ul);
+
+	switch {
+	case dist_left <= dist_up && dist_left <= dist_up_left:
+		return left;
+	case dist_up <= dist_up_left:
+		return up;
+	}
+	return up_left;
+}
+
+// Arguments for the defilter procedures. They take this by pointer and, through
+// `using params`, advance `src` and `dest` in place as they consume rows.
+Filter_Params :: struct #packed {
+	// Filtered input: each row is one filter type byte followed by the row's bytes.
+	src:      []u8,
+	// Output buffer for the defiltered rows.
+	dest:     []u8,
+	// Row width in pixels and number of rows.
+	width:    int,
+	height:   int,
+	// Bits per sample.
+	depth:    int,
+	channels: int,
+	// NOTE(review): not read by `defilter_8`; presumably used by the sub-8-bit path — confirm.
+	rescale:  bool,
+}
+
+// Indexed by bit depth (1, 2, 4, 8): the multiplier that stretches a sample of that depth
+// to the full 8-bit range (1-bit * 0xff, 2-bit * 0x55, 4-bit * 0x11, 8-bit * 1). Other slots are unused.
+depth_scale_table :: []u8{0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01};
+
+// @(optimization_mode="speed")
+/*
+	Reverses the per-row PNG filters for rows of `channels * width` bytes, reading
+	from `params.src` and writing to `params.dest`.
+
+	`params.src` and `params.dest` are advanced in place as rows are consumed.
+	Returns false as soon as a row carries an unknown filter type, true otherwise.
+*/
+defilter_8 :: proc(params: ^Filter_Params) -> (ok: bool) {
+
+	using params;
+
+	stride := channels * width;
+	// Number of bytes in a row that have a left-hand neighbour.
+	tail   := stride - channels;
+
+	// TODO: See about doing a Duff's #unroll where practicable
+
+	// All-zero "previous row" so the first row needs no special casing.
+	prev := make([]u8, stride, context.temp_allocator);
+
+	for _ in 0..<height {
+		// The first byte of every row is its filter type.
+		kind := Row_Filter(src[0]);
+		src = src[1:];
+
+		switch kind {
+		case .None:
+			copy(dest, src[:stride]);
+
+		case .Sub:
+			// First pixel has no left neighbour.
+			for i in 0..<channels {
+				dest[i] = src[i];
+			}
+			for k in 0..<tail {
+				dest[channels+k] = (src[channels+k] + dest[k]) & 255;
+			}
+
+		case .Up:
+			for k in 0..<stride {
+				dest[k] = (src[k] + prev[k]) & 255;
+			}
+
+		case .Average:
+			// First pixel: the left neighbour counts as zero.
+			for i in 0..<channels {
+				half := prev[i] >> 1;
+				dest[i] = (src[i] + half) & 255;
+			}
+			for k in 0..<tail {
+				mean := u8((u16(prev[channels+k]) + u16(dest[k])) >> 1);
+				dest[channels+k] = (src[channels+k] + mean) & 255;
+			}
+
+		case .Paeth:
+			// First pixel: left and upper-left neighbours count as zero.
+			for i in 0..<channels {
+				predicted := filter_paeth(0, prev[i], 0);
+				dest[i] = (src[i] + predicted) & 255;
+			}
+			for k in 0..<tail {
+				predicted := filter_paeth(dest[k], prev[channels+k], prev[k]);
+				dest[channels+k] = (src[channels+k] + predicted) & 255;
+			}
+
+		case:
+			return false;
+		}
+
+		// The row just written becomes the reference for the next one.
+		src  = src[stride:];
+		prev = dest;
+		dest = dest[stride:];
+	}
+	return true;
+}
+
+// @(optimization_mode="speed")
+defilter_less_than_8 :: proc(params: ^Filter_Params) -> (ok: bool) #no_bounds_check {
+	// Defilters rows of 1-, 2- or 4-bit samples, then widens every sample to one byte in dest.
+	// Returns false if a row names an unknown filter; params.src and params.dest are modified in place.
+	using params;
+	ok = true;
+	row_stride_in  := ((channels * width * depth) + 7) >> 3;
+	row_stride_out := channels * width;
+
+	// Store defiltered bytes rightmost so we can widen in-place.
+	row_offset := row_stride_out - row_stride_in;
+	// Save original dest because we'll need it for the bit widening.
+	orig_dest := dest;
+
+	// TODO: See about doing a Duff's #unroll where practicable
+
+	// Apron so we don't need to special case first rows.
+	up := make([]u8, row_stride_out, context.temp_allocator);
+
+	#no_bounds_check for _ in 0..<height {
+		nk := row_stride_in - channels;
+		// Ranges below are half-open: bounds checks are off, so an inclusive range would step one byte past the row.
+		dest = dest[row_offset:];
+
+		filter := Row_Filter(src[0]); src = src[1:];
+		switch filter {
+		case .None:
+			copy(dest, src[:row_stride_in]);
+		case .Sub:
+			for i in 0..<channels {
+				dest[i] = src[i];
+			}
+			for k in 0..<nk {
+				dest[channels+k] = (src[channels+k] + dest[k]) & 255;
+			}
+		case .Up:
+			for k in 0..<row_stride_in {
+				dest[k] = (src[k] + up[k]) & 255;
+			}
+		case .Average:
+			for i in 0..<channels {
+				avg := up[i] >> 1;
+				dest[i] = (src[i] + avg) & 255;
+			}
+			for k in 0..<nk {
+				avg := u8((u16(up[channels+k]) + u16(dest[k])) >> 1);
+				dest[channels+k] = (src[channels+k] + avg) & 255;
+			}
+		case .Paeth:
+			for i in 0..<channels {
+				paeth := filter_paeth(0, up[i], 0);
+				dest[i] = (src[i] + paeth) & 255;
+			}
+			for k in 0..<nk {
+				paeth := filter_paeth(dest[k], up[channels+k], up[k]);
+				dest[channels+k] = (src[channels+k] + paeth) & 255;
+			}
+		case:
+			return false;
+		}
+
+		src   = src [row_stride_in:];
+		up    = dest;
+		dest  = dest[row_stride_in:];
+	}
+
+	// Let's expand the bits
+	dest = orig_dest;
+
+	// Don't rescale the bits if we're a paletted image.
+	dsc := depth_scale_table;
+	scale := rescale ? dsc[depth] : 1;
+
+	/*
+		For sBIT support we should probably set scale to 1 and mask the significant bits.
+		Separately, do we want to support packed pixels? i.e defiltering only, no expansion?
+		If so, all we have to do is call defilter_8 for that case and not set img.depth to 8.
+	*/
+
+	for j := 0; j < height; j += 1 {
+		src = dest[row_offset:];
+
+		switch depth {
+		case 4:
+			k := row_stride_out;
+			for ; k >= 2; k -= 2 {
+				c := src[0];
+				dest[0] = scale * (c >> 4);
+				dest[1] = scale * (c & 15);
+				dest = dest[2:]; src = src[1:];
+			}
+			if k > 0 {
+				c := src[0];
+				dest[0] = scale * (c >> 4);
+				dest = dest[1:];
+			}
+		case 2:
+			k := row_stride_out;
+			for ; k >= 4; k -= 4 {
+				c := src[0];
+				dest[0] = scale * ((c >> 6)    );
+				dest[1] = scale * ((c >> 4) & 3);
+				dest[2] = scale * ((c >> 2) & 3);
+				dest[3] = scale * ((c     ) & 3);
+				dest = dest[4:]; src = src[1:];
+			}
+			if k > 0 {
+				c := src[0];
+				dest[0] = scale * ((c >> 6)    );
+				if k > 1 {
+					dest[1] = scale * ((c >> 4) & 3);
+				}
+				if k > 2 {
+					dest[2] = scale * ((c >> 2) & 3);
+				}
+				dest = dest[k:];
+			}
+		case 1:
+			k := row_stride_out;
+			for ; k >= 8; k -= 8 {
+				c := src[0];
+				dest[0] = scale * ((c >> 7)    );
+				dest[1] = scale * ((c >> 6) & 1);
+				dest[2] = scale * ((c >> 5) & 1);
+				dest[3] = scale * ((c >> 4) & 1);
+				dest[4] = scale * ((c >> 3) & 1);
+				dest[5] = scale * ((c >> 2) & 1);
+				dest[6] = scale * ((c >> 1) & 1);
+				dest[7] = scale * ((c     ) & 1);
+				dest = dest[8:]; src = src[1:];
+			}
+			if k > 0 {
+				c := src[0];
+				dest[0] = scale * ((c >> 7)    );
+				if k > 1 {
+					dest[1] = scale * ((c >> 6) & 1);
+				}
+				if k > 2 {
+					dest[2] = scale * ((c >> 5) & 1);
+				}
+				if k > 3 {
+					dest[3] = scale * ((c >> 4) & 1);
+				}
+				if k > 4 {
+					dest[4] = scale * ((c >> 3) & 1);
+				}
+				if k > 5 {
+					dest[5] = scale * ((c >> 2) & 1);
+				}
+				if k > 6 {
+					dest[6] = scale * ((c >> 1) & 1);
+				}
+				dest = dest[k:];
+
+			}
+
+		}
+	}
+
+	return;
+}
+
+// @(optimization_mode="speed")
+defilter_16 :: proc(params: ^Filter_Params) -> (ok: bool) {
+	// Undoes the per-row filters for 16-bit samples; src and dest are advanced in place.
+	// The arithmetic is per byte; "left" is the same byte of the previous pixel.
+	using params;
+
+	pixel_bytes   := channels * 2;
+	bytes_per_row := width * pixel_bytes;
+
+	// Apron standing in for the row above the first one, so row 0 is not a special case.
+	up := make([]u8, bytes_per_row, context.temp_allocator);
+	ok = true;
+
+	// TODO: See about doing a Duff's #unroll where practicable
+	for _ in 0..<height {
+		// Every row is prefixed by one byte naming its filter.
+		kind := Row_Filter(src[0]);
+		src = src[1:];
+		switch kind {
+		case .None:
+			copy(dest, src[:bytes_per_row]);
+		case .Sub:
+			for i in 0..<pixel_bytes {
+				dest[i] = src[i];
+			}
+			for i in pixel_bytes..<bytes_per_row {
+				dest[i] = (src[i] + dest[i-pixel_bytes]) & 0xff;
+			}
+		case .Up:
+			for i in 0..<bytes_per_row {
+				dest[i] = (src[i] + up[i]) & 0xff;
+			}
+		case .Average:
+			for i in 0..<pixel_bytes {
+				half_up := up[i] >> 1;
+				dest[i] = (src[i] + half_up) & 0xff;
+			}
+			for i in pixel_bytes..<bytes_per_row {
+				mean := u8((u16(up[i]) + u16(dest[i-pixel_bytes])) >> 1);
+				dest[i] = (src[i] + mean) & 0xff;
+			}
+		case .Paeth:
+			for i in 0..<pixel_bytes {
+				predicted := filter_paeth(0, up[i], 0);
+				dest[i] = (src[i] + predicted) & 0xff;
+			}
+			for i in pixel_bytes..<bytes_per_row {
+				predicted := filter_paeth(dest[i-pixel_bytes], up[i], up[i-pixel_bytes]);
+				dest[i] = (src[i] + predicted) & 0xff;
+			}
+		case:
+			return false;
+		}
+
+		up   = dest;
+		dest = dest[bytes_per_row:];
+		src  = src[bytes_per_row:];
+	}
+	return;
+}
+
+defilter :: proc(img: ^Image, filter_bytes: ^bytes.Buffer, header: ^IHDR, options: Options) -> (err: compress.Error) {
+	// Defilters (and, for Adam7, deinterlaces) the rows in filter_bytes into img.pixels.
+	// Returns E_PNG.Unknown_Filter_Method if a defilter_* call reports failure, otherwise nil.
+	input    := bytes.buffer_to_bytes(filter_bytes);
+	width    := int(header.width);
+	height   := int(header.height);
+	channels := int(img.channels);
+	depth    := int(header.bit_depth);
+	rescale  := .Color not_in header.color_type;
+	bytes_per_channel := depth == 16 ? 2 : 1;
+	num_bytes := compute_buffer_size(width, height, channels, depth == 16 ? 16 : 8);
+	resize(&img.pixels.buf, num_bytes);
+
+	filter_ok: bool;
+
+	if header.interlace_method != .Adam7 {
+		params := Filter_Params{
+			src      = input,
+			width    = width,
+			height   = height,
+			channels = channels,
+			depth    = depth,
+			rescale  = rescale,
+			dest     = img.pixels.buf[:],
+		};
+
+		if depth == 8 {
+			filter_ok = defilter_8(&params);
+		} else if depth < 8 {
+			filter_ok = defilter_less_than_8(&params);
+			img.depth = 8;
+		} else {
+			filter_ok = defilter_16(&params);
+		}
+		if !filter_ok {
+			// Caller will destroy buffer for us.
+			return E_PNG.Unknown_Filter_Method;
+		}
+	} else {
+		/*
+			For deinterlacing we need to make a temporary buffer, defilter part of the image,
+			and copy that back into the actual output buffer.
+		*/
+
+		for p := 0; p < 7; p += 1 {
+			i,j,x,y: int;
+			x = (width  - ADAM7_X_ORIG[p] + ADAM7_X_SPACING[p] - 1) / ADAM7_X_SPACING[p];
+			y = (height - ADAM7_Y_ORIG[p] + ADAM7_Y_SPACING[p] - 1) / ADAM7_Y_SPACING[p];
+			if (x > 0 && y > 0) {
+				temp: bytes.Buffer;
+				defer bytes.buffer_destroy(&temp); // also frees temp on the error return below
+				temp_len := compute_buffer_size(x, y, channels, depth == 16 ? 16 : 8);
+				resize(&temp.buf, temp_len);
+
+				params := Filter_Params{
+					src      = input,
+					width    = x,
+					height   = y,
+					channels = channels,
+					depth    = depth,
+					rescale  = rescale,
+					dest     = temp.buf[:],
+				};
+
+				if depth == 8 {
+					filter_ok = defilter_8(&params);
+				} else if depth < 8 {
+					filter_ok = defilter_less_than_8(&params);
+					img.depth = 8;
+				} else {
+					filter_ok = defilter_16(&params);
+				}
+
+				if !filter_ok {
+					// Caller will destroy buffer for us.
+					return E_PNG.Unknown_Filter_Method;
+				}
+
+				t := temp.buf[:];
+				for j = 0; j < y; j += 1 {
+					for i = 0; i < x; i += 1 {
+						out_y := j * ADAM7_Y_SPACING[p] + ADAM7_Y_ORIG[p];
+						out_x := i * ADAM7_X_SPACING[p] + ADAM7_X_ORIG[p];
+
+						out_off := out_y * width * channels * bytes_per_channel;
+						out_off += out_x * channels * bytes_per_channel;
+
+						for z := 0; z < channels * bytes_per_channel; z += 1 {
+							img.pixels.buf[out_off + z] = t[z];
+						}
+						t = t[channels * bytes_per_channel:];
+					}
+				}
+				input_stride := compute_buffer_size(x, y, channels, depth, 1);
+				input = input[input_stride:];
+			}
+		}
+	}
+	when ODIN_ENDIAN == "little" {
+		if img.depth == 16 {
+			// The pixel components are in Big Endian. Let's byteswap.
+			input  := mem.slice_data_cast([]u16be, img.pixels.buf[:]);
+			output := mem.slice_data_cast([]u16  , img.pixels.buf[:]);
+			#no_bounds_check for v, i in input {
+				output[i] = u16(v);
+			}
+		}
+	}
+
+	return nil;
+}
+
+load :: proc{load_from_file, load_from_slice, load_from_stream}; // explicit overload set: a call to load resolves to one of these procedures by its arguments
diff --git a/core/intrinsics/intrinsics.odin b/core/intrinsics/intrinsics.odin
index ac916a693..60b595aab 100644
--- a/core/intrinsics/intrinsics.odin
+++ b/core/intrinsics/intrinsics.odin
@@ -12,7 +12,33 @@ volatile_store :: proc(dst: ^$T, val: T) -> T ---
 
 // Trapping
 debug_trap :: proc() ---
-trap :: proc() -> ! ---
+trap       :: proc() -> ! ---
+
+// Instructions
+
+alloca             :: proc(size, align: int) -> ^u8 ---
+cpu_relax          :: proc() ---
+read_cycle_counter :: proc() -> i64 ---
+
+count_ones           :: proc(x: $T) -> T where type_is_integer(T) ---
+count_zeros          :: proc(x: $T) -> T where type_is_integer(T) ---
+count_trailing_zeros :: proc(x: $T) -> T where type_is_integer(T) ---
+count_leading_zeros  :: proc(x: $T) -> T where type_is_integer(T) ---
+reverse_bits         :: proc(x: $T) -> T where type_is_integer(T) ---
+byte_swap            :: proc(x: $T) -> T where type_is_integer(T) || type_is_float(T) ---
+
+overflow_add :: proc(lhs, rhs: $T) -> (T, bool) #optional_ok ---
+overflow_sub :: proc(lhs, rhs: $T) -> (T, bool) #optional_ok ---
+overflow_mul :: proc(lhs, rhs: $T) -> (T, bool) #optional_ok ---
+
+fixed_point_mul     :: proc(lhs, rhs: $T, #const scale: uint) -> T where type_is_integer(T) ---
+fixed_point_div     :: proc(lhs, rhs: $T, #const scale: uint) -> T where type_is_integer(T) ---
+fixed_point_mul_sat :: proc(lhs, rhs: $T, #const scale: uint) -> T where type_is_integer(T) ---
+fixed_point_div_sat :: proc(lhs, rhs: $T, #const scale: uint) -> T where type_is_integer(T) ---
+
+// Compiler Hints
+expect :: proc(val, expected_val: $T) -> T --- // T is introduced with `$`, as in the other polymorphic declarations in this file
+
 
 // Atomics
 atomic_fence        :: proc() ---
@@ -67,36 +93,25 @@ atomic_xchg_rel     :: proc(dst; ^$T, val: T) -> T ---
 atomic_xchg_acqrel  :: proc(dst; ^$T, val: T) -> T ---
 atomic_xchg_relaxed :: proc(dst; ^$T, val: T) -> T ---
 
-atomic_cxchg                    :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchg_acq                :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchg_rel                :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchg_acqrel             :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchg_relaxed            :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchg_failrelaxed        :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchg_failacq            :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchg_acq_failrelaxed    :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchg_acqrel_failrelaxed :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-
-atomic_cxchgweak                    :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchgweak_acq                :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchgweak_rel                :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchgweak_acqrel             :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchgweak_relaxed            :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchgweak_failrelaxed        :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchgweak_failacq            :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchgweak_acq_failrelaxed    :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-atomic_cxchgweak_acqrel_failrelaxed :: proc(dst: ^$T, old, new: T) -> (T, /*option*/bool) ---
-
-// Instructions
-
-alloca             :: proc(size, align: int) -> ^u8 ---
-cpu_relax          :: proc() ---
-read_cycle_counter :: proc() -> i64 ---
-
-
-// Compiler Hints
-expect :: proc(val, expected_val: T) -> T ---
-
+atomic_cxchg                    :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchg_acq                :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchg_rel                :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchg_acqrel             :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchg_relaxed            :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchg_failrelaxed        :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchg_failacq            :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchg_acq_failrelaxed    :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchg_acqrel_failrelaxed :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+
+atomic_cxchgweak                    :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchgweak_acq                :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchgweak_rel                :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchgweak_acqrel             :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchgweak_relaxed            :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchgweak_failrelaxed        :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchgweak_failacq            :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchgweak_acq_failrelaxed    :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
+atomic_cxchgweak_acqrel_failrelaxed :: proc(dst: ^$T, old, new: T) -> (T, bool) #optional_ok ---
 
 // Constant type tests
 
@@ -144,6 +159,7 @@ type_is_simd_vector      :: proc($T: typeid) -> bool ---
 type_has_nil :: proc($T: typeid) -> bool ---
 
 type_is_specialization_of :: proc($T, $S: typeid) -> bool ---
+type_is_variant_of :: proc($U, $V: typeid) -> bool where type_is_union(U) ---
 
 type_has_field :: proc($T: typeid, $name: string) -> bool ---
 
@@ -159,5 +175,5 @@ type_polymorphic_record_parameter_value :: proc($T: typeid, index: int) -> $V --
 
 type_field_index_of :: proc($T: typeid, $name: string) -> uintptr ---
 
-type_equal_proc  :: proc($T: typeid) -> (equal:  proc "contextless" (rawptr, rawptr) -> bool) ---
-type_hasher_proc :: proc($T: typeid) -> (hasher: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr) ---
+type_equal_proc  :: proc($T: typeid) -> (equal:  proc "contextless" (rawptr, rawptr) -> bool)                 where type_is_comparable(T) ---
+type_hasher_proc :: proc($T: typeid) -> (hasher: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr) where type_is_comparable(T) ---
diff --git a/core/math/rand/rand.odin b/core/math/rand/rand.odin
index 4f6e7474f..f5558bb8c 100644
--- a/core/math/rand/rand.odin
+++ b/core/math/rand/rand.odin
@@ -6,9 +6,9 @@ Rand :: struct {
 }
 
 
-@(private, static)
+@(private)
 _GLOBAL_SEED_DATA := 1234567890;
-@(private, static)
+@(private)
 global_rand := create(u64(uintptr(&_GLOBAL_SEED_DATA)));
 
 set_global_seed :: proc(seed: u64) {
diff --git a/core/mem/alloc.odin b/core/mem/alloc.odin
index 0df68255f..0da7a9708 100644
--- a/core/mem/alloc.odin
+++ b/core/mem/alloc.odin
@@ -22,7 +22,7 @@ Allocator_Mode_Set :: distinct bit_set[Allocator_Mode];
 Allocator_Query_Info :: runtime.Allocator_Query_Info;
 /*
 Allocator_Query_Info :: struct {
-	pointer:   Maybe(rawptr),
+	pointer:   rawptr,
 	size:      Maybe(int),
 	alignment: Maybe(int),
 }
diff --git a/core/mem/mem.odin b/core/mem/mem.odin
index ddf9e9637..ecf232557 100644
--- a/core/mem/mem.odin
+++ b/core/mem/mem.odin
@@ -142,6 +142,7 @@ slice_ptr :: proc(ptr: ^$T, len: int) -> []T {
 byte_slice :: #force_inline proc "contextless" (data: rawptr, len: int) -> []byte {
 	return transmute([]u8)Raw_Slice{data=data, len=max(len, 0)};
 }
+@(deprecated="use byte_slice")
 slice_ptr_to_bytes :: proc(data: rawptr, len: int) -> []byte {
 	return transmute([]u8)Raw_Slice{data=data, len=max(len, 0)};
 }
diff --git a/core/odin/ast/ast.odin b/core/odin/ast/ast.odin
index 0d015f9bb..cf2cdeacc 100644
--- a/core/odin/ast/ast.odin
+++ b/core/odin/ast/ast.odin
@@ -69,7 +69,7 @@ File :: struct {
 	pkg: ^Package,
 
 	fullpath: string,
-	src:      []byte,
+	src:      string,
 
 	docs: ^Comment_Group,
 
diff --git a/core/odin/parser/parse_files.odin b/core/odin/parser/parse_files.odin
index 99275777c..f622c9781 100644
--- a/core/odin/parser/parse_files.odin
+++ b/core/odin/parser/parse_files.odin
@@ -39,7 +39,7 @@ collect_package :: proc(path: string) -> (pkg: ^ast.Package, success: bool) {
 		}
 		file := ast.new(ast.File, NO_POS, NO_POS);
 		file.pkg = pkg;
-		file.src = src;
+		file.src = string(src);
 		file.fullpath = fullpath;
 		pkg.files[fullpath] = file;
 	}
diff --git a/core/odin/parser/parser.odin b/core/odin/parser/parser.odin
index 51bb3a261..890ebe86d 100644
--- a/core/odin/parser/parser.odin
+++ b/core/odin/parser/parser.odin
@@ -8,10 +8,21 @@ import "core:fmt"
 Warning_Handler :: #type proc(pos: tokenizer.Pos, fmt: string, args: ..any);
 Error_Handler   :: #type proc(pos: tokenizer.Pos, fmt: string, args: ..any);
 
+Flag :: enum u32 {
+	Optional_Semicolons, // parse_file turns on the tokenizer's .Insert_Semicolon; every non-nil node may then omit its semicolon
+}
+
+Flags :: distinct bit_set[Flag; u32]; // set of Flag values backed by a u32
+
+
 Parser :: struct {
 	file: ^ast.File,
 	tok: tokenizer.Tokenizer,
 
+	// If .Optional_Semicolons is set, semicolons are completely optional as statement terminators.
+	// This is different to .Insert_Semicolon in tok.flags.
+	flags: Flags,
+
 	warn: Warning_Handler,
 	err:  Error_Handler,
 
@@ -100,8 +111,9 @@ end_pos :: proc(tok: tokenizer.Token) -> tokenizer.Pos {
 	return pos;
 }
 
-default_parser :: proc() -> Parser {
+default_parser :: proc(flags := Flags{}) -> Parser {
 	return Parser {
+		flags = flags,
 		err  = default_error_handler,
 		warn = default_warning_handler,
 	};
@@ -128,6 +140,10 @@ parse_file :: proc(p: ^Parser, file: ^ast.File) -> bool {
 		p.line_comment     = nil;
 	}
 
+	if .Optional_Semicolons in p.flags {
+		p.tok.flags += {.Insert_Semicolon};
+	}
+
 	p.file = file;
 	tokenizer.init(&p.tok, file.src, file.fullpath, p.err);
 	if p.tok.ch <= 0 {
@@ -400,6 +416,11 @@ is_semicolon_optional_for_node :: proc(p: ^Parser, node: ^ast.Node) -> bool {
 	if node == nil {
 		return false;
 	}
+
+	if .Optional_Semicolons in p.flags {
+		return true;
+	}
+
 	switch n in node.derived {
 	case ast.Empty_Stmt, ast.Block_Stmt:
 		return true;
@@ -439,14 +460,34 @@ is_semicolon_optional_for_node :: proc(p: ^Parser, node: ^ast.Node) -> bool {
 	return false;
 }
 
+expect_semicolon_newline_error :: proc(p: ^Parser, token: tokenizer.Token, s: ^ast.Node) {
+	// Reports "expected ';', got newline" when the tokenizer's .Insert_Semicolon is on, .Optional_Semicolons
+	// is off, `token`'s text is a newline, and `s` is not a node that may omit its semicolon.
+	if .Optional_Semicolons not_in p.flags && .Insert_Semicolon in p.tok.flags && token.text == "\n" {
+		#partial switch token.kind {
+		case .Close_Brace, .Close_Paren, .Else: // one case list: Odin cases never fall through implicitly
+			return;
+		}
+		if is_semicolon_optional_for_node(p, s) {
+			return;
+		}
+
+		tok := token;
+		tok.pos.column -= 1;
+		error(p, tok.pos, "expected ';', got newline");
+	}
+}
+
 
 expect_semicolon :: proc(p: ^Parser, node: ^ast.Node) -> bool {
 	if allow_token(p, .Semicolon) {
+		expect_semicolon_newline_error(p, p.prev_tok, node);
 		return true;
 	}
 
 	prev := p.prev_tok;
 	if prev.kind == .Semicolon {
+		expect_semicolon_newline_error(p, p.prev_tok, node);
 		return true;
 	}
 
@@ -615,7 +656,7 @@ parse_if_stmt :: proc(p: ^Parser) -> ^ast.If_Stmt {
 		cond = parse_expr(p, false);
 	} else {
 		init = parse_simple_stmt(p, nil);
-		if allow_token(p, .Semicolon) {
+		if parse_control_statement_semicolon_separator(p) {
 			cond = parse_expr(p, false);
 		} else {
 			cond = convert_stmt_to_expr(p, init, "boolean expression");
@@ -668,6 +709,18 @@ parse_if_stmt :: proc(p: ^Parser) -> ^ast.If_Stmt {
 	return if_stmt;
 }
 
+parse_control_statement_semicolon_separator :: proc(p: ^Parser) -> bool {
+	// Consumes the semicolon separating a control statement's init clause from what follows; returns whether one was taken.
+	// When the next token is '{', only an explicit ";" is taken, so a newline semicolon before the body is left in place.
+	if peek_token(p).kind != .Open_Brace {
+		return allow_token(p, .Semicolon);
+	}
+	if p.curr_tok.text == ";" {
+		return allow_token(p, .Semicolon);
+	}
+	return false;
+}
+
 parse_for_stmt :: proc(p: ^Parser) -> ^ast.Stmt {
 	if p.curr_proc == nil {
 		error(p, p.curr_tok.pos, "you cannot use a for statement in the file scope");
@@ -716,7 +769,7 @@ parse_for_stmt :: proc(p: ^Parser) -> ^ast.Stmt {
 			}
 		}
 
-		if !is_range && allow_token(p, .Semicolon) {
+		if !is_range && parse_control_statement_semicolon_separator(p) {
 			init = cond;
 			cond = nil;
 			if p.curr_tok.kind != .Semicolon {
@@ -820,7 +873,7 @@ parse_switch_stmt :: proc(p: ^Parser) -> ^ast.Stmt {
 			tag = parse_simple_stmt(p, {Stmt_Allow_Flag.In});
 			if as, ok := tag.derived.(ast.Assign_Stmt); ok && as.op.kind == .In {
 				is_type_switch = true;
-			} else if allow_token(p, .Semicolon) {
+			} else if parse_control_statement_semicolon_separator(p) {
 				init = tag;
 				tag = nil;
 				if p.curr_tok.kind != .Open_Brace {
@@ -831,6 +884,7 @@ parse_switch_stmt :: proc(p: ^Parser) -> ^ast.Stmt {
 	}
 
 
+	skip_possible_newline(p);
 	open := expect_token(p, .Open_Brace);
 
 	for p.curr_tok.kind == .Case {
@@ -958,6 +1012,7 @@ parse_foreign_block :: proc(p: ^Parser, tok: tokenizer.Token) -> ^ast.Foreign_Bl
 	defer p.in_foreign_block = prev_in_foreign_block;
 	p.in_foreign_block = true;
 
+	skip_possible_newline_for_literal(p);
 	open := expect_token(p, .Open_Brace);
 	for p.curr_tok.kind != .Close_Brace && p.curr_tok.kind != .EOF {
 		decl := parse_foreign_block_decl(p);
@@ -1287,7 +1342,7 @@ token_precedence :: proc(p: ^Parser, kind: tokenizer.Token_Kind) -> int {
 	#partial switch kind {
 	case .Question, .If, .When:
 		return 1;
-	case .Ellipsis, .Range_Half:
+	case .Ellipsis, .Range_Half, .Range_Full:
 		if !p.allow_range {
 			return 0;
 		}
@@ -2234,6 +2289,8 @@ parse_operand :: proc(p: ^Parser, lhs: bool) -> ^ast.Expr {
 		}
 		body: ^ast.Stmt;
 
+		skip_possible_newline_for_literal(p);
+
 		if allow_token(p, .Undef) {
 			body = nil;
 			if where_token.kind != .Invalid {
@@ -2406,6 +2463,7 @@ parse_operand :: proc(p: ^Parser, lhs: bool) -> ^ast.Expr {
 			p.expr_level = where_prev_level;
 		}
 
+		skip_possible_newline_for_literal(p);
 		expect_token(p, .Open_Brace);
 		fields, name_count = parse_field_list(p, .Close_Brace, ast.Field_Flags_Struct);
 		close := expect_token(p, .Close_Brace);
@@ -2474,6 +2532,7 @@ parse_operand :: proc(p: ^Parser, lhs: bool) -> ^ast.Expr {
 
 		variants: [dynamic]^ast.Expr;
 
+		skip_possible_newline_for_literal(p);
 		expect_token_after(p, .Open_Brace, "union");
 
 		for p.curr_tok.kind != .Close_Brace && p.curr_tok.kind != .EOF {
@@ -2504,6 +2563,8 @@ parse_operand :: proc(p: ^Parser, lhs: bool) -> ^ast.Expr {
 		if p.curr_tok.kind != .Open_Brace {
 			base_type = parse_type(p);
 		}
+
+		skip_possible_newline_for_literal(p);
 		open := expect_token(p, .Open_Brace);
 		fields := parse_elem_list(p);
 		close := expect_token(p, .Close_Brace);
@@ -2602,6 +2663,7 @@ parse_operand :: proc(p: ^Parser, lhs: bool) -> ^ast.Expr {
 			}
 		}
 
+		skip_possible_newline_for_literal(p);
 		open := expect_token(p, .Open_Brace);
 		asm_string := parse_expr(p, false);
 		expect_token(p, .Comma);
@@ -2812,7 +2874,7 @@ parse_atom_expr :: proc(p: ^Parser, value: ^ast.Expr, lhs: bool) -> (operand: ^a
 			open := expect_token(p, .Open_Bracket);
 
 			#partial switch p.curr_tok.kind {
-			case .Colon, .Ellipsis, .Range_Half:
+			case .Colon, .Ellipsis, .Range_Half, .Range_Full:
 				// NOTE(bill): Do not err yet
 				break;
 			case:
@@ -2820,7 +2882,7 @@ parse_atom_expr :: proc(p: ^Parser, value: ^ast.Expr, lhs: bool) -> (operand: ^a
 			}
 
 			#partial switch p.curr_tok.kind {
-			case .Ellipsis, .Range_Half:
+			case .Ellipsis, .Range_Half, .Range_Full:
 				error(p, p.curr_tok.pos, "expected a colon, not a range");
 				fallthrough;
 			case .Colon:
diff --git a/core/odin/tokenizer/token.odin b/core/odin/tokenizer/token.odin
index 1b37bae23..88908d7f8 100644
--- a/core/odin/tokenizer/token.odin
+++ b/core/odin/tokenizer/token.odin
@@ -107,6 +107,7 @@ Token_Kind :: enum u32 {
 		Comma,         // ,
 		Ellipsis,      // ..
 		Range_Half,    // ..<
+		Range_Full,    // ..=
 		Back_Slash,    // \
 	B_Operator_End,
 
@@ -233,6 +234,7 @@ tokens := [Token_Kind.COUNT]string {
 	",",
 	"..",
 	"..<",
+	"..=",
 	"\\",
 	"",
 
diff --git a/core/odin/tokenizer/tokenizer.odin b/core/odin/tokenizer/tokenizer.odin
index b1b446192..e0cc6dcd3 100644
--- a/core/odin/tokenizer/tokenizer.odin
+++ b/core/odin/tokenizer/tokenizer.odin
@@ -14,7 +14,7 @@ Flags :: distinct bit_set[Flag; u32];
 Tokenizer :: struct {
 	// Immutable data
 	path: string,
-	src:  []byte,
+	src:  string,
 	err:  Error_Handler,
 
 	flags: Flags,
@@ -31,7 +31,7 @@ Tokenizer :: struct {
 	error_count: int,
 }
 
-init :: proc(t: ^Tokenizer, src: []byte, path: string, err: Error_Handler = default_error_handler) {
+init :: proc(t: ^Tokenizer, src: string, path: string, err: Error_Handler = default_error_handler) {
 	t.src = src;
 	t.err = err;
 	t.ch = ' ';
@@ -87,7 +87,7 @@ advance_rune :: proc(using t: ^Tokenizer) {
 		case r == 0:
 			error(t, t.offset, "illegal character NUL");
 		case r >= utf8.RUNE_SELF:
-			r, w = utf8.decode_rune(src[read_offset:]);
+			r, w = utf8.decode_rune_in_string(src[read_offset:]);
 			if r == utf8.RUNE_ERROR && w == 1 {
 				error(t, t.offset, "illegal UTF-8 encoding");
 			} else if r == utf8.RUNE_BOM && offset > 0 {
@@ -623,6 +623,9 @@ scan :: proc(t: ^Tokenizer) -> Token {
 					if t.ch == '<' {
 						advance_rune(t);
 						kind = .Range_Half;
+					} else if t.ch == '=' {
+						advance_rune(t);
+						kind = .Range_Full;
 					}
 				}
 			}
diff --git a/core/os/os2/errors.odin b/core/os/os2/errors.odin
index 00cd600a8..2fc49deed 100644
--- a/core/os/os2/errors.odin
+++ b/core/os/os2/errors.odin
@@ -1,11 +1,8 @@
 package os2
 
-Platform_Error_Min_Bits :: 32;
+import "core:io"
 
-Error :: enum u64 {
-	None = 0,
-
-	// General Errors
+General_Error :: enum u32 {
 	Invalid_Argument,
 
 	Permission_Denied,
@@ -13,42 +10,19 @@ Error :: enum u64 {
 	Not_Exist,
 	Closed,
 
-	// Timeout Errors
 	Timeout,
+}
 
-	// I/O Errors
-	// EOF is the error returned by `read` when no more input is available
-	EOF,
-
-	// Unexpected_EOF means that EOF was encountered in the middle of reading a fixed-sized block of data
-	Unexpected_EOF,
-
-	// Short_Write means that a write accepted fewer bytes than requested but failed to return an explicit error
-	Short_Write,
-
-	// Invalid_Write means that a write returned an impossible count
-	Invalid_Write,
-
-	// Short_Buffer means that a read required a longer buffer than was provided
-	Short_Buffer,
-
-	// No_Progress is returned by some implementations of `io.Reader` when many calls
-	// to `read` have failed to return any data or error.
-	// This is usually a signed of a broken `io.Reader` implementation
-	No_Progress,
-
-	Invalid_Whence,
-	Invalid_Offset,
-	Invalid_Unread,
-
-	Negative_Read,
-	Negative_Write,
-	Negative_Count,
-	Buffer_Full,
+Platform_Error :: struct {
+	err: i32,
+}
 
-	// Platform Specific Errors
-	Platform_Minimum = 1<<Platform_Error_Min_Bits,
+Error :: union {
+	General_Error,
+	io.Error,
+	Platform_Error,
 }
+#assert(size_of(Error) == size_of(u64));
 
 Path_Error :: struct {
 	op:   string,
@@ -83,20 +57,17 @@ link_error_delete :: proc(lerr: Maybe(Link_Error)) {
 
 
 is_platform_error :: proc(ferr: Error) -> (err: i32, ok: bool) {
-	if ferr >= .Platform_Minimum {
-		err = i32(u64(ferr)>>Platform_Error_Min_Bits);
-		ok = true;
+	v: Platform_Error;
+	if v, ok = ferr.(Platform_Error); ok {
+		err = v.err;
 	}
 	return;
 }
 
-error_from_platform_error :: proc(errno: i32) -> Error {
-	return Error(u64(errno) << Platform_Error_Min_Bits);
-}
 
 error_string :: proc(ferr: Error) -> string {
-	#partial switch ferr {
-	case .None:              return "";
+	switch ferr {
+	case nil:                return "";
 	case .Invalid_Argument:  return "invalid argument";
 	case .Permission_Denied: return "permission denied";
 	case .Exist:             return "file already exists";
diff --git a/core/os/os2/file_stream.odin b/core/os/os2/file_stream.odin
index 6877faea4..52f5b30e9 100644
--- a/core/os/os2/file_stream.odin
+++ b/core/os/os2/file_stream.odin
@@ -10,23 +10,14 @@ file_to_stream :: proc(fd: Handle) -> (s: io.Stream) {
 
 @(private)
 error_to_io_error :: proc(ferr: Error) -> io.Error {
-	#partial switch ferr {
-	case .None:           return .None;
-	case .EOF:            return .EOF;
-	case .Unexpected_EOF: return .Unexpected_EOF;
-	case .Short_Write:    return .Short_Write;
-	case .Invalid_Write:  return .Invalid_Write;
-	case .Short_Buffer:   return .Short_Buffer;
-	case .No_Progress:    return .No_Progress;
-	case .Invalid_Whence: return .Invalid_Whence;
-	case .Invalid_Offset: return .Invalid_Offset;
-	case .Invalid_Unread: return .Invalid_Unread;
-	case .Negative_Read:  return .Negative_Read;
-	case .Negative_Write: return .Negative_Write;
-	case .Negative_Count: return .Negative_Count;
-	case .Buffer_Full:    return .Buffer_Full;
+	if ferr == nil {
+		return .None;
 	}
-	return .Unknown;
+	err, ok := ferr.(io.Error);
+	if !ok {
+		err = .Unknown;
+	}
+	return err;
 }
 
 
diff --git a/core/os/os2/file_util.odin b/core/os/os2/file_util.odin
index 435eba3ab..db6842cf8 100644
--- a/core/os/os2/file_util.odin
+++ b/core/os/os2/file_util.odin
@@ -1,6 +1,7 @@
 package os2
 
 import "core:mem"
+import "core:io"
 import "core:strconv"
 import "core:unicode/utf8"
 
diff --git a/core/os/os2/file_windows.odin b/core/os/os2/file_windows.odin
index 97fe6b3d9..5e87d80a4 100644
--- a/core/os/os2/file_windows.odin
+++ b/core/os/os2/file_windows.odin
@@ -5,19 +5,19 @@ import "core:io"
 import "core:time"
 
 _create :: proc(name: string) -> (Handle, Error) {
-	return 0, .None;
+	return 0, nil;
 }
 
 _open :: proc(name: string) -> (Handle, Error) {
-	return 0, .None;
+	return 0, nil;
 }
 
 _open_file :: proc(name: string, flag: int, perm: File_Mode) -> (Handle, Error) {
-	return 0, .None;
+	return 0, nil;
 }
 
 _close :: proc(fd: Handle) -> Error {
-	return .None;
+	return nil;
 }
 
 _name :: proc(fd: Handle, allocator := context.allocator) -> string {
@@ -58,11 +58,11 @@ _file_size :: proc(fd: Handle) -> (n: i64, err: Error) {
 
 
 _sync :: proc(fd: Handle) -> Error {
-	return .None;
+	return nil;
 }
 
 _flush :: proc(fd: Handle) -> Error {
-	return .None;
+	return nil;
 }
 
 _truncate :: proc(fd: Handle, size: i64) -> Maybe(Path_Error) {
@@ -92,20 +92,20 @@ _read_link :: proc(name: string) -> (string, Maybe(Path_Error)) {
 
 
 _chdir :: proc(fd: Handle) -> Error {
-	return .None;
+	return nil;
 }
 
 _chmod :: proc(fd: Handle, mode: File_Mode) -> Error {
-	return .None;
+	return nil;
 }
 
 _chown :: proc(fd: Handle, uid, gid: int) -> Error {
-	return .None;
+	return nil;
 }
 
 
 _lchown :: proc(name: string, uid, gid: int) -> Error {
-	return .None;
+	return nil;
 }
 
 
diff --git a/core/os/os2/pipe_windows.odin b/core/os/os2/pipe_windows.odin
index 68adb6c3b..04750bf88 100644
--- a/core/os/os2/pipe_windows.odin
+++ b/core/os/os2/pipe_windows.odin
@@ -6,7 +6,7 @@ import win32 "core:sys/windows"
 _pipe :: proc() -> (r, w: Handle, err: Error) {
 	p: [2]win32.HANDLE;
 	if !win32.CreatePipe(&p[0], &p[1], nil, 0) {
-		return 0, 0, error_from_platform_error(i32(win32.GetLastError()));
+		return 0, 0, Platform_Error{i32(win32.GetLastError())};
 	}
 	return Handle(p[0]), Handle(p[1]), nil;
 }
diff --git a/core/os/os2/stat_windows.odin b/core/os/os2/stat_windows.odin
index ed739b894..48811340a 100644
--- a/core/os/os2/stat_windows.odin
+++ b/core/os/os2/stat_windows.odin
@@ -40,7 +40,7 @@ _same_file :: proc(fi1, fi2: File_Info) -> bool {
 
 
 _stat_errno :: proc(errno: win32.DWORD) -> Path_Error {
-	return Path_Error{err = error_from_platform_error(i32(errno))};
+	return Path_Error{err = Platform_Error{i32(errno)}};
 }
 
 
@@ -89,7 +89,7 @@ internal_stat :: proc(name: string, create_file_attributes: u32, allocator := co
 		fd: win32.WIN32_FIND_DATAW;
 		sh := win32.FindFirstFileW(wname, &fd);
 		if sh == win32.INVALID_HANDLE_VALUE {
-			e = Path_Error{err = error_from_platform_error(i32(win32.GetLastError()))};
+			e = Path_Error{err = Platform_Error{i32(win32.GetLastError())}};
 			return;
 		}
 		win32.FindClose(sh);
@@ -99,7 +99,7 @@ internal_stat :: proc(name: string, create_file_attributes: u32, allocator := co
 
 	h := win32.CreateFileW(wname, 0, 0, nil, win32.OPEN_EXISTING, create_file_attributes, nil);
 	if h == win32.INVALID_HANDLE_VALUE {
-		e = Path_Error{err = error_from_platform_error(i32(win32.GetLastError()))};
+		e = Path_Error{err = Platform_Error{i32(win32.GetLastError())}};
 		return;
 	}
 	defer win32.CloseHandle(h);
diff --git a/core/os/os2/temp_file_windows.odin b/core/os/os2/temp_file_windows.odin
index 19dca1b04..dd050ab48 100644
--- a/core/os/os2/temp_file_windows.odin
+++ b/core/os/os2/temp_file_windows.odin
@@ -4,11 +4,11 @@ package os2
 import win32 "core:sys/windows"
 
 _create_temp :: proc(dir, pattern: string) -> (Handle, Error) {
-	return 0, .None;
+	return 0, nil;
 }
 
 _mkdir_temp :: proc(dir, pattern: string, allocator := context.allocator) -> (string, Error) {
-	return "", .None;
+	return "", nil;
 }
 
 _temp_dir :: proc(allocator := context.allocator) -> string {
diff --git a/core/os/os_freebsd.odin b/core/os/os_freebsd.odin
index 137c6f864..2afa8bd14 100644
--- a/core/os/os_freebsd.odin
+++ b/core/os/os_freebsd.odin
@@ -10,7 +10,7 @@ import "core:c"
 Handle :: distinct i32;
 File_Time :: distinct u64;
 Errno :: distinct i32;
-Syscall :: distinct int;
+Syscall :: distinct i32;
 
 INVALID_HANDLE :: ~Handle(0);
 
diff --git a/core/os/os_linux.odin b/core/os/os_linux.odin
index dd0914f40..7569909d7 100644
--- a/core/os/os_linux.odin
+++ b/core/os/os_linux.odin
@@ -11,7 +11,7 @@ import "core:strconv"
 Handle    :: distinct i32;
 File_Time :: distinct u64;
 Errno     :: distinct i32;
-Syscall   :: distinct int;
+Syscall   :: distinct i32;
 
 INVALID_HANDLE :: ~Handle(0);
 
@@ -269,7 +269,7 @@ SYS_GETTID: Syscall : 186;
 
 foreign libc {
 	@(link_name="__errno_location") __errno_location    :: proc() -> ^int ---;
-	@(link_name="syscall")          syscall             :: proc(number: Syscall, #c_vararg args: ..any) -> int ---;
+	@(link_name="syscall")          syscall             :: proc(number: Syscall, #c_vararg args: ..any) -> i32 ---;
 
 	@(link_name="open")             _unix_open          :: proc(path: cstring, flags: c.int, mode: c.int) -> Handle ---;
 	@(link_name="close")            _unix_close         :: proc(fd: Handle) -> c.int ---;
@@ -595,7 +595,7 @@ exit :: proc "contextless" (code: int) -> ! {
 }
 
 current_thread_id :: proc "contextless" () -> int {
-	return syscall(SYS_GETTID);
+	return cast(int)syscall(SYS_GETTID);
 }
 
 dlopen :: proc(filename: string, flags: int) -> rawptr {
diff --git a/core/runtime/core.odin b/core/runtime/core.odin
index 0033aad9a..cb526ed2d 100644
--- a/core/runtime/core.odin
+++ b/core/runtime/core.odin
@@ -32,6 +32,7 @@ Calling_Convention :: enum u8 {
 	Fast_Call   = 5,
 
 	None        = 6,
+	Naked       = 7,
 }
 
 Type_Info_Enum_Value :: distinct i64;
@@ -120,6 +121,9 @@ Type_Info_Union :: struct {
 	variants:     []^Type_Info,
 	tag_offset:   uintptr,
 	tag_type:     ^Type_Info,
+
+	equal: Equal_Proc, // set only when the union has .Comparable set but does not have .Simple_Compare set
+
 	custom_align: bool,
 	no_nil:       bool,
 	maybe:        bool,
diff --git a/core/runtime/internal.odin b/core/runtime/internal.odin
index 0e128567a..8a7b22ca4 100644
--- a/core/runtime/internal.odin
+++ b/core/runtime/internal.odin
@@ -105,17 +105,9 @@ mem_copy :: proc "contextless" (dst, src: rawptr, len: int) -> rawptr {
 	if src == nil {
 		return dst;
 	}
+
 	// NOTE(bill): This _must_ be implemented like C's memmove
-	foreign _ {
-		when size_of(rawptr) == 8 {
-			@(link_name="llvm.memmove.p0i8.p0i8.i64")
-			llvm_memmove :: proc "none" (dst, src: rawptr, len: int, is_volatile: bool = false) ---;
-		} else {
-			@(link_name="llvm.memmove.p0i8.p0i8.i32")
-			llvm_memmove :: proc "none" (dst, src: rawptr, len: int, is_volatile: bool = false) ---;
-		}
-	}
-	llvm_memmove(dst, src, len);
+	intrinsics.mem_copy(dst, src, len);
 	return dst;
 }
 
@@ -123,17 +115,9 @@ mem_copy_non_overlapping :: proc "contextless" (dst, src: rawptr, len: int) -> r
 	if src == nil {
 		return dst;
 	}
+
 	// NOTE(bill): This _must_ be implemented like C's memcpy
-	foreign _ {
-		when size_of(rawptr) == 8 {
-			@(link_name="llvm.memcpy.p0i8.p0i8.i64")
-			llvm_memcpy :: proc "none" (dst, src: rawptr, len: int, is_volatile: bool = false) ---;
-		} else {
-			@(link_name="llvm.memcpy.p0i8.p0i8.i32")
-			llvm_memcpy :: proc "none" (dst, src: rawptr, len: int, is_volatile: bool = false) ---;
-		}
-	}
-	llvm_memcpy(dst, src, len);
+	intrinsics.mem_copy_non_overlapping(dst, src, len);
 	return dst;
 }
 
@@ -409,11 +393,6 @@ string_decode_rune :: #force_inline proc "contextless" (s: string) -> (rune, int
 	return rune(s0&MASK4)<<18 | rune(b1&MASKX)<<12 | rune(b2&MASKX)<<6 | rune(b3&MASKX), 4;
 }
 
-@(default_calling_convention = "none")
-foreign {
-	@(link_name="llvm.sqrt.f32") _sqrt_f32 :: proc(x: f32) -> f32 ---
-	@(link_name="llvm.sqrt.f64") _sqrt_f64 :: proc(x: f64) -> f64 ---
-}
 abs_f16 :: #force_inline proc "contextless" (x: f16) -> f16 {
 	return -x if x < 0 else x;
 }
@@ -445,27 +424,27 @@ max_f64 :: proc(a, b: f64) -> f64 {
 
 abs_complex32 :: #force_inline proc "contextless" (x: complex32) -> f16 {
 	r, i := real(x), imag(x);
-	return f16(_sqrt_f32(f32(r*r + i*i)));
+	return f16(intrinsics.sqrt(f32(r*r + i*i)));
 }
 abs_complex64 :: #force_inline proc "contextless" (x: complex64) -> f32 {
 	r, i := real(x), imag(x);
-	return _sqrt_f32(r*r + i*i);
+	return intrinsics.sqrt(r*r + i*i);
 }
 abs_complex128 :: #force_inline proc "contextless" (x: complex128) -> f64 {
 	r, i := real(x), imag(x);
-	return _sqrt_f64(r*r + i*i);
+	return intrinsics.sqrt(r*r + i*i);
 }
 abs_quaternion64 :: #force_inline proc "contextless" (x: quaternion64) -> f16 {
 	r, i, j, k := real(x), imag(x), jmag(x), kmag(x);
-	return f16(_sqrt_f32(f32(r*r + i*i + j*j + k*k)));
+	return f16(intrinsics.sqrt(f32(r*r + i*i + j*j + k*k)));
 }
 abs_quaternion128 :: #force_inline proc "contextless" (x: quaternion128) -> f32 {
 	r, i, j, k := real(x), imag(x), jmag(x), kmag(x);
-	return _sqrt_f32(r*r + i*i + j*j + k*k);
+	return intrinsics.sqrt(r*r + i*i + j*j + k*k);
 }
 abs_quaternion256 :: #force_inline proc "contextless" (x: quaternion256) -> f64 {
 	r, i, j, k := real(x), imag(x), jmag(x), kmag(x);
-	return _sqrt_f64(r*r + i*i + j*j + k*k);
+	return intrinsics.sqrt(r*r + i*i + j*j + k*k);
 }
 
 
diff --git a/core/runtime/udivmod128.odin b/core/runtime/udivmod128.odin
index e4b7380d3..fff856ab6 100644
--- a/core/runtime/udivmod128.odin
+++ b/core/runtime/udivmod128.odin
@@ -11,7 +11,7 @@ udivmod128 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
 	q, r: [2]u64 = ---, ---;
 	sr: u32 = 0;
 
-	low  :: ODIN_ENDIAN == "big" ? 1 : 0;
+	low  :: 1 when ODIN_ENDIAN == "big" else 0;
 	high :: 1 - low;
 	U64_BITS :: 8*size_of(u64);
 	U128_BITS :: 8*size_of(u128);
diff --git a/core/strings/builder.odin b/core/strings/builder.odin
index dd7fd4f1e..843f79381 100644
--- a/core/strings/builder.odin
+++ b/core/strings/builder.odin
@@ -221,7 +221,7 @@ pop_rune :: proc(b: ^Builder) -> (r: rune, width: int) {
 }
 
 
-@(private, static)
+@(private)
 DIGITS_LOWER := "0123456789abcdefx";
 
 write_quoted_string :: proc{
diff --git a/core/sync/sync2/atomic.odin b/core/sync/sync2/atomic.odin
index 1f8e2f3a8..fa86ec352 100644
--- a/core/sync/sync2/atomic.odin
+++ b/core/sync/sync2/atomic.odin
@@ -2,78 +2,76 @@ package sync2
 
 import "intrinsics"
 
-// TODO(bill): Is this even a good design? The intrinsics seem to be more than good enough and just as clean
-
 cpu_relax :: intrinsics.cpu_relax;
 
-atomic_fence        :: intrinsics.atomic_fence;
-atomic_fence_acq    :: intrinsics.atomic_fence_acq;
-atomic_fence_rel    :: intrinsics.atomic_fence_rel;
-atomic_fence_acqrel :: intrinsics.atomic_fence_acqrel;
+atomic_fence         :: intrinsics.atomic_fence;
+atomic_fence_acquire :: intrinsics.atomic_fence_acq;
+atomic_fence_release :: intrinsics.atomic_fence_rel;
+atomic_fence_acqrel  :: intrinsics.atomic_fence_acqrel;
 
 atomic_store           :: intrinsics.atomic_store;
-atomic_store_rel       :: intrinsics.atomic_store_rel;
+atomic_store_release   :: intrinsics.atomic_store_rel;
 atomic_store_relaxed   :: intrinsics.atomic_store_relaxed;
 atomic_store_unordered :: intrinsics.atomic_store_unordered;
 
 atomic_load           :: intrinsics.atomic_load;
-atomic_load_acq       :: intrinsics.atomic_load_acq;
+atomic_load_acquire   :: intrinsics.atomic_load_acq;
 atomic_load_relaxed   :: intrinsics.atomic_load_relaxed;
 atomic_load_unordered :: intrinsics.atomic_load_unordered;
 
 atomic_add          :: intrinsics.atomic_add;
-atomic_add_acq      :: intrinsics.atomic_add_acq;
-atomic_add_rel      :: intrinsics.atomic_add_rel;
+atomic_add_acquire  :: intrinsics.atomic_add_acq;
+atomic_add_release  :: intrinsics.atomic_add_rel;
 atomic_add_acqrel   :: intrinsics.atomic_add_acqrel;
 atomic_add_relaxed  :: intrinsics.atomic_add_relaxed;
 atomic_sub          :: intrinsics.atomic_sub;
-atomic_sub_acq      :: intrinsics.atomic_sub_acq;
-atomic_sub_rel      :: intrinsics.atomic_sub_rel;
+atomic_sub_acquire  :: intrinsics.atomic_sub_acq;
+atomic_sub_release  :: intrinsics.atomic_sub_rel;
 atomic_sub_acqrel   :: intrinsics.atomic_sub_acqrel;
 atomic_sub_relaxed  :: intrinsics.atomic_sub_relaxed;
 atomic_and          :: intrinsics.atomic_and;
-atomic_and_acq      :: intrinsics.atomic_and_acq;
-atomic_and_rel      :: intrinsics.atomic_and_rel;
+atomic_and_acquire  :: intrinsics.atomic_and_acq;
+atomic_and_release  :: intrinsics.atomic_and_rel;
 atomic_and_acqrel   :: intrinsics.atomic_and_acqrel;
 atomic_and_relaxed  :: intrinsics.atomic_and_relaxed;
 atomic_nand         :: intrinsics.atomic_nand;
-atomic_nand_acq     :: intrinsics.atomic_nand_acq;
-atomic_nand_rel     :: intrinsics.atomic_nand_rel;
+atomic_nand_acquire :: intrinsics.atomic_nand_acq;
+atomic_nand_release :: intrinsics.atomic_nand_rel;
 atomic_nand_acqrel  :: intrinsics.atomic_nand_acqrel;
 atomic_nand_relaxed :: intrinsics.atomic_nand_relaxed;
 atomic_or           :: intrinsics.atomic_or;
-atomic_or_acq       :: intrinsics.atomic_or_acq;
-atomic_or_rel       :: intrinsics.atomic_or_rel;
+atomic_or_acquire   :: intrinsics.atomic_or_acq;
+atomic_or_release   :: intrinsics.atomic_or_rel;
 atomic_or_acqrel    :: intrinsics.atomic_or_acqrel;
 atomic_or_relaxed   :: intrinsics.atomic_or_relaxed;
 atomic_xor          :: intrinsics.atomic_xor;
-atomic_xor_acq      :: intrinsics.atomic_xor_acq;
-atomic_xor_rel      :: intrinsics.atomic_xor_rel;
+atomic_xor_acquire  :: intrinsics.atomic_xor_acq;
+atomic_xor_release  :: intrinsics.atomic_xor_rel;
 atomic_xor_acqrel   :: intrinsics.atomic_xor_acqrel;
 atomic_xor_relaxed  :: intrinsics.atomic_xor_relaxed;
 
-atomic_xchg         :: intrinsics.atomic_xchg;
-atomic_xchg_acq     :: intrinsics.atomic_xchg_acq;
-atomic_xchg_rel     :: intrinsics.atomic_xchg_rel;
-atomic_xchg_acqrel  :: intrinsics.atomic_xchg_acqrel;
-atomic_xchg_relaxed :: intrinsics.atomic_xchg_relaxed;
+atomic_exchange         :: intrinsics.atomic_xchg;
+atomic_exchange_acquire :: intrinsics.atomic_xchg_acq;
+atomic_exchange_release :: intrinsics.atomic_xchg_rel;
+atomic_exchange_acqrel  :: intrinsics.atomic_xchg_acqrel;
+atomic_exchange_relaxed :: intrinsics.atomic_xchg_relaxed;
 
-atomic_cxchg                    :: intrinsics.atomic_cxchg;
-atomic_cxchg_acq                :: intrinsics.atomic_cxchg_acq;
-atomic_cxchg_rel                :: intrinsics.atomic_cxchg_rel;
-atomic_cxchg_acqrel             :: intrinsics.atomic_cxchg_acqrel;
-atomic_cxchg_relaxed            :: intrinsics.atomic_cxchg_relaxed;
-atomic_cxchg_failrelaxed        :: intrinsics.atomic_cxchg_failrelaxed;
-atomic_cxchg_failacq            :: intrinsics.atomic_cxchg_failacq;
-atomic_cxchg_acq_failrelaxed    :: intrinsics.atomic_cxchg_acq_failrelaxed;
-atomic_cxchg_acqrel_failrelaxed :: intrinsics.atomic_cxchg_acqrel_failrelaxed;
+atomic_compare_exchange_strong                     :: intrinsics.atomic_cxchg;
+atomic_compare_exchange_strong_acquire             :: intrinsics.atomic_cxchg_acq;
+atomic_compare_exchange_strong_release             :: intrinsics.atomic_cxchg_rel;
+atomic_compare_exchange_strong_acqrel              :: intrinsics.atomic_cxchg_acqrel;
+atomic_compare_exchange_strong_relaxed             :: intrinsics.atomic_cxchg_relaxed;
+atomic_compare_exchange_strong_failrelaxed         :: intrinsics.atomic_cxchg_failrelaxed;
+atomic_compare_exchange_strong_failacquire         :: intrinsics.atomic_cxchg_failacq;
+atomic_compare_exchange_strong_acquire_failrelaxed :: intrinsics.atomic_cxchg_acq_failrelaxed;
+atomic_compare_exchange_strong_acqrel_failrelaxed  :: intrinsics.atomic_cxchg_acqrel_failrelaxed;
 
-atomic_cxchgweak                    :: intrinsics.atomic_cxchgweak;
-atomic_cxchgweak_acq                :: intrinsics.atomic_cxchgweak_acq;
-atomic_cxchgweak_rel                :: intrinsics.atomic_cxchgweak_rel;
-atomic_cxchgweak_acqrel             :: intrinsics.atomic_cxchgweak_acqrel;
-atomic_cxchgweak_relaxed            :: intrinsics.atomic_cxchgweak_relaxed;
-atomic_cxchgweak_failrelaxed        :: intrinsics.atomic_cxchgweak_failrelaxed;
-atomic_cxchgweak_failacq            :: intrinsics.atomic_cxchgweak_failacq;
-atomic_cxchgweak_acq_failrelaxed    :: intrinsics.atomic_cxchgweak_acq_failrelaxed;
-atomic_cxchgweak_acqrel_failrelaxed :: intrinsics.atomic_cxchgweak_acqrel_failrelaxed;
+atomic_compare_exchange_weak                     :: intrinsics.atomic_cxchgweak;
+atomic_compare_exchange_weak_acquire             :: intrinsics.atomic_cxchgweak_acq;
+atomic_compare_exchange_weak_release             :: intrinsics.atomic_cxchgweak_rel;
+atomic_compare_exchange_weak_acqrel              :: intrinsics.atomic_cxchgweak_acqrel;
+atomic_compare_exchange_weak_relaxed             :: intrinsics.atomic_cxchgweak_relaxed;
+atomic_compare_exchange_weak_failrelaxed         :: intrinsics.atomic_cxchgweak_failrelaxed;
+atomic_compare_exchange_weak_failacquire         :: intrinsics.atomic_cxchgweak_failacq;
+atomic_compare_exchange_weak_acquire_failrelaxed :: intrinsics.atomic_cxchgweak_acq_failrelaxed;
+atomic_compare_exchange_weak_acqrel_failrelaxed  :: intrinsics.atomic_cxchgweak_acqrel_failrelaxed;
diff --git a/core/sync/sync2/channel.odin b/core/sync/sync2/channel.odin
deleted file mode 100644
index fc30d8280..000000000
--- a/core/sync/sync2/channel.odin
+++ /dev/null
@@ -1,886 +0,0 @@
-package sync2
-
-// TODO(bill): The Channel implementation needs a complete rewrite for this new package sync design
-// Especially how the `select` things work
-
-import "core:mem"
-import "core:time"
-import "core:math/rand"
-
-_, _ :: time, rand;
-
-Channel_Direction :: enum i8 {
-	Both =  0,
-	Send = +1,
-	Recv = -1,
-}
-
-Channel :: struct(T: typeid, Direction := Channel_Direction.Both) {
-	using _internal: ^Raw_Channel,
-}
-
-channel_init :: proc(ch: ^$C/Channel($T, $D), cap := 0, allocator := context.allocator) {
-	context.allocator = allocator;
-	ch._internal = raw_channel_create(size_of(T), align_of(T), cap);
-	return;
-}
-
-channel_make :: proc($T: typeid, cap := 0, allocator := context.allocator) -> (ch: Channel(T, .Both)) {
-	context.allocator = allocator;
-	ch._internal = raw_channel_create(size_of(T), align_of(T), cap);
-	return;
-}
-
-channel_make_send :: proc($T: typeid, cap := 0, allocator := context.allocator) -> (ch: Channel(T, .Send)) {
-	context.allocator = allocator;
-	ch._internal = raw_channel_create(size_of(T), align_of(T), cap);
-	return;
-}
-channel_make_recv :: proc($T: typeid, cap := 0, allocator := context.allocator) -> (ch: Channel(T, .Recv)) {
-	context.allocator = allocator;
-	ch._internal = raw_channel_create(size_of(T), align_of(T), cap);
-	return;
-}
-
-channel_destroy :: proc(ch: $C/Channel($T, $D)) {
-	raw_channel_destroy(ch._internal);
-}
-
-channel_as_send :: proc(ch: $C/Channel($T, .Both)) -> (res: Channel(T, .Send)) {
-	res._internal = ch._internal;
-	return;
-}
-
-channel_as_recv :: proc(ch: $C/Channel($T, .Both)) -> (res: Channel(T, .Recv)) {
-	res._internal = ch._internal;
-	return;
-}
-
-
-channel_len :: proc(ch: $C/Channel($T, $D)) -> int {
-	return ch._internal.len if ch._internal != nil else 0;
-}
-channel_cap :: proc(ch: $C/Channel($T, $D)) -> int {
-	return ch._internal.cap if ch._internal != nil else 0;
-}
-
-
-channel_send :: proc(ch: $C/Channel($T, $D), msg: T, loc := #caller_location) where D >= .Both {
-	msg := msg;
-	_ = raw_channel_send_impl(ch._internal, &msg, /*block*/true, loc);
-}
-channel_try_send :: proc(ch: $C/Channel($T, $D), msg: T, loc := #caller_location) -> bool where D >= .Both {
-	msg := msg;
-	return raw_channel_send_impl(ch._internal, &msg, /*block*/false, loc);
-}
-
-channel_recv :: proc(ch: $C/Channel($T, $D), loc := #caller_location) -> (msg: T) where D <= .Both {
-	c := ch._internal;
-	if c == nil {
-		panic(message="cannot recv message; channel is nil", loc=loc);
-	}
-	mutex_lock(&c.mutex);
-	raw_channel_recv_impl(c, &msg, loc);
-	mutex_unlock(&c.mutex);
-	return;
-}
-channel_try_recv :: proc(ch: $C/Channel($T, $D), loc := #caller_location) -> (msg: T, ok: bool) where D <= .Both {
-	c := ch._internal;
-	if c != nil && mutex_try_lock(&c.mutex) {
-		if c.len > 0 {
-			raw_channel_recv_impl(c, &msg, loc);
-			ok = true;
-		}
-		mutex_unlock(&c.mutex);
-	}
-	return;
-}
-channel_try_recv_ptr :: proc(ch: $C/Channel($T, $D), msg: ^T, loc := #caller_location) -> (ok: bool) where D <= .Both {
-	res: T;
-	res, ok = channel_try_recv(ch, loc);
-	if ok && msg != nil {
-		msg^ = res;
-	}
-	return;
-}
-
-
-channel_is_nil :: proc(ch: $C/Channel($T, $D)) -> bool {
-	return ch._internal == nil;
-}
-channel_is_open :: proc(ch: $C/Channel($T, $D)) -> bool {
-	c := ch._internal;
-	return c != nil && !c.closed;
-}
-
-
-channel_eq :: proc(a, b: $C/Channel($T, $D)) -> bool {
-	return a._internal == b._internal;
-}
-channel_ne :: proc(a, b: $C/Channel($T, $D)) -> bool {
-	return a._internal != b._internal;
-}
-
-
-channel_can_send :: proc(ch: $C/Channel($T, $D)) -> (ok: bool) where D >= .Both {
-	return raw_channel_can_send(ch._internal);
-}
-channel_can_recv :: proc(ch: $C/Channel($T, $D)) -> (ok: bool) where D <= .Both {
-	return raw_channel_can_recv(ch._internal);
-}
-
-
-channel_peek :: proc(ch: $C/Channel($T, $D)) -> int {
-	c := ch._internal;
-	if c == nil {
-		return -1;
-	}
-	if atomic_load(&c.closed) {
-		return -1;
-	}
-	return atomic_load(&c.len);
-}
-
-
-channel_close :: proc(ch: $C/Channel($T, $D), loc := #caller_location) {
-	raw_channel_close(ch._internal, loc);
-}
-
-
-channel_iterator :: proc(ch: $C/Channel($T, $D)) -> (msg: T, ok: bool) where D <= .Both {
-	c := ch._internal;
-	if c == nil {
-		return;
-	}
-
-	if !c.closed || c.len > 0 {
-		msg, ok = channel_recv(ch), true;
-	}
-	return;
-}
-channel_drain :: proc(ch: $C/Channel($T, $D)) where D >= .Both {
-	raw_channel_drain(ch._internal);
-}
-
-
-channel_move :: proc(dst: $C1/Channel($T, $D1) src: $C2/Channel(T, $D2)) where D1 <= .Both, D2 >= .Both {
-	for msg in channel_iterator(src) {
-		channel_send(dst, msg);
-	}
-}
-
-
-Raw_Channel_Wait_Queue :: struct {
-	next: ^Raw_Channel_Wait_Queue,
-	state: ^uintptr,
-}
-
-
-Raw_Channel :: struct {
-	closed:      bool,
-	ready:       bool, // ready to recv
-	data_offset: u16,  // data is stored at the end of this data structure
-	elem_size:   u32,
-	len, cap:    int,
-	read, write: int,
-	mutex:       Mutex,
-	cond:        Cond,
-	allocator:   mem.Allocator,
-
-	sendq: ^Raw_Channel_Wait_Queue,
-	recvq: ^Raw_Channel_Wait_Queue,
-}
-
-raw_channel_wait_queue_insert :: proc(head: ^^Raw_Channel_Wait_Queue, val: ^Raw_Channel_Wait_Queue) {
-	val.next = head^;
-	head^ = val;
-}
-raw_channel_wait_queue_remove :: proc(head: ^^Raw_Channel_Wait_Queue, val: ^Raw_Channel_Wait_Queue) {
-	p := head;
-	for p^ != nil && p^ != val {
-		p = &p^.next;
-	}
-	if p != nil {
-		p^ = p^.next;
-	}
-}
-
-
-raw_channel_create :: proc(elem_size, elem_align: int, cap := 0) -> ^Raw_Channel {
-	assert(int(u32(elem_size)) == elem_size);
-
-	s := size_of(Raw_Channel);
-	s = mem.align_forward_int(s, elem_align);
-	data_offset := uintptr(s);
-	s += elem_size * max(cap, 1);
-
-	a := max(elem_align, align_of(Raw_Channel));
-
-	c := (^Raw_Channel)(mem.alloc(s, a));
-	if c == nil {
-		return nil;
-	}
-
-	c.data_offset = u16(data_offset);
-	c.elem_size = u32(elem_size);
-	c.len, c.cap = 0, max(cap, 0);
-	c.read, c.write = 0, 0;
-	c.allocator = context.allocator;
-	c.closed = false;
-
-	return c;
-}
-
-
-raw_channel_destroy :: proc(c: ^Raw_Channel) {
-	if c == nil {
-		return;
-	}
-	context.allocator = c.allocator;
-	atomic_store(&c.closed, true);
-	free(c);
-}
-
-raw_channel_close :: proc(c: ^Raw_Channel, loc := #caller_location) {
-	if c == nil {
-		panic(message="cannot close nil channel", loc=loc);
-	}
-	mutex_lock(&c.mutex);
-	defer mutex_unlock(&c.mutex);
-	atomic_store(&c.closed, true);
-
-	// Release readers and writers
-	raw_channel_wait_queue_broadcast(c.recvq);
-	raw_channel_wait_queue_broadcast(c.sendq);
-	cond_broadcast(&c.cond);
-}
-
-
-
-raw_channel_send_impl :: proc(c: ^Raw_Channel, msg: rawptr, block: bool, loc := #caller_location) -> bool {
-	send :: proc(c: ^Raw_Channel, src: rawptr) {
-		data := uintptr(c) + uintptr(c.data_offset);
-		dst := data + uintptr(c.write * int(c.elem_size));
-		mem.copy(rawptr(dst), src, int(c.elem_size));
-		c.len += 1;
-		c.write = (c.write + 1) % max(c.cap, 1);
-	}
-
-	switch {
-	case c == nil:
-		panic(message="cannot send message; channel is nil", loc=loc);
-	case c.closed:
-		panic(message="cannot send message; channel is closed", loc=loc);
-	}
-
-	mutex_lock(&c.mutex);
-	defer mutex_unlock(&c.mutex);
-
-	if c.cap > 0 {
-		if !block && c.len >= c.cap {
-			return false;
-		}
-
-		for c.len >= c.cap {
-			cond_wait(&c.cond, &c.mutex);
-		}
-	} else if c.len > 0 { // TODO(bill): determine correct behaviour
-		if !block {
-			return false;
-		}
-		cond_wait(&c.cond, &c.mutex);
-	} else if c.len == 0 && !block {
-		return false;
-	}
-
-	send(c, msg);
-	cond_signal(&c.cond);
-	raw_channel_wait_queue_signal(c.recvq);
-
-	return true;
-}
-
-raw_channel_recv_impl :: proc(c: ^Raw_Channel, res: rawptr, loc := #caller_location) {
-	recv :: proc(c: ^Raw_Channel, dst: rawptr, loc := #caller_location) {
-		if c.len < 1 {
-			panic(message="cannot recv message; channel is empty", loc=loc);
-		}
-		c.len -= 1;
-
-		data := uintptr(c) + uintptr(c.data_offset);
-		src := data + uintptr(c.read * int(c.elem_size));
-		mem.copy(dst, rawptr(src), int(c.elem_size));
-		c.read = (c.read + 1) % max(c.cap, 1);
-	}
-
-	if c == nil {
-		panic(message="cannot recv message; channel is nil", loc=loc);
-	}
-	atomic_store(&c.ready, true);
-	for c.len < 1 {
-		raw_channel_wait_queue_signal(c.sendq);
-		cond_wait(&c.cond, &c.mutex);
-	}
-	atomic_store(&c.ready, false);
-	recv(c, res, loc);
-	if c.cap > 0 {
-		if c.len == c.cap - 1 {
-			// NOTE(bill): Only signal on the last one
-			cond_signal(&c.cond);
-		}
-	} else {
-		cond_signal(&c.cond);
-	}
-}
-
-
-raw_channel_can_send :: proc(c: ^Raw_Channel) -> (ok: bool) {
-	if c == nil {
-		return false;
-	}
-	mutex_lock(&c.mutex);
-	switch {
-	case c.closed:
-		ok = false;
-	case c.cap > 0:
-		ok = c.ready && c.len < c.cap;
-	case:
-		ok = c.ready && c.len == 0;
-	}
-	mutex_unlock(&c.mutex);
-	return;
-}
-raw_channel_can_recv :: proc(c: ^Raw_Channel) -> (ok: bool) {
-	if c == nil {
-		return false;
-	}
-	mutex_lock(&c.mutex);
-	ok = c.len > 0;
-	mutex_unlock(&c.mutex);
-	return;
-}
-
-
-raw_channel_drain :: proc(c: ^Raw_Channel) {
-	if c == nil {
-		return;
-	}
-	mutex_lock(&c.mutex);
-	c.len   = 0;
-	c.read  = 0;
-	c.write = 0;
-	mutex_unlock(&c.mutex);
-}
-
-
-
-MAX_SELECT_CHANNELS :: 64;
-SELECT_MAX_TIMEOUT :: max(time.Duration);
-
-Select_Command :: enum {
-	Recv,
-	Send,
-}
-
-Select_Channel :: struct {
-	channel: ^Raw_Channel,
-	command: Select_Command,
-}
-
-
-
-select :: proc(channels: ..Select_Channel) -> (index: int) {
-	return select_timeout(SELECT_MAX_TIMEOUT, ..channels);
-}
-select_timeout :: proc(timeout: time.Duration, channels: ..Select_Channel) -> (index: int) {
-	switch len(channels) {
-	case 0:
-		panic("sync: select with no channels");
-	}
-
-	assert(len(channels) <= MAX_SELECT_CHANNELS);
-
-	backing: [MAX_SELECT_CHANNELS]int;
-	queues:  [MAX_SELECT_CHANNELS]Raw_Channel_Wait_Queue;
-	candidates := backing[:];
-	cap := len(channels);
-	candidates = candidates[:cap];
-
-	count := u32(0);
-	for c, i in channels {
-		if c.channel == nil {
-			continue;
-		}
-		switch c.command {
-		case .Recv:
-			if raw_channel_can_recv(c.channel) {
-				candidates[count] = i;
-				count += 1;
-			}
-		case .Send:
-			if raw_channel_can_send(c.channel) {
-				candidates[count] = i;
-				count += 1;
-			}
-		}
-	}
-
-	if count == 0 {
-		wait_state: uintptr = 0;
-		for _, i in channels {
-			q := &queues[i];
-			q.state = &wait_state;
-		}
-
-		for c, i in channels {
-			if c.channel == nil {
-				continue;
-			}
-			q := &queues[i];
-			switch c.command {
-			case .Recv: raw_channel_wait_queue_insert(&c.channel.recvq, q);
-			case .Send: raw_channel_wait_queue_insert(&c.channel.sendq, q);
-			}
-		}
-		raw_channel_wait_queue_wait_on(&wait_state, timeout);
-		for c, i in channels {
-			if c.channel == nil {
-				continue;
-			}
-			q := &queues[i];
-			switch c.command {
-			case .Recv: raw_channel_wait_queue_remove(&c.channel.recvq, q);
-			case .Send: raw_channel_wait_queue_remove(&c.channel.sendq, q);
-			}
-		}
-
-		for c, i in channels {
-			switch c.command {
-			case .Recv:
-				if raw_channel_can_recv(c.channel) {
-					candidates[count] = i;
-					count += 1;
-				}
-			case .Send:
-				if raw_channel_can_send(c.channel) {
-					candidates[count] = i;
-					count += 1;
-				}
-			}
-		}
-		if count == 0 && timeout == SELECT_MAX_TIMEOUT {
-			index = -1;
-			return;
-		}
-
-		assert(count != 0);
-	}
-
-	t := time.now();
-	r := rand.create(transmute(u64)t);
-	i := rand.uint32(&r);
-
-	index = candidates[i % count];
-	return;
-}
-
-select_recv :: proc(channels: ..^Raw_Channel) -> (index: int) {
-	switch len(channels) {
-	case 0:
-		panic("sync: select with no channels");
-	}
-
-	assert(len(channels) <= MAX_SELECT_CHANNELS);
-
-	backing: [MAX_SELECT_CHANNELS]int;
-	queues:  [MAX_SELECT_CHANNELS]Raw_Channel_Wait_Queue;
-	candidates := backing[:];
-	cap := len(channels);
-	candidates = candidates[:cap];
-
-	count := u32(0);
-	for c, i in channels {
-		if raw_channel_can_recv(c) {
-			candidates[count] = i;
-			count += 1;
-		}
-	}
-
-	if count == 0 {
-		state: uintptr;
-		for c, i in channels {
-			q := &queues[i];
-			q.state = &state;
-			raw_channel_wait_queue_insert(&c.recvq, q);
-		}
-		raw_channel_wait_queue_wait_on(&state, SELECT_MAX_TIMEOUT);
-		for c, i in channels {
-			q := &queues[i];
-			raw_channel_wait_queue_remove(&c.recvq, q);
-		}
-
-		for c, i in channels {
-			if raw_channel_can_recv(c) {
-				candidates[count] = i;
-				count += 1;
-			}
-		}
-		assert(count != 0);
-	}
-
-	t := time.now();
-	r := rand.create(transmute(u64)t);
-	i := rand.uint32(&r);
-
-	index = candidates[i % count];
-	return;
-}
-
-select_recv_msg :: proc(channels: ..$C/Channel($T, $D)) -> (msg: T, index: int) {
-	switch len(channels) {
-	case 0:
-		panic("sync: select with no channels");
-	}
-
-	assert(len(channels) <= MAX_SELECT_CHANNELS);
-
-	queues:  [MAX_SELECT_CHANNELS]Raw_Channel_Wait_Queue;
-	candidates: [MAX_SELECT_CHANNELS]int;
-
-	count := u32(0);
-	for c, i in channels {
-		if raw_channel_can_recv(c) {
-			candidates[count] = i;
-			count += 1;
-		}
-	}
-
-	if count == 0 {
-		state: uintptr;
-		for c, i in channels {
-			q := &queues[i];
-			q.state = &state;
-			raw_channel_wait_queue_insert(&c.recvq, q);
-		}
-		raw_channel_wait_queue_wait_on(&state, SELECT_MAX_TIMEOUT);
-		for c, i in channels {
-			q := &queues[i];
-			raw_channel_wait_queue_remove(&c.recvq, q);
-		}
-
-		for c, i in channels {
-			if raw_channel_can_recv(c) {
-				candidates[count] = i;
-				count += 1;
-			}
-		}
-		assert(count != 0);
-	}
-
-	t := time.now();
-	r := rand.create(transmute(u64)t);
-	i := rand.uint32(&r);
-
-	index = candidates[i % count];
-	msg = channel_recv(channels[index]);
-
-	return;
-}
-
-select_send_msg :: proc(msg: $T, channels: ..$C/Channel(T, $D)) -> (index: int) {
-	switch len(channels) {
-	case 0:
-		panic("sync: select with no channels");
-	}
-
-	assert(len(channels) <= MAX_SELECT_CHANNELS);
-
-	backing: [MAX_SELECT_CHANNELS]int;
-	queues:  [MAX_SELECT_CHANNELS]Raw_Channel_Wait_Queue;
-	candidates := backing[:];
-	cap := len(channels);
-	candidates = candidates[:cap];
-
-	count := u32(0);
-	for c, i in channels {
-		if raw_channel_can_recv(c) {
-			candidates[count] = i;
-			count += 1;
-		}
-	}
-
-	if count == 0 {
-		state: uintptr;
-		for c, i in channels {
-			q := &queues[i];
-			q.state = &state;
-			raw_channel_wait_queue_insert(&c.recvq, q);
-		}
-		raw_channel_wait_queue_wait_on(&state, SELECT_MAX_TIMEOUT);
-		for c, i in channels {
-			q := &queues[i];
-			raw_channel_wait_queue_remove(&c.recvq, q);
-		}
-
-		for c, i in channels {
-			if raw_channel_can_recv(c) {
-				candidates[count] = i;
-				count += 1;
-			}
-		}
-		assert(count != 0);
-	}
-
-	t := time.now();
-	r := rand.create(transmute(u64)t);
-	i := rand.uint32(&r);
-
-	index = candidates[i % count];
-
-	if msg != nil {
-		channel_send(channels[index], msg);
-	}
-
-	return;
-}
-
-select_send :: proc(channels: ..^Raw_Channel) -> (index: int) {
-	switch len(channels) {
-	case 0:
-		panic("sync: select with no channels");
-	}
-
-	assert(len(channels) <= MAX_SELECT_CHANNELS);
-	candidates: [MAX_SELECT_CHANNELS]int;
-	queues: [MAX_SELECT_CHANNELS]Raw_Channel_Wait_Queue;
-
-	count := u32(0);
-	for c, i in channels {
-		if raw_channel_can_send(c) {
-			candidates[count] = i;
-			count += 1;
-		}
-	}
-
-	if count == 0 {
-		state: uintptr;
-		for c, i in channels {
-			q := &queues[i];
-			q.state = &state;
-			raw_channel_wait_queue_insert(&c.sendq, q);
-		}
-		raw_channel_wait_queue_wait_on(&state, SELECT_MAX_TIMEOUT);
-		for c, i in channels {
-			q := &queues[i];
-			raw_channel_wait_queue_remove(&c.sendq, q);
-		}
-
-		for c, i in channels {
-			if raw_channel_can_send(c) {
-				candidates[count] = i;
-				count += 1;
-			}
-		}
-		assert(count != 0);
-	}
-
-	t := time.now();
-	r := rand.create(transmute(u64)t);
-	i := rand.uint32(&r);
-
-	index = candidates[i % count];
-	return;
-}
-
-select_try :: proc(channels: ..Select_Channel) -> (index: int) {
-	switch len(channels) {
-	case 0:
-		panic("sync: select with no channels");
-	}
-
-	assert(len(channels) <= MAX_SELECT_CHANNELS);
-
-	backing: [MAX_SELECT_CHANNELS]int;
-	candidates := backing[:];
-	cap := len(channels);
-	candidates = candidates[:cap];
-
-	count := u32(0);
-	for c, i in channels {
-		switch c.command {
-		case .Recv:
-			if raw_channel_can_recv(c.channel) {
-				candidates[count] = i;
-				count += 1;
-			}
-		case .Send:
-			if raw_channel_can_send(c.channel) {
-				candidates[count] = i;
-				count += 1;
-			}
-		}
-	}
-
-	if count == 0 {
-		index = -1;
-		return;
-	}
-
-	t := time.now();
-	r := rand.create(transmute(u64)t);
-	i := rand.uint32(&r);
-
-	index = candidates[i % count];
-	return;
-}
-
-
-select_try_recv :: proc(channels: ..^Raw_Channel) -> (index: int) {
-	switch len(channels) {
-	case 0:
-		index = -1;
-		return;
-	case 1:
-		index = -1;
-		if raw_channel_can_recv(channels[0]) {
-			index = 0;
-		}
-		return;
-	}
-
-	assert(len(channels) <= MAX_SELECT_CHANNELS);
-	candidates: [MAX_SELECT_CHANNELS]int;
-
-	count := u32(0);
-	for c, i in channels {
-		if raw_channel_can_recv(c) {
-			candidates[count] = i;
-			count += 1;
-		}
-	}
-
-	if count == 0 {
-		index = -1;
-		return;
-	}
-
-	t := time.now();
-	r := rand.create(transmute(u64)t);
-	i := rand.uint32(&r);
-
-	index = candidates[i % count];
-	return;
-}
-
-
-select_try_send :: proc(channels: ..^Raw_Channel) -> (index: int) #no_bounds_check {
-	switch len(channels) {
-	case 0:
-		return -1;
-	case 1:
-		if raw_channel_can_send(channels[0]) {
-			return 0;
-		}
-		return -1;
-	}
-
-	assert(len(channels) <= MAX_SELECT_CHANNELS);
-	candidates: [MAX_SELECT_CHANNELS]int;
-
-	count := u32(0);
-	for c, i in channels {
-		if raw_channel_can_send(c) {
-			candidates[count] = i;
-			count += 1;
-		}
-	}
-
-	if count == 0 {
-		index = -1;
-		return;
-	}
-
-	t := time.now();
-	r := rand.create(transmute(u64)t);
-	i := rand.uint32(&r);
-
-	index = candidates[i % count];
-	return;
-}
-
-select_try_recv_msg :: proc(channels: ..$C/Channel($T, $D)) -> (msg: T, index: int) {
-	switch len(channels) {
-	case 0:
-		index = -1;
-		return;
-	case 1:
-		ok: bool;
-		if msg, ok = channel_try_recv(channels[0]); ok {
-			index = 0;
-		}
-		return;
-	}
-
-	assert(len(channels) <= MAX_SELECT_CHANNELS);
-	candidates: [MAX_SELECT_CHANNELS]int;
-
-	count := u32(0);
-	for c, i in channels {
-		if channel_can_recv(c) {
-			candidates[count] = i;
-			count += 1;
-		}
-	}
-
-	if count == 0 {
-		index = -1;
-		return;
-	}
-
-	t := time.now();
-	r := rand.create(transmute(u64)t);
-	i := rand.uint32(&r);
-
-	index = candidates[i % count];
-	msg = channel_recv(channels[index]);
-	return;
-}
-
-select_try_send_msg :: proc(msg: $T, channels: ..$C/Channel(T, $D)) -> (index: int) {
-	index = -1;
-	switch len(channels) {
-	case 0:
-		return;
-	case 1:
-		if channel_try_send(channels[0], msg) {
-			index = 0;
-		}
-		return;
-	}
-
-
-	assert(len(channels) <= MAX_SELECT_CHANNELS);
-	candidates: [MAX_SELECT_CHANNELS]int;
-
-	count := u32(0);
-	for c, i in channels {
-		if raw_channel_can_send(c) {
-			candidates[count] = i;
-			count += 1;
-		}
-	}
-
-	if count == 0 {
-		index = -1;
-		return;
-	}
-
-	t := time.now();
-	r := rand.create(transmute(u64)t);
-	i := rand.uint32(&r);
-
-	index = candidates[i % count];
-	channel_send(channels[index], msg);
-	return;
-}
-
diff --git a/core/sync/sync2/channel_unix.odin b/core/sync/sync2/channel_unix.odin
deleted file mode 100644
index 7429b67db..000000000
--- a/core/sync/sync2/channel_unix.odin
+++ /dev/null
@@ -1,17 +0,0 @@
-//+build linux, darwin, freebsd
-//+private
-package sync2
-
-import "core:time"
-
-raw_channel_wait_queue_wait_on :: proc(state: ^uintptr, timeout: time.Duration) {
-	// stub
-}
-
-raw_channel_wait_queue_signal :: proc(q: ^Raw_Channel_Wait_Queue) {
-	// stub
-}
-
-raw_channel_wait_queue_broadcast :: proc(q: ^Raw_Channel_Wait_Queue) {
-	// stub
-}
diff --git a/core/sync/sync2/channel_windows.odin b/core/sync/sync2/channel_windows.odin
deleted file mode 100644
index e365506c8..000000000
--- a/core/sync/sync2/channel_windows.odin
+++ /dev/null
@@ -1,34 +0,0 @@
-//+build windows
-//+private
-package sync2
-
-import win32 "core:sys/windows"
-import "core:time"
-
-raw_channel_wait_queue_wait_on :: proc(state: ^uintptr, timeout: time.Duration) {
-	ms: win32.DWORD = win32.INFINITE;
-	if max(time.Duration) != SELECT_MAX_TIMEOUT {
-		ms = win32.DWORD((max(time.duration_nanoseconds(timeout), 0) + 999999)/1000000);
-	}
-
-	v := atomic_load(state);
-	for v == 0 {
-		win32.WaitOnAddress(state, &v, size_of(state^), ms);
-		v = atomic_load(state);
-	}
-	atomic_store(state, 0);
-}
-
-raw_channel_wait_queue_signal :: proc(q: ^Raw_Channel_Wait_Queue) {
-	for x := q; x != nil; x = x.next {
-		atomic_add(x.state, 1);
-		win32.WakeByAddressSingle(x.state);
-	}
-}
-
-raw_channel_wait_queue_broadcast :: proc(q: ^Raw_Channel_Wait_Queue) {
-	for x := q; x != nil; x = x.next {
-		atomic_add(x.state, 1);
-		win32.WakeByAddressAll(x.state);
-	}
-}
diff --git a/core/sync/sync2/extended.odin b/core/sync/sync2/extended.odin
index 3f44a172a..06051c822 100644
--- a/core/sync/sync2/extended.odin
+++ b/core/sync/sync2/extended.odin
@@ -122,6 +122,36 @@ barrier_wait :: proc(b: ^Barrier) -> (is_leader: bool) {
 }
 
 
+Auto_Reset_Event :: struct {
+	// status ==  0: Event is reset and no threads are waiting
+	// status ==  1: Event is signaled
+	// status == -N: Event is reset and N threads are waiting
+	status: i32,
+	sema:   Sema,
+}
+
+auto_reset_event_signal :: proc(e: ^Auto_Reset_Event) {
+	// Raise status by one, saturating at 1 (signaled), then wake one waiter if any thread was waiting.
+	old_status := atomic_load_relaxed(&e.status);
+	for {
+		new_status := old_status + 1 if old_status < 1 else 1;
+		prev, ok := atomic_compare_exchange_weak_release(&e.status, old_status, new_status);
+		if ok { break; }
+		old_status = prev; // CAS failed: retry against the value that was actually observed
+	}
+	if old_status < 0 { // status was -N before the update, i.e. N threads are waiting on the semaphore
+		sema_post(&e.sema);
+	}
+}
+
+auto_reset_event_wait :: proc(e: ^Auto_Reset_Event) {
+	old_status := atomic_sub_acquire(&e.status, 1); // consume the signal, or register as one more waiter
+	if old_status < 1 { // event was not signaled (status 1) before the decrement, so block on the semaphore
+		sema_wait(&e.sema);
+	}
+}
+
+
 
 Ticket_Mutex :: struct {
 	ticket:  uint,
@@ -130,7 +160,7 @@ Ticket_Mutex :: struct {
 
 ticket_mutex_lock :: #force_inline proc(m: ^Ticket_Mutex) {
 	ticket := atomic_add_relaxed(&m.ticket, 1);
-	for ticket != atomic_load_acq(&m.serving) {
+	for ticket != atomic_load_acquire(&m.serving) {
 		cpu_relax();
 	}
 }
@@ -142,23 +172,23 @@ ticket_mutex_unlock :: #force_inline proc(m: ^Ticket_Mutex) {
 
 
 Benaphore :: struct {
-	counter: int,
+	counter: i32,
 	sema:    Sema,
 }
 
 benaphore_lock :: proc(b: ^Benaphore) {
-	if atomic_add_acq(&b.counter, 1) > 1 {
+	if atomic_add_acquire(&b.counter, 1) > 0 { // previous count > 0: the lock is already held, so wait
 		sema_wait(&b.sema);
 	}
 }
 
 benaphore_try_lock :: proc(b: ^Benaphore) -> bool {
-	v, _ := atomic_cxchg_acq(&b.counter, 1, 0);
+	v, _ := atomic_compare_exchange_strong_acquire(&b.counter, 0, 1); // take the lock only when uncontended: 0 -> 1
 	return v == 0;
 }
 
 benaphore_unlock :: proc(b: ^Benaphore) {
-	if atomic_sub_rel(&b.counter, 1) > 0 {
+	if atomic_sub_release(&b.counter, 1) > 1 { // previous count > 1: another thread is waiting, so wake it
 		sema_post(&b.sema);
 	}
 }
@@ -166,13 +196,13 @@ benaphore_unlock :: proc(b: ^Benaphore) {
 Recursive_Benaphore :: struct {
 	counter:   int,
 	owner:     int,
-	recursion: int,
+	recursion: i32,
 	sema:      Sema,
 }
 
 recursive_benaphore_lock :: proc(b: ^Recursive_Benaphore) {
 	tid := runtime.current_thread_id();
-	if atomic_add_acq(&b.counter, 1) > 1 {
+	if atomic_add_acquire(&b.counter, 1) > 1 {
 		if tid != b.owner {
 			sema_wait(&b.sema);
 		}
@@ -185,10 +215,10 @@ recursive_benaphore_lock :: proc(b: ^Recursive_Benaphore) {
 recursive_benaphore_try_lock :: proc(b: ^Recursive_Benaphore) -> bool {
 	tid := runtime.current_thread_id();
 	if b.owner == tid {
-		atomic_add_acq(&b.counter, 1);
+		atomic_add_acquire(&b.counter, 1);
 	}
 
-	if v, _ := atomic_cxchg_acq(&b.counter, 1, 0); v != 0 {
+	if v, _ := atomic_compare_exchange_strong_acquire(&b.counter, 1, 0); v != 0 {
 		return false;
 	}
 	// inside the lock
@@ -205,7 +235,7 @@ recursive_benaphore_unlock :: proc(b: ^Recursive_Benaphore) {
 	if recursion == 0 {
 		b.owner = 0;
 	}
-	if atomic_sub_rel(&b.counter, 1) > 0 {
+	if atomic_sub_release(&b.counter, 1) > 0 {
 		if recursion == 0 {
 			sema_post(&b.sema);
 		}
@@ -223,7 +253,7 @@ Once :: struct {
 }
 
 once_do :: proc(o: ^Once, fn: proc()) {
-	if atomic_load_acq(&o.done) == false {
+	if atomic_load_acquire(&o.done) == false {
 		_once_do_slow(o, fn);
 	}
 }
@@ -234,6 +264,6 @@ _once_do_slow :: proc(o: ^Once, fn: proc()) {
 	defer mutex_unlock(&o.m);
 	if !o.done {
 		fn();
-		atomic_store_rel(&o.done, true);
+		atomic_store_release(&o.done, true);
 	}
 }
diff --git a/core/sync/sync2/primitives.odin b/core/sync/sync2/primitives.odin
index dd6688a50..1ed83f706 100644
--- a/core/sync/sync2/primitives.odin
+++ b/core/sync/sync2/primitives.odin
@@ -1,7 +1,6 @@
 package sync2
 
 import "core:time"
-import "core:runtime"
 
 // A Mutex is a mutual exclusion lock
 // The zero value for a Mutex is an unlocked mutex
@@ -26,6 +25,18 @@ mutex_try_lock :: proc(m: ^Mutex) -> bool {
 	return _mutex_try_lock(m);
 }
 
+// Example:
+//
+// if mutex_guard(&m) {
+//         ...
+// }
+//
+@(deferred_in=mutex_unlock)
+mutex_guard :: proc(m: ^Mutex) -> bool {
+	mutex_lock(m);
+	return true;
+}
+
 // A RW_Mutex is a reader/writer mutual exclusion lock
 // The lock can be held by any arbitrary number of readers or a single writer
 // The zero value for a RW_Mutex is an unlocked mutex
@@ -66,61 +77,65 @@ rw_mutex_try_shared_lock :: proc(rw: ^RW_Mutex) -> bool {
 	return _rw_mutex_try_shared_lock(rw);
 }
 
+// Example:
+//
+// if rw_mutex_guard(&m) {
+//         ...
+// }
+//
+@(deferred_in=rw_mutex_unlock)
+rw_mutex_guard :: proc(m: ^RW_Mutex) -> bool {
+	rw_mutex_lock(m);
+	return true;
+}
+
+// Example:
+//
+// if rw_mutex_shared_guard(&m) {
+//         ...
+// }
+//
+@(deferred_in=rw_mutex_shared_unlock)
+rw_mutex_shared_guard :: proc(m: ^RW_Mutex) -> bool {
+	rw_mutex_shared_lock(m);
+	return true;
+}
+
+
 
 // A Recusrive_Mutex is a recursive mutual exclusion lock
 // The zero value for a Recursive_Mutex is an unlocked mutex
 //
 // A Recursive_Mutex must not be copied after first use
 Recursive_Mutex :: struct {
-	// TODO(bill): Is this implementation too lazy?
-	// Can this be made to work on all OSes without construction and destruction, i.e. Zero is Initialized
-	// CRITICAL_SECTION would be a perfect candidate for this on Windows but that cannot be "dumb"
-
-	owner:     int,
-	recursion: int,
-	mutex: Mutex,
+	impl: _Recursive_Mutex,
 }
 
 recursive_mutex_lock :: proc(m: ^Recursive_Mutex) {
-	tid := runtime.current_thread_id();
-	if tid != m.owner {
-		mutex_lock(&m.mutex);
-	}
-	// inside the lock
-	m.owner = tid;
-	m.recursion += 1;
+	_recursive_mutex_lock(m);
 }
 
 recursive_mutex_unlock :: proc(m: ^Recursive_Mutex) {
-	tid := runtime.current_thread_id();
-	assert(tid == m.owner);
-	m.recursion -= 1;
-	recursion := m.recursion;
-	if recursion == 0 {
-		m.owner = 0;
-	}
-	if recursion == 0 {
-		mutex_unlock(&m.mutex);
-	}
-	// outside the lock
-
+	_recursive_mutex_unlock(m);
 }
 
 recursive_mutex_try_lock :: proc(m: ^Recursive_Mutex) -> bool {
-	tid := runtime.current_thread_id();
-	if m.owner == tid {
-		return mutex_try_lock(&m.mutex);
-	}
-	if !mutex_try_lock(&m.mutex) {
-		return false;
-	}
-	// inside the lock
-	m.owner = tid;
-	m.recursion += 1;
-	return true;
+	return _recursive_mutex_try_lock(m);
 }
 
 
+// Example:
+//
+// if recursive_mutex_guard(&m) {
+//         ...
+// }
+//
+@(deferred_in=recursive_mutex_unlock)
+recursive_mutex_guard :: proc(m: ^Recursive_Mutex) -> bool {
+	recursive_mutex_lock(m);
+	return true;
+}
+
 
 // Cond implements a condition variable, a rendezvous point for threads
 // waiting for signalling the occurence of an event
@@ -153,33 +168,14 @@ cond_broadcast :: proc(c: ^Cond) {
 //
 // A Sema must not be copied after first use
 Sema :: struct {
-	// TODO(bill): Is this implementation too lazy?
-	// Can this be made to work on all OSes without construction and destruction, i.e. Zero is Initialized
-
-	mutex: Mutex,
-	cond:  Cond,
-	count: int,
+	impl: _Sema,
 }
 
 
 sema_wait :: proc(s: ^Sema) {
-	mutex_lock(&s.mutex);
-	defer mutex_unlock(&s.mutex);
-
-	for s.count == 0 {
-		cond_wait(&s.cond, &s.mutex);
-	}
-
-	s.count -= 1;
-	if s.count > 0 {
-		cond_signal(&s.cond);
-	}
+	_sema_wait(s);
 }
 
 sema_post :: proc(s: ^Sema, count := 1) {
-	mutex_lock(&s.mutex);
-	defer mutex_unlock(&s.mutex);
-
-	s.count += count;
-	cond_signal(&s.cond);
+	_sema_post(s, count);
 }
diff --git a/core/sync/sync2/primitives_atomic.odin b/core/sync/sync2/primitives_atomic.odin
index 610ab7ee0..7043f8c84 100644
--- a/core/sync/sync2/primitives_atomic.odin
+++ b/core/sync/sync2/primitives_atomic.odin
@@ -5,6 +5,7 @@ package sync2
 when !#config(ODIN_SYNC_USE_PTHREADS, true) {
 
 import "core:time"
+import "core:runtime"
 
 _Mutex_State :: enum i32 {
 	Unlocked = 0,
@@ -160,6 +161,54 @@ _rw_mutex_try_shared_lock :: proc(rw: ^RW_Mutex) -> bool {
 }
 
 
+_Recursive_Mutex :: struct {
+	owner:     int,
+	recursion: int,
+	mutex: Mutex,
+}
+
+_recursive_mutex_lock :: proc(m: ^Recursive_Mutex) {
+	tid := runtime.current_thread_id();
+	if tid != m.impl.owner {
+		mutex_lock(&m.impl.mutex);
+	}
+	// inside the lock
+	m.impl.owner = tid;
+	m.impl.recursion += 1;
+}
+
+_recursive_mutex_unlock :: proc(m: ^Recursive_Mutex) {
+	tid := runtime.current_thread_id();
+	assert(tid == m.impl.owner);
+	m.impl.recursion -= 1;
+	recursion := m.impl.recursion;
+	if recursion == 0 {
+		m.impl.owner = 0;
+	}
+	if recursion == 0 {
+		mutex_unlock(&m.impl.mutex);
+	}
+	// outside the lock
+
+}
+
+_recursive_mutex_try_lock :: proc(m: ^Recursive_Mutex) -> bool {
+	tid := runtime.current_thread_id();
+	// The owning thread already holds the inner mutex; only a different thread has to try to take it.
+	if tid != m.impl.owner {
+		if !mutex_try_lock(&m.impl.mutex) {
+			return false;
+		}
+	}
+	// inside the lock
+	m.impl.owner = tid;
+	m.impl.recursion += 1;
+	return true;
+}
+
+
+
+
 
 Queue_Item :: struct {
 	next: ^Queue_Item,
@@ -240,5 +289,35 @@ _cond_broadcast :: proc(c: ^Cond) {
 	}
 }
 
+_Sema :: struct {
+	mutex: Mutex,
+	cond:  Cond,
+	count: int,
+}
+
+_sema_wait :: proc(s: ^Sema) {
+	mutex_lock(&s.impl.mutex);
+	defer mutex_unlock(&s.impl.mutex);
+
+	for s.impl.count == 0 {
+		cond_wait(&s.impl.cond, &s.impl.mutex);
+	}
+
+	s.impl.count -= 1;
+	if s.impl.count > 0 {
+		cond_signal(&s.impl.cond);
+	}
+}
+
+_sema_post :: proc(s: ^Sema, count := 1) {
+	mutex_lock(&s.impl.mutex);
+	defer mutex_unlock(&s.impl.mutex);
+
+	s.impl.count += count;
+	cond_signal(&s.impl.cond);
+}
+
+
+
 
 } // !ODIN_SYNC_USE_PTHREADS
diff --git a/core/sync/sync2/primitives_pthreads.odin b/core/sync/sync2/primitives_pthreads.odin
index e85cff7fc..5fd43d871 100644
--- a/core/sync/sync2/primitives_pthreads.odin
+++ b/core/sync/sync2/primitives_pthreads.odin
@@ -5,6 +5,7 @@ package sync2
 when #config(ODIN_SYNC_USE_PTHREADS, true) {
 
 import "core:time"
+import "core:runtime"
 import "core:sys/unix"
 
 _Mutex_State :: enum i32 {
@@ -83,7 +84,7 @@ _rw_mutex_shared_lock :: proc(rw: ^RW_Mutex) {
 	state := atomic_load(&rw.impl.state);
 	for state & (RW_Mutex_State_Is_Writing|RW_Mutex_State_Writer_Mask) == 0 {
 		ok: bool;
-		state, ok = atomic_cxchgweak(&rw.impl.state, state, state + RW_Mutex_State_Reader);
+		state, ok = atomic_compare_exchange_weak(&rw.impl.state, state, state + RW_Mutex_State_Reader);
 		if ok {
 			return;
 		}
@@ -106,7 +107,7 @@ _rw_mutex_shared_unlock :: proc(rw: ^RW_Mutex) {
 _rw_mutex_try_shared_lock :: proc(rw: ^RW_Mutex) -> bool {
 	state := atomic_load(&rw.impl.state);
 	if state & (RW_Mutex_State_Is_Writing|RW_Mutex_State_Writer_Mask) == 0 {
-		_, ok := atomic_cxchg(&rw.impl.state, state, state + RW_Mutex_State_Reader);
+		_, ok := atomic_compare_exchange_strong(&rw.impl.state, state, state + RW_Mutex_State_Reader);
 		if ok {
 			return true;
 		}
@@ -120,6 +121,53 @@ _rw_mutex_try_shared_lock :: proc(rw: ^RW_Mutex) -> bool {
 	return false;
 }
 
+
+_Recursive_Mutex :: struct {
+	owner:     int,
+	recursion: int,
+	mutex: Mutex,
+}
+
+_recursive_mutex_lock :: proc(m: ^Recursive_Mutex) {
+	tid := runtime.current_thread_id();
+	if tid != m.impl.owner {
+		mutex_lock(&m.impl.mutex);
+	}
+	// inside the lock
+	m.impl.owner = tid;
+	m.impl.recursion += 1;
+}
+
+_recursive_mutex_unlock :: proc(m: ^Recursive_Mutex) {
+	tid := runtime.current_thread_id();
+	assert(tid == m.impl.owner);
+	m.impl.recursion -= 1;
+	recursion := m.impl.recursion;
+	if recursion == 0 {
+		m.impl.owner = 0;
+	}
+	if recursion == 0 {
+		mutex_unlock(&m.impl.mutex);
+	}
+	// outside the lock
+
+}
+
+_recursive_mutex_try_lock :: proc(m: ^Recursive_Mutex) -> bool {
+	tid := runtime.current_thread_id();
+	// The owning thread already holds the inner mutex; only a different thread has to try to take it.
+	if tid != m.impl.owner {
+		if !mutex_try_lock(&m.impl.mutex) {
+			return false;
+		}
+	}
+	// inside the lock
+	m.impl.owner = tid;
+	m.impl.recursion += 1;
+	return true;
+}
+
+
 _Cond :: struct {
 	pthread_cond: unix.pthread_cond_t,
 }
@@ -150,5 +198,34 @@ _cond_broadcast :: proc(c: ^Cond) {
 	assert(err == 0);
 }
 
+_Sema :: struct {
+	mutex: Mutex,
+	cond:  Cond,
+	count: int,
+}
+
+_sema_wait :: proc(s: ^Sema) {
+	mutex_lock(&s.impl.mutex);
+	defer mutex_unlock(&s.impl.mutex);
+
+	for s.impl.count == 0 {
+		cond_wait(&s.impl.cond, &s.impl.mutex);
+	}
+
+	s.impl.count -= 1;
+	if s.impl.count > 0 {
+		cond_signal(&s.impl.cond);
+	}
+}
+
+_sema_post :: proc(s: ^Sema, count := 1) {
+	mutex_lock(&s.impl.mutex);
+	defer mutex_unlock(&s.impl.mutex);
+
+	s.impl.count += count;
+	cond_signal(&s.impl.cond);
+}
+
+
 
 } // ODIN_SYNC_USE_PTHREADS
diff --git a/core/sync/sync2/primitives_windows.odin b/core/sync/sync2/primitives_windows.odin
index 02b6cd733..219af0162 100644
--- a/core/sync/sync2/primitives_windows.odin
+++ b/core/sync/sync2/primitives_windows.odin
@@ -50,6 +50,56 @@ _rw_mutex_try_shared_lock :: proc(rw: ^RW_Mutex) -> bool {
 }
 
 
+_Recursive_Mutex :: struct {
+	owner:       u32,
+	claim_count: i32,
+}
+
+_recursive_mutex_lock :: proc(m: ^Recursive_Mutex) {
+	tid := win32.GetCurrentThreadId();
+	for {
+		prev_owner := atomic_compare_exchange_strong_acquire(&m.impl.owner, 0, tid); // claim only if unowned (0)
+		switch prev_owner {
+		case 0, tid: // either just claimed, or already owned by this thread (re-entrant lock)
+			m.impl.claim_count += 1;
+			// inside the lock
+			return;
+		}
+
+		win32.WaitOnAddress(
+			&m.impl.owner,
+			&prev_owner,
+			size_of(prev_owner),
+			win32.INFINITE,
+		);
+	}
+}
+
+_recursive_mutex_unlock :: proc(m: ^Recursive_Mutex) {
+	m.impl.claim_count -= 1; // one nested claim fewer; no check here that the caller is the owner
+	if m.impl.claim_count != 0 {
+		return; // still held through an outer lock call
+	}
+	atomic_exchange_release(&m.impl.owner, 0); // owner 0 marks the mutex as free
+	win32.WakeByAddressSingle(&m.impl.owner); // wake one thread blocked in WaitOnAddress on owner
+	// outside the lock
+
+}
+
+_recursive_mutex_try_lock :: proc(m: ^Recursive_Mutex) -> bool {
+	tid := win32.GetCurrentThreadId();
+	prev_owner := atomic_compare_exchange_strong_acquire(&m.impl.owner, 0, tid); // claim only if unowned (0)
+	switch prev_owner {
+	case 0, tid: // either just claimed, or already owned by this thread (re-entrant lock)
+		m.impl.claim_count += 1;
+		// inside the lock
+		return true;
+	}
+	return false;
+}
+
+
+
 
 _Cond :: struct {
 	cond: win32.CONDITION_VARIABLE,
@@ -71,3 +121,35 @@ _cond_signal :: proc(c: ^Cond) {
 _cond_broadcast :: proc(c: ^Cond) {
 	win32.WakeAllConditionVariable(&c.impl.cond);
 }
+
+
+_Sema :: struct {
+	count: i32,
+}
+
+_sema_wait :: proc(s: ^Sema) {
+	for {
+		original_count := s.impl.count;
+		for original_count == 0 { // nothing to take: sleep until the count changes
+			win32.WaitOnAddress(
+				&s.impl.count,
+				&original_count,
+				size_of(original_count),
+				win32.INFINITE,
+			);
+			original_count = s.impl.count;
+		}
+		if original_count == atomic_compare_exchange_strong(&s.impl.count, original_count, original_count-1) { // take one unit; retry if another thread changed the count first
+			return;
+		}
+	}
+}
+
+_sema_post :: proc(s: ^Sema, count := 1) {
+	atomic_add(&s.impl.count, i32(count)); // make `count` more units available
+	if count == 1 {
+		win32.WakeByAddressSingle(&s.impl.count); // a single unit needs a single waiter
+	} else {
+		win32.WakeByAddressAll(&s.impl.count); // wake every waiter; each re-checks the count in _sema_wait
+	}
+}
diff --git a/core/testing/runner.odin b/core/testing/runner.odin
index efeaa04f6..e3286988c 100644
--- a/core/testing/runner.odin
+++ b/core/testing/runner.odin
@@ -3,7 +3,6 @@ package testing
 
 import "core:io"
 import "core:os"
-import "core:strings"
 import "core:slice"
 
 reset_t :: proc(t: ^T) {
@@ -55,12 +54,9 @@ runner :: proc(internal_tests: []Internal_Test) -> bool {
 
 		logf(t, "[Test: %s]", it.name);
 
-		// TODO(bill): Catch panics
-		{
-			it.p(t);
-		}
+		run_internal_test(t, it);
 
-		if t.error_count != 0 {
+		if failed(t) {
 			logf(t, "[%s : FAILURE]", it.name);
 		} else {
 			logf(t, "[%s : SUCCESS]", it.name);
diff --git a/core/testing/runner_other.odin b/core/testing/runner_other.odin
new file mode 100644
index 000000000..0bd95e10a
--- /dev/null
+++ b/core/testing/runner_other.odin
@@ -0,0 +1,8 @@
+//+private
+//+build !windows
+package testing
+
+run_internal_test :: proc(t: ^T, it: Internal_Test) {
+	// TODO(bill): Catch panics on other platforms
+	it.p(t);
+}
diff --git a/core/testing/runner_windows.odin b/core/testing/runner_windows.odin
new file mode 100644
index 000000000..d8633f703
--- /dev/null
+++ b/core/testing/runner_windows.odin
@@ -0,0 +1,191 @@
+//+private
+//+build windows
+package testing
+
+import win32 "core:sys/windows"
+import "core:runtime"
+import "intrinsics"
+
+
+Sema :: struct {
+	count: i32,
+}
+
+sema_reset :: proc "contextless" (s: ^Sema) {
+	intrinsics.atomic_store(&s.count, 0);
+}
+sema_wait :: proc "contextless" (s: ^Sema) {
+	for {
+		original_count := s.count;
+		for original_count == 0 { // nothing to take: sleep until the count changes
+			win32.WaitOnAddress(
+				&s.count,
+				&original_count,
+				size_of(original_count),
+				win32.INFINITE,
+			);
+			original_count = s.count;
+		}
+		if original_count == intrinsics.atomic_cxchg(&s.count, original_count, original_count-1) { // take one unit; retry if another thread changed the count first
+			return;
+		}
+	}
+}
+
+sema_post :: proc "contextless" (s: ^Sema, count := 1) {
+	intrinsics.atomic_add(&s.count, i32(count));
+	if count == 1 {
+		win32.WakeByAddressSingle(&s.count);
+	} else {
+		win32.WakeByAddressAll(&s.count);
+	}
+}
+
+
+Thread_Proc :: #type proc(^Thread);
+
+MAX_USER_ARGUMENTS :: 8;
+
+Thread :: struct {
+	using specific: Thread_Os_Specific,
+	procedure:      Thread_Proc,
+
+	t:       ^T,
+	it:      Internal_Test,
+	success: bool,
+
+	init_context: Maybe(runtime.Context),
+
+	creation_allocator: runtime.Allocator,
+}
+
+Thread_Os_Specific :: struct {
+	win32_thread:    win32.HANDLE,
+	win32_thread_id: win32.DWORD,
+	done: bool, // see note in `is_done`
+}
+
+thread_create :: proc(procedure: Thread_Proc) -> ^Thread {
+	__windows_thread_entry_proc :: proc "stdcall" (t_: rawptr) -> win32.DWORD {
+		t := (^Thread)(t_);
+		context = runtime.default_context();
+		c := context;
+		if ic, ok := t.init_context.?; ok {
+			c = ic;
+		}
+		context = c;
+
+		t.procedure(t);
+
+		if t.init_context == nil {
+			if context.temp_allocator.data == &runtime.global_default_temp_allocator_data {
+				runtime.default_temp_allocator_destroy(auto_cast context.temp_allocator.data);
+			}
+		}
+
+		intrinsics.atomic_store(&t.done, true);
+		return 0;
+	}
+
+
+	thread := new(Thread);
+	if thread == nil {
+		return nil;
+	}
+	thread.creation_allocator = context.allocator;
+
+	win32_thread_id: win32.DWORD;
+	win32_thread := win32.CreateThread(nil, 0, __windows_thread_entry_proc, thread, win32.CREATE_SUSPENDED, &win32_thread_id);
+	if win32_thread == nil {
+		free(thread, thread.creation_allocator);
+		return nil;
+	}
+	thread.procedure       = procedure;
+	thread.win32_thread    = win32_thread;
+	thread.win32_thread_id = win32_thread_id;
+	thread.init_context = context;
+
+	return thread;
+}
+
+thread_start :: proc "contextless" (thread: ^Thread) {
+	win32.ResumeThread(thread.win32_thread);
+}
+
+thread_join_and_destroy :: proc(thread: ^Thread) {
+	if thread.win32_thread != win32.INVALID_HANDLE {
+		win32.WaitForSingleObject(thread.win32_thread, win32.INFINITE);
+		win32.CloseHandle(thread.win32_thread);
+		thread.win32_thread = win32.INVALID_HANDLE;
+	}
+	free(thread, thread.creation_allocator);
+}
+
+thread_terminate :: proc "contextless" (thread: ^Thread, exit_code: int) {
+	win32.TerminateThread(thread.win32_thread, u32(exit_code));
+}
+
+
+
+
+global_threaded_runner_semaphore: Sema;
+global_exception_handler: rawptr;
+global_current_thread: ^Thread;
+global_current_t: ^T;
+
+run_internal_test :: proc(t: ^T, it: Internal_Test) {
+	thread := thread_create(proc(thread: ^Thread) {
+		exception_handler_proc :: proc "stdcall" (ExceptionInfo: ^win32.EXCEPTION_POINTERS) -> win32.LONG {
+			switch ExceptionInfo.ExceptionRecord.ExceptionCode {
+			case
+				win32.EXCEPTION_DATATYPE_MISALIGNMENT,
+				win32.EXCEPTION_BREAKPOINT,
+				win32.EXCEPTION_ACCESS_VIOLATION,
+				win32.EXCEPTION_ILLEGAL_INSTRUCTION,
+				win32.EXCEPTION_ARRAY_BOUNDS_EXCEEDED,
+				win32.EXCEPTION_STACK_OVERFLOW:
+
+				sema_post(&global_threaded_runner_semaphore);
+				return win32.EXCEPTION_EXECUTE_HANDLER;
+			}
+
+			return win32.EXCEPTION_CONTINUE_SEARCH;
+		}
+		global_exception_handler = win32.AddVectoredExceptionHandler(0, exception_handler_proc);
+
+		context.assertion_failure_proc = proc(prefix, message: string, loc: runtime.Source_Code_Location) {
+			errorf(t=global_current_t, format="%s %s", args={prefix, message}, loc=loc);
+			intrinsics.trap();
+		};
+
+		thread.it.p(thread.t);
+
+		thread.success = true;
+		sema_post(&global_threaded_runner_semaphore);
+	});
+	if thread == nil {
+		t.error_count += 1; // no worker thread could be created, so the test cannot run: count it as failed
+		return;
+	}
+
+	sema_reset(&global_threaded_runner_semaphore);
+	global_current_t = t;
+
+	t._fail_now = proc() -> ! {
+		intrinsics.trap();
+	};
+
+	thread.t = t;
+	thread.it = it;
+	thread.success = false;
+	thread_start(thread);
+	sema_wait(&global_threaded_runner_semaphore); // posted by the test thread on completion, or by the exception handler
+	success := thread.success; // read before thread_join_and_destroy, which frees `thread`
+	thread_terminate(thread, int(!success));
+	thread_join_and_destroy(thread);
+	win32.RemoveVectoredExceptionHandler(global_exception_handler);
+
+	if !success && t.error_count == 0 {
+		t.error_count += 1;
+	}
+}
diff --git a/core/testing/testing.odin b/core/testing/testing.odin
index a431d8575..ec47ca4d4 100644
--- a/core/testing/testing.odin
+++ b/core/testing/testing.odin
@@ -25,16 +25,21 @@ T :: struct {
 	w: io.Writer,
 
 	cleanups: [dynamic]Internal_Cleanup,
+
+	_fail_now: proc() -> !,
 }
 
 
 error :: proc(t: ^T, args: ..any, loc := #caller_location) {
-	log(t=t, args=args, loc=loc);
+	fmt.wprintf(t.w, "%v: ", loc);
+	fmt.wprintln(t.w, ..args);
 	t.error_count += 1;
 }
 
 errorf :: proc(t: ^T, format: string, args: ..any, loc := #caller_location) {
-	logf(t=t, format=format, args=args, loc=loc);
+	fmt.wprintf(t.w, "%v: ", loc);
+	fmt.wprintf(t.w, format, ..args);
+	fmt.wprintln(t.w);
 	t.error_count += 1;
 }
 
@@ -43,6 +48,13 @@ fail :: proc(t: ^T) {
 	t.error_count += 1;
 }
 
+fail_now :: proc(t: ^T) {
+	fail(t); // record the failure first
+	if t._fail_now != nil { // the hook is optional; when it is unset this procedure simply returns
+		t._fail_now();
+	}
+}
+
 failed :: proc(t: ^T) -> bool {
 	return t.error_count != 0;
 }
diff --git a/core/time/time.odin b/core/time/time.odin
index 00d7e529a..c75549b17 100644
--- a/core/time/time.odin
+++ b/core/time/time.odin
@@ -262,19 +262,18 @@ datetime_to_time :: proc(year, month, day, hour, minute, second: int, nsec := in
 		return;
 	}
 
+	ok = true;
+
 	_y := year  - 1970;
 	_m := month - 1;
 	_d := day   - 1;
 
-	if _m < 0 || _m > 11 {
+	if month < 1 || month > 12 {
 		_m %= 12; ok = false;
 	}
-	if _d < 0 || _m > 30 {
+	if day   < 1 || day   > 31 {
 		_d %= 31; ok = false;
 	}
-	if _m < 0 || _m > 11 {
-		_m %= 12; ok = false;
-	}
 
 	s := i64(0);
 	div, mod := divmod(_y, 400);
diff --git a/core/unicode/tables.odin b/core/unicode/tables.odin
index bb858fd04..ff4793402 100644
--- a/core/unicode/tables.odin
+++ b/core/unicode/tables.odin
@@ -12,7 +12,6 @@ package unicode
 @(private) pLo    :: pLl | pLu; // a letter that is neither upper nor lower case.
 @(private) pLmask :: pLo;
 
-@(static)
 char_properties := [MAX_LATIN1+1]u8{
 	0x00 = pC,       // '\x00'
 	0x01 = pC,       // '\x01'
@@ -273,7 +272,6 @@ char_properties := [MAX_LATIN1+1]u8{
 };
 
 
-@(static)
 alpha_ranges := [?]i32{
 	0x00d8,  0x00f6,
 	0x00f8,  0x01f5,
@@ -429,7 +427,6 @@ alpha_ranges := [?]i32{
 	0xffda,  0xffdc,
 };
 
-@(static)
 alpha_singlets := [?]i32{
 	0x00aa,
 	0x00b5,
@@ -465,7 +462,6 @@ alpha_singlets := [?]i32{
 	0xfe74,
 };
 
-@(static)
 space_ranges := [?]i32{
 	0x0009,  0x000d, // tab and newline
 	0x0020,  0x0020, // space
@@ -481,7 +477,6 @@ space_ranges := [?]i32{
 	0xfeff,  0xfeff,
 };
 
-@(static)
 unicode_spaces := [?]i32{
 	0x0009, // tab
 	0x000a, // LF
@@ -499,7 +494,6 @@ unicode_spaces := [?]i32{
 	0xfeff, // unknown
 };
 
-@(static)
 to_upper_ranges := [?]i32{
 	0x0061,  0x007a, 468, // a-z A-Z
 	0x00e0,  0x00f6, 468,
@@ -538,7 +532,6 @@ to_upper_ranges := [?]i32{
 	0xff41,  0xff5a, 468,
 };
 
-@(static)
 to_upper_singlets := [?]i32{
 	0x00ff, 621,
 	0x0101, 499,
@@ -882,7 +875,6 @@ to_upper_singlets := [?]i32{
 	0x1ff3, 509,
 };
 
-@(static)
 to_lower_ranges := [?]i32{
 	0x0041,  0x005a, 532, // A-Z a-z
 	0x00c0,  0x00d6, 532, // - -
@@ -922,7 +914,6 @@ to_lower_ranges := [?]i32{
 	0xff21,  0xff3a, 532, // - -
 };
 
-@(static)
 to_lower_singlets := [?]i32{
 	0x0100, 501,
 	0x0102, 501,
@@ -1259,7 +1250,6 @@ to_lower_singlets := [?]i32{
 	0x1ffc, 491,
 };
 
-@(static)
 to_title_singlets := [?]i32{
 	0x01c4, 501,
 	0x01c6, 499,