aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--core/compress/shoco/model.odin148
-rw-r--r--core/compress/shoco/shoco.odin318
-rw-r--r--examples/all/all_main.odin2
-rw-r--r--tests/core/assets/Shoco/LICENSE26
-rw-r--r--tests/core/assets/Shoco/LICENSE.shocobin0 -> 1269 bytes
-rw-r--r--tests/core/assets/Shoco/README.md95
-rw-r--r--tests/core/assets/Shoco/README.md.shocobin0 -> 2227 bytes
-rw-r--r--tests/core/compress/test_core_compress.odin57
8 files changed, 645 insertions, 1 deletions
diff --git a/core/compress/shoco/model.odin b/core/compress/shoco/model.odin
new file mode 100644
index 000000000..49e3dd97e
--- /dev/null
+++ b/core/compress/shoco/model.odin
@@ -0,0 +1,148 @@
+/*
+ This file was generated, so don't edit this by hand.
+ Transliterated from https://github.com/Ed-von-Schleck/shoco/blob/master/shoco_model.h,
+ which is an English word model.
+*/
+
+// package shoco is an implementation of the shoco short string compressor
+package shoco
+
+DEFAULT_MODEL :: Shoco_Model {
+ min_char = 39,
+ max_char = 122,
+ characters_by_id = {
+ 'e', 'a', 'i', 'o', 't', 'h', 'n', 'r', 's', 'l', 'u', 'c', 'w', 'm', 'd', 'b', 'p', 'f', 'g', 'v', 'y', 'k', '-', 'H', 'M', 'T', '\'', 'B', 'x', 'I', 'W', 'L',
+ },
+ ids_by_character = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, -1, -1, -1, -1, -1, 22, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 27, -1, -1, -1, -1, -1, 23, 29, -1, -1, 31, 24, -1, -1, -1, -1, -1, -1, 25, -1, -1, 30, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 15, 11, 14, 0, 17, 18, 5, 2, -1, 21, 9, 13, 6, 3, 16, -1, 7, 8, 4, 10, 19, 12, 28, 20, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ },
+ successors_by_bigram = {
+ {7, 4, 12, -1, 6, -1, 1, 0, 3, 5, -1, 9, -1, 8, 2, -1, 15, 14, -1, 10, 11, -1, -1, -1, -1, -1, -1, -1, 13, -1, -1, -1},
+ {-1, -1, 6, -1, 1, -1, 0, 3, 2, 4, 15, 11, -1, 9, 5, 10, 13, -1, 12, 8, 7, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {9, 11, -1, 4, 2, -1, 0, 8, 1, 5, -1, 6, -1, 3, 7, 15, -1, 12, 10, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, -1, 14, 7, 5, -1, 1, 2, 8, 9, 0, 15, 6, 4, 11, -1, 12, 3, -1, 10, -1, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {2, 4, 3, 1, 5, 0, -1, 6, 10, 9, 7, 12, 11, -1, -1, -1, -1, 13, -1, -1, 8, -1, 15, -1, -1, -1, 14, -1, -1, -1, -1, -1},
+ {0, 1, 2, 3, 4, -1, -1, 5, 9, 10, 6, -1, -1, 8, 15, 11, -1, 14, -1, -1, 7, -1, 13, -1, -1, -1, 12, -1, -1, -1, -1, -1},
+ {2, 8, 7, 4, 3, -1, 9, -1, 6, 11, -1, 5, -1, -1, 0, -1, -1, 14, 1, 15, 10, 12, -1, -1, -1, -1, 13, -1, -1, -1, -1, -1},
+ {0, 3, 1, 2, 6, -1, 9, 8, 4, 12, 13, 10, -1, 11, 7, -1, -1, 15, 14, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {0, 6, 3, 4, 1, 2, -1, -1, 5, 10, 7, 9, 11, 12, -1, -1, 8, 14, -1, -1, 15, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {0, 6, 2, 5, 9, -1, -1, -1, 10, 1, 8, -1, 12, 14, 4, -1, 15, 7, -1, 13, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {8, 10, 9, 15, 1, -1, 4, 0, 3, 2, -1, 6, -1, 12, 11, 13, 7, 14, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {1, 3, 6, 0, 4, 2, -1, 7, 13, 8, 9, 11, -1, -1, 15, -1, -1, -1, -1, -1, 10, 5, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {3, 0, 1, 4, -1, 2, 5, 6, 7, 8, -1, 14, -1, -1, 9, 15, -1, 12, -1, -1, -1, 10, 11, -1, -1, -1, 13, -1, -1, -1, -1, -1},
+ {0, 1, 3, 2, 15, -1, 12, -1, 7, 14, 4, -1, -1, 9, -1, 8, 5, 10, -1, -1, 6, -1, 13, -1, -1, -1, 11, -1, -1, -1, -1, -1},
+ {0, 3, 1, 2, -1, -1, 12, 6, 4, 9, 7, -1, -1, 14, 8, -1, -1, 15, 11, 13, 5, -1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {0, 5, 7, 2, 10, 13, -1, 6, 8, 1, 3, -1, -1, 14, 15, 11, -1, -1, -1, 12, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {0, 2, 6, 3, 7, 10, -1, 1, 9, 4, 8, -1, -1, 15, -1, 12, 5, -1, -1, -1, 11, -1, 13, -1, -1, -1, 14, -1, -1, -1, -1, -1},
+ {1, 3, 4, 0, 7, -1, 12, 2, 11, 8, 6, 13, -1, -1, -1, -1, -1, 5, -1, -1, 10, 15, 9, -1, -1, -1, 14, -1, -1, -1, -1, -1},
+ {1, 3, 5, 2, 13, 0, 9, 4, 7, 6, 8, -1, -1, 15, -1, 11, -1, -1, 10, -1, 14, -1, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {0, 2, 1, 3, -1, -1, -1, 6, -1, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {1, 11, 4, 0, 3, -1, 13, 12, 2, 7, -1, -1, 15, 10, 5, 8, 14, -1, -1, -1, -1, -1, 9, -1, -1, -1, 6, -1, -1, -1, -1, -1},
+ {0, 9, 2, 14, 15, 4, 1, 13, 3, 5, -1, -1, 10, -1, -1, -1, -1, 6, 12, -1, 7, -1, 8, -1, -1, -1, 11, -1, -1, -1, -1, -1},
+ {-1, 2, 14, -1, 1, 5, 8, 7, 4, 12, -1, 6, 9, 11, 13, 3, 10, 15, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {0, 1, 3, 2, -1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {4, 3, 1, 5, -1, -1, -1, 0, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {2, 8, 4, 1, -1, 0, -1, 6, -1, -1, 5, -1, 7, -1, -1, -1, -1, -1, -1, -1, 10, -1, -1, 9, -1, -1, -1, -1, -1, -1, -1, -1},
+ {12, 5, -1, -1, 1, -1, -1, 7, 0, 3, -1, 2, -1, 4, 6, -1, -1, -1, -1, 8, -1, -1, 15, -1, 13, 9, -1, -1, -1, -1, -1, 11},
+ {1, 3, 2, 4, -1, -1, -1, 5, -1, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, 8, -1, -1},
+ {5, 3, 4, 12, 1, 6, -1, -1, -1, -1, 8, 2, -1, -1, -1, -1, 0, 9, -1, -1, 11, -1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, -1, -1, -1, 0, -1, 1, 12, 3, -1, -1, -1, -1, 5, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1, 6, -1, 10},
+ {2, 3, 1, 4, -1, 0, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, -1},
+ {5, 1, 3, 0, -1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, 9, -1, -1, 6, -1, 7},
+ },
+ successors_reversed = {
+ {'s', 't', 'c', 'l', 'm', 'a', 'd', 'r', 'v', 'T', 'A', 'L', 'e', 'M', 'Y', '-'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'-', 't', 'a', 'b', 's', 'h', 'c', 'r', 'n', 'w', 'p', 'm', 'l', 'd', 'i', 'f'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'u', 'e', 'i', 'a', 'o', 'r', 'y', 'l', 'I', 'E', 'R', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'e', 'a', 'o', 'i', 'u', 'A', 'y', 'E', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'t', 'n', 'f', 's', '\'', 'm', 'I', 'N', 'A', 'E', 'L', 'Z', 'r', 'V', 'R', 'C'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'o', 'a', 'y', 'i', 'u', 'e', 'I', 'L', 'D', '\'', 'E', 'Y', '\x00', '\x00', '\x00', '\x00'},
+ {'r', 'i', 'y', 'a', 'e', 'o', 'u', 'Y', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'h', 'o', 'e', 'E', 'i', 'u', 'r', 'w', 'a', 'H', 'y', 'R', 'Z', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'h', 'i', 'e', 'a', 'o', 'r', 'I', 'y', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'n', 't', 's', 'r', 'l', 'd', 'i', 'y', 'v', 'm', 'b', 'c', 'g', 'p', 'k', 'u'},
+ {'e', 'l', 'o', 'u', 'y', 'a', 'r', 'i', 's', 'j', 't', 'b', 'v', 'h', 'm', 'd'},
+ {'o', 'e', 'h', 'a', 't', 'k', 'i', 'r', 'l', 'u', 'y', 'c', 'q', 's', '-', 'd'},
+ {'e', 'i', 'o', 'a', 's', 'y', 'r', 'u', 'd', 'l', '-', 'g', 'n', 'v', 'm', 'f'},
+ {'r', 'n', 'd', 's', 'a', 'l', 't', 'e', 'm', 'c', 'v', 'y', 'i', 'x', 'f', 'p'},
+ {'o', 'e', 'r', 'a', 'i', 'f', 'u', 't', 'l', '-', 'y', 's', 'n', 'c', '\'', 'k'},
+ {'h', 'e', 'o', 'a', 'r', 'i', 'l', 's', 'u', 'n', 'g', 'b', '-', 't', 'y', 'm'},
+ {'e', 'a', 'i', 'o', 't', 'r', 'u', 'y', 'm', 's', 'l', 'b', '\'', '-', 'f', 'd'},
+ {'n', 's', 't', 'm', 'o', 'l', 'c', 'd', 'r', 'e', 'g', 'a', 'f', 'v', 'z', 'b'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'e', 'n', 'i', 's', 'h', 'l', 'f', 'y', '-', 'a', 'w', '\'', 'g', 'r', 'o', 't'},
+ {'e', 'l', 'i', 'y', 'd', 'o', 'a', 'f', 'u', 't', 's', 'k', 'w', 'v', 'm', 'p'},
+ {'e', 'a', 'o', 'i', 'u', 'p', 'y', 's', 'b', 'm', 'f', '\'', 'n', '-', 'l', 't'},
+ {'d', 'g', 'e', 't', 'o', 'c', 's', 'i', 'a', 'n', 'y', 'l', 'k', '\'', 'f', 'v'},
+ {'u', 'n', 'r', 'f', 'm', 't', 'w', 'o', 's', 'l', 'v', 'd', 'p', 'k', 'i', 'c'},
+ {'e', 'r', 'a', 'o', 'l', 'p', 'i', 't', 'u', 's', 'h', 'y', 'b', '-', '\'', 'm'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'e', 'i', 'o', 'a', 's', 'y', 't', 'd', 'r', 'n', 'c', 'm', 'l', 'u', 'g', 'f'},
+ {'e', 't', 'h', 'i', 'o', 's', 'a', 'u', 'p', 'c', 'l', 'w', 'm', 'k', 'f', 'y'},
+ {'h', 'o', 'e', 'i', 'a', 't', 'r', 'u', 'y', 'l', 's', 'w', 'c', 'f', '\'', '-'},
+ {'r', 't', 'l', 's', 'n', 'g', 'c', 'p', 'e', 'i', 'a', 'd', 'm', 'b', 'f', 'o'},
+ {'e', 'i', 'a', 'o', 'y', 'u', 'r', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'a', 'i', 'h', 'e', 'o', 'n', 'r', 's', 'l', 'd', 'k', '-', 'f', '\'', 'c', 'b'},
+ {'p', 't', 'c', 'a', 'i', 'e', 'h', 'q', 'u', 'f', '-', 'y', 'o', '\x00', '\x00', '\x00'},
+ {'o', 'e', 's', 't', 'i', 'd', '\'', 'l', 'b', '-', 'm', 'a', 'r', 'n', 'p', 'w'},
+ },
+
+ character_count = 32,
+ successor_count = 16,
+
+ max_successor_n = 7,
+ packs = {
+ { 0x80000000, 1, 2, { 26, 24, 24, 24, 24, 24, 24, 24 }, { 15, 3, 0, 0, 0, 0, 0, 0 }, 0xc0, 0x80 },
+ { 0xc0000000, 2, 4, { 25, 22, 19, 16, 16, 16, 16, 16 }, { 15, 7, 7, 7, 0, 0, 0, 0 }, 0xe0, 0xc0 },
+ { 0xe0000000, 4, 8, { 23, 19, 15, 11, 8, 5, 2, 0 }, { 31, 15, 15, 15, 7, 7, 7, 3 }, 0xf0, 0xe0 },
+ },
+} \ No newline at end of file
diff --git a/core/compress/shoco/shoco.odin b/core/compress/shoco/shoco.odin
new file mode 100644
index 000000000..9c5008f5d
--- /dev/null
+++ b/core/compress/shoco/shoco.odin
@@ -0,0 +1,318 @@
+/*
+ Copyright 2022 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-3 license.
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation.
+
+ An implementation of [shoco](https://github.com/Ed-von-Schleck/shoco) by Christian Schramm.
+*/
+
+// package shoco is an implementation of the shoco short string compressor
+package shoco
+
+import "core:intrinsics"
+import "core:compress"
+
+// Shoco_Pack describes one packed encoding: how the indices of a run of characters
+// are laid out inside a single code word.
+// Field order matters: the model literal initialises packs positionally.
+Shoco_Pack :: struct {
+ // Base value the per-character indices are OR-ed into when compressing.
+ word: u32,
+ // Number of bytes one code occupies in the compressed stream.
+ bytes_packed: i8,
+ // Number of characters a single code expands to.
+ bytes_unpacked: i8,
+ // Per character slot: how far its index is shifted within the code word.
+ offsets: [8]u16,
+ // Per character slot: mask applied when extracting the index; the compressor also
+ // uses it as the largest index that slot can hold.
+ masks: [8]i16,
+ // NOTE(review): `header_mask` and `header` are not read by the procedures visible in this file;
+ // presumably they describe the leading marker bits of a code - confirm against upstream shoco.
+ header_mask: u8,
+ header: u8,
+}
+
+// Shoco_Model bundles the lookup tables and pack layouts the compressor and decompressor share.
+Shoco_Model :: struct {
+ // Subtracted from a decoded character to find its row in `successors_reversed`.
+ min_char: u8,
+ // `max_char - min_char` must equal the number of rows in `successors_reversed` (see `validate_model`).
+ max_char: u8,
+ // Maps a character id (the leading index stored in a code) back to its byte.
+ characters_by_id: []u8,
+ // Maps an input byte to its character id; negative means the byte is not part of the model.
+ ids_by_character: [256]i16,
+ // Indexed as [previous character id][current character id]; yields the successor index, negative if none.
+ successors_by_bigram: [][]i8,
+ // Indexed as [previous byte - min_char][successor index]; yields the next decoded byte.
+ successors_reversed: [][]u8,
+
+ // Expected length of `characters_by_id` and of both dimensions of `successors_by_bigram`.
+ character_count: u8,
+ // Expected length of each row of `successors_reversed`.
+ successor_count: u8,
+ // Upper bound on how many successors the compressor looks for after the leading character.
+ max_successor_n: i8,
+ // Available pack layouts; the decompressor selects one by counting a code's leading set bits.
+ packs: []Shoco_Pack,
+}
+
+// Returns the largest number of bytes that compressing `uncompressed_size` input bytes can produce.
+compress_bound :: proc(uncompressed_size: int) -> (worst_case_compressed_size: int) {
+	// The most expensive input byte is a non-ASCII one (128-255): it is written as
+	// a 0x00 sentinel followed by the byte itself, i.e. two output bytes per input byte.
+	worst_case_compressed_size = uncompressed_size + uncompressed_size
+	return
+}
+
+// Returns an upper bound on the number of bytes `compressed_size` bytes of compressed data
+// can expand to under `model`.
+//
+// The bound assumes the whole stream uses the pack with the best unpacked/packed ratio.
+// It is never smaller than `compressed_size`, because bytes stored outside of a pack
+// decode to at most one output byte per input byte.
+decompress_bound :: proc(compressed_size: int, model := DEFAULT_MODEL) -> (maximum_decompressed_size: int) {
+	most := f64(compressed_size)
+	for pack in model.packs {
+		if pack.bytes_packed <= 0 {
+			// A pack that occupies no bytes is malformed; skipping it avoids a division by zero.
+			continue
+		}
+		val := f64(compressed_size) / f64(pack.bytes_packed) * f64(pack.bytes_unpacked)
+		most = max(most, val)
+	}
+	return int(most)
+}
+
+// Picks the pack to use for a run of `n_consecutive` characters whose indices are in `indices`.
+//
+// Packs are tried from the last entry of `model.packs` to the first. A pack qualifies when the
+// run is at least as long as the pack unpacks to and every index it would store fits the
+// corresponding mask. Returns the index of the first qualifying pack, or -1 when none fits.
+find_best_encoding :: proc(indices: []i16, n_consecutive: i8, model := DEFAULT_MODEL) -> (res: int) {
+	candidates: for pack_idx := len(model.packs) - 1; pack_idx >= 0; pack_idx -= 1 {
+		candidate := model.packs[pack_idx]
+
+		// The run is too short to fill this pack.
+		if n_consecutive < candidate.bytes_unpacked {
+			continue candidates
+		}
+
+		// Every index has to fit into the slot reserved for it.
+		for slot := 0; slot < int(candidate.bytes_unpacked); slot += 1 {
+			if indices[slot] > candidate.masks[slot] {
+				continue candidates
+			}
+		}
+		return pack_idx
+	}
+	return -1
+}
+
+// Checks that the tables of `model` have the dimensions the (de)compressor indexes them with.
+//
+// Returns `.Unknown_Compression_Method` when the model is inconsistent, `nil` otherwise.
+// The integer result is always 0; it exists so callers returning `(int, compress.Error)`
+// can use `or_return` directly.
+validate_model :: proc(model: Shoco_Model) -> (int, compress.Error) {
+	// `max_char - min_char` is computed in u8; reject a reversed range instead of letting it wrap.
+	if model.max_char < model.min_char {
+		return 0, .Unknown_Compression_Method
+	}
+
+	if len(model.successors_reversed) != int(model.max_char - model.min_char) {
+		return 0, .Unknown_Compression_Method
+	}
+
+	if len(model.characters_by_id) != int(model.character_count) {
+		return 0, .Unknown_Compression_Method
+	}
+
+	if len(model.successors_by_bigram) != int(model.character_count) {
+		return 0, .Unknown_Compression_Method
+	}
+
+	// Both successor tables are indexed by the (de)compressor, so empty tables are rejected
+	// rather than indexed.
+	if len(model.successors_by_bigram) == 0 || len(model.successors_reversed) == 0 {
+		return 0, .Unknown_Compression_Method
+	}
+
+	// Check every row, not just the first one: any row can be selected at run time.
+	for row in model.successors_by_bigram {
+		if len(row) != int(model.character_count) {
+			return 0, .Unknown_Compression_Method
+		}
+	}
+
+	for row in model.successors_reversed {
+		if len(row) != int(model.successor_count) {
+			return 0, .Unknown_Compression_Method
+		}
+	}
+
+	// Character ids are used to index `successors_by_bigram`; they have to stay inside it.
+	for id in model.ids_by_character {
+		if int(id) >= int(model.character_count) {
+			return 0, .Unknown_Compression_Method
+		}
+	}
+
+	// Model seems legit.
+	return 0, nil
+}
+
+// Decompresses `input` into the caller-provided `output` buffer using `model`.
+//
+// Returns the number of bytes written to `output` and:
+// - `.Output_Too_Short` when `output` cannot hold the next decoded byte(s),
+// - `.Stream_Too_Short` when `input` ends in the middle of a code or right after a 0x00 sentinel,
+// - `.Unknown_Compression_Method` when the model is invalid or the stream refers to a pack or
+//   table entry the model does not have.
+// On error, `size` is the number of bytes that were completely decoded before the problem.
+decompress_slice_to_output_buffer :: proc(input: []u8, output: []u8, model := DEFAULT_MODEL) -> (size: int, err: compress.Error) {
+	inp, inp_end := 0, len(input)
+	out, out_end := 0, len(output)
+
+	validate_model(model) or_return
+
+	for inp < inp_end {
+		val  := transmute(i8)input[inp]
+		mark := int(-1)
+
+		// Count the leading set bits of the byte; `mark` ends up as that count minus one
+		// and selects the pack. A byte without the high bit set leaves `mark` at -1.
+		for val < 0 {
+			val <<= 1
+			mark += 1
+		}
+
+		// `mark` is used as an index into `model.packs`, so `len(model.packs)` itself is out of range.
+		if mark >= len(model.packs) {
+			return out, .Unknown_Compression_Method
+		}
+
+		if mark < 0 {
+			if out >= out_end {
+				return out, .Output_Too_Short
+			}
+
+			// Ignore the sentinel value for non-ASCII chars
+			if input[inp] == 0x00 {
+				inp += 1
+				if inp >= inp_end {
+					return out, .Stream_Too_Short
+				}
+			}
+			output[out] = input[inp]
+			inp, out = inp + 1, out + 1
+
+		} else {
+			pack := model.packs[mark]
+
+			n_packed   := int(pack.bytes_packed)
+			n_unpacked := int(pack.bytes_unpacked)
+
+			// A code is assembled in a 32-bit word and each pack describes at most 8 slots.
+			if n_packed < 1 || n_packed > 4 || n_unpacked < 1 || n_unpacked > len(pack.offsets) {
+				return out, .Unknown_Compression_Method
+			}
+
+			if out + n_unpacked > out_end {
+				return out, .Output_Too_Short
+			} else if inp + n_packed > inp_end {
+				return out, .Stream_Too_Short
+			}
+
+			// Only `n_packed` bytes belong to this code. Copy them into a zeroed scratch word
+			// instead of loading 4 bytes straight from `input`, which could read past its end.
+			packed: [4]u8
+			copy(packed[:], input[inp:][:n_packed])
+
+			code := intrinsics.unaligned_load((^u32)(&packed[0]))
+			when ODIN_ENDIAN == .Little {
+				// The stream stores the code most significant byte first.
+				code = intrinsics.byte_swap(code)
+			}
+
+			// Unpack the leading char
+			lead_id := int((code >> pack.offsets[0]) & u32(pack.masks[0]))
+			if lead_id >= len(model.characters_by_id) {
+				return out, .Unknown_Compression_Method
+			}
+
+			last_chr := model.characters_by_id[lead_id]
+			output[out] = last_chr
+
+			// Unpack the successor chars
+			for i := 1; i < n_unpacked; i += 1 {
+				row := int(last_chr) - int(model.min_char)
+				col := int((code >> pack.offsets[i]) & u32(pack.masks[i]))
+
+				// A crafted stream can chain to a character that has no successor row.
+				if row < 0 || row >= len(model.successors_reversed) || col >= len(model.successors_reversed[row]) {
+					return out, .Unknown_Compression_Method
+				}
+
+				last_chr = model.successors_reversed[row][col]
+				output[out + i] = last_chr
+			}
+
+			out += n_unpacked
+			inp += n_packed
+		}
+	}
+
+	return out, nil
+}
+
+// Decompresses `input` into a newly allocated string using `model`.
+//
+// The result is allocated with `allocator` and owned by the caller.
+// Returns `.Stream_Too_Short` for empty input and `.Out_Of_Memory` when the output buffer cannot
+// be allocated. Any error from the decoder is returned together with whatever was decoded
+// up to that point.
+decompress_slice_to_string :: proc(input: []u8, model := DEFAULT_MODEL, allocator := context.allocator) -> (res: string, err: compress.Error) {
+	context.allocator = allocator
+
+	if len(input) == 0 {
+		return "", .Stream_Too_Short
+	}
+
+	max_output_size := decompress_bound(len(input), model)
+
+	buf: [dynamic]u8
+	if !resize(&buf, max_output_size) {
+		return "", .Out_Of_Memory
+	}
+
+	// Decode with the same model the buffer was sized for, not the default one.
+	length, result := decompress_slice_to_output_buffer(input, buf[:], model)
+	resize(&buf, length)
+	return string(buf[:]), result
+}
+// `decompress` is a procedure group: the call resolves to the buffer-writing or the
+// string-allocating variant depending on the arguments passed.
+decompress :: proc{decompress_slice_to_output_buffer, decompress_slice_to_string}
+
+// Compresses `input` into the caller-provided `output` buffer using `model`.
+//
+// `allocator` is used for a small scratch slice that is freed before returning.
+// Returns the number of bytes written to `output` and:
+// - `.Output_Too_Short` when `output` cannot hold the next encoded byte(s),
+// - `.Unknown_Compression_Method` when the model is invalid or a pack has an unsupported size.
+compress_string_to_buffer :: proc(input: string, output: []u8, model := DEFAULT_MODEL, allocator := context.allocator) -> (size: int, err: compress.Error) {
+	// Make the scratch allocation below honour the `allocator` argument.
+	context.allocator = allocator
+
+	inp, inp_end := 0, len(input)
+	out, out_end := 0, len(output)
+
+	validate_model(model) or_return
+
+	// One slot for the leading character plus one per possible successor.
+	indices := make([]i16, int(model.max_successor_n) + 1)
+	defer delete(indices)
+
+	// Set when the byte at `inp` could not be packed and has to be stored verbatim.
+	last_resort := false
+
+	encode: for inp < inp_end {
+		if last_resort {
+			last_resort = false
+
+			if input[inp] & 0x80 == 0x80 {
+				// Non-ASCII case
+				if out + 2 > out_end {
+					return out, .Output_Too_Short
+				}
+
+				// Put in a sentinel byte
+				output[out] = 0x00
+				out += 1
+			} else {
+				// An ASCII byte
+				if out + 1 > out_end {
+					return out, .Output_Too_Short
+				}
+			}
+			output[out] = input[inp]
+			out, inp = out + 1, inp + 1
+		} else {
+			// Find the longest string of known successors
+			indices[0] = model.ids_by_character[input[inp]]
+			last_chr_index := indices[0]
+
+			if last_chr_index < 0 {
+				last_resort = true
+				continue encode
+			}
+
+			rest := inp_end - inp
+			n_consecutive: i8 = 1
+			for ; n_consecutive <= model.max_successor_n; n_consecutive += 1 {
+				// Stop at the end of the input.
+				if int(n_consecutive) == rest {
+					break
+				}
+
+				current_index := model.ids_by_character[input[inp + int(n_consecutive)]]
+				if current_index < 0 { // '\0' is always -1
+					break
+				}
+
+				successor_index := model.successors_by_bigram[last_chr_index][current_index]
+				if successor_index < 0 {
+					break
+				}
+
+				indices[n_consecutive] = i16(successor_index)
+				last_chr_index = current_index
+			}
+
+			if n_consecutive < 2 {
+				last_resort = true
+				continue encode
+			}
+
+			// Choose the pack from the model in use, not from the default model.
+			pack_n := find_best_encoding(indices, n_consecutive, model)
+			if pack_n >= 0 {
+				pack     := model.packs[pack_n]
+				n_packed := int(pack.bytes_packed)
+
+				if out + n_packed > out_end {
+					return out, .Output_Too_Short
+				}
+
+				// A code lives in a 32-bit word, so a pack can occupy at most 4 bytes.
+				if n_packed < 1 || n_packed > 4 {
+					return out, .Unknown_Compression_Method
+				}
+
+				code := pack.word
+
+				for i := 0; i < int(pack.bytes_unpacked); i += 1 {
+					code |= u32(indices[i]) << pack.offsets[i]
+				}
+
+				// In the little-endian world, we need to swap what's in the register to match the memory representation.
+				when ODIN_ENDIAN == .Little {
+					code = intrinsics.byte_swap(code)
+				}
+
+				// After the swap the in-memory bytes of `code` are in stream order on every
+				// target, so emitting the first `n_packed` of them keeps the header byte
+				// regardless of endianness.
+				packed := transmute([4]u8)code
+				copy(output[out:], packed[:n_packed])
+
+				out += n_packed
+				inp += int(pack.bytes_unpacked)
+			} else {
+				last_resort = true
+				continue encode
+			}
+		}
+	}
+	return out, nil
+}
+
+// Compresses `input` into a newly allocated byte slice using `model`.
+//
+// The result is allocated with `allocator` and owned by the caller.
+// Returns `.Stream_Too_Short` for empty input and `.Out_Of_Memory` when the output buffer cannot
+// be allocated. Any error from the encoder is returned together with whatever was encoded
+// up to that point.
+compress_string :: proc(input: string, model := DEFAULT_MODEL, allocator := context.allocator) -> (output: []u8, err: compress.Error) {
+	context.allocator = allocator
+
+	if len(input) == 0 {
+		return {}, .Stream_Too_Short
+	}
+
+	max_output_size := compress_bound(len(input))
+
+	buf: [dynamic]u8
+	if !resize(&buf, max_output_size) {
+		return {}, .Out_Of_Memory
+	}
+
+	// Encode with the model the caller asked for, not the default one.
+	length, result := compress_string_to_buffer(input, buf[:], model)
+	resize(&buf, length)
+	return buf[:length], result
+}
+// `compress` is a procedure group: the call resolves to the buffer-writing or the
+// slice-allocating variant depending on the arguments passed.
+compress :: proc{compress_string_to_buffer, compress_string} \ No newline at end of file
diff --git a/examples/all/all_main.odin b/examples/all/all_main.odin
index 4f5bfbdc1..27f199062 100644
--- a/examples/all/all_main.odin
+++ b/examples/all/all_main.odin
@@ -10,6 +10,7 @@ import c "core:c"
import libc "core:c/libc"
import compress "core:compress"
+import shoco "core:compress/shoco"
import gzip "core:compress/gzip"
import zlib "core:compress/zlib"
@@ -115,6 +116,7 @@ _ :: bytes
_ :: c
_ :: libc
_ :: compress
+_ :: shoco
_ :: gzip
_ :: zlib
_ :: bit_array
diff --git a/tests/core/assets/Shoco/LICENSE b/tests/core/assets/Shoco/LICENSE
new file mode 100644
index 000000000..9ca94bcdf
--- /dev/null
+++ b/tests/core/assets/Shoco/LICENSE
@@ -0,0 +1,26 @@
+Copyright (c) 2016-2021 Ginger Bill. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/tests/core/assets/Shoco/LICENSE.shoco b/tests/core/assets/Shoco/LICENSE.shoco
new file mode 100644
index 000000000..5d5e4d623
--- /dev/null
+++ b/tests/core/assets/Shoco/LICENSE.shoco
Binary files differ
diff --git a/tests/core/assets/Shoco/README.md b/tests/core/assets/Shoco/README.md
new file mode 100644
index 000000000..9e46f80d0
--- /dev/null
+++ b/tests/core/assets/Shoco/README.md
@@ -0,0 +1,95 @@
+<p align="center">
+ <img src="misc/logo-slim.png" alt="Odin logo" style="width:65%">
+ <br/>
+ The Data-Oriented Language for Sane Software Development.
+ <br/>
+ <br/>
+ <a href="https://github.com/odin-lang/odin/releases/latest">
+ <img src="https://img.shields.io/github/release/odin-lang/odin.svg">
+ </a>
+ <a href="https://github.com/odin-lang/odin/releases/latest">
+ <img src="https://img.shields.io/badge/platforms-Windows%20|%20Linux%20|%20macOS-green.svg">
+ </a>
+ <br>
+ <a href="https://discord.gg/odinlang">
+ <img src="https://img.shields.io/discord/568138951836172421?logo=discord">
+ </a>
+ <a href="https://github.com/odin-lang/odin/actions">
+ <img src="https://github.com/odin-lang/odin/workflows/CI/badge.svg?branch=master&event=push">
+ </a>
+</p>
+
+# The Odin Programming Language
+
+
+Odin is a general-purpose programming language with distinct typing, built for high performance, modern systems, and built-in data-oriented data types. The Odin Programming Language, the C alternative for the joy of programming.
+
+Website: [https://odin-lang.org/](https://odin-lang.org/)
+
+```odin
+package main
+
+import "core:fmt"
+
+main :: proc() {
+ program := "+ + * 😃 - /"
+ accumulator := 0
+
+ for token in program {
+ switch token {
+ case '+': accumulator += 1
+ case '-': accumulator -= 1
+ case '*': accumulator *= 2
+ case '/': accumulator /= 2
+ case '😃': accumulator *= accumulator
+ case: // Ignore everything else
+ }
+ }
+
+ fmt.printf("The program \"%s\" calculates the value %d\n",
+ program, accumulator)
+}
+
+```
+
+## Documentation
+
+#### [Getting Started](https://odin-lang.org/docs/install)
+
+Instructions for downloading and installing the Odin compiler and libraries.
+
+#### [Nightly Builds](https://odin-lang.org/docs/nightly/)
+
+Get the latest nightly builds of Odin.
+
+### Learning Odin
+
+#### [Overview of Odin](https://odin-lang.org/docs/overview)
+
+An overview of the Odin programming language.
+
+#### [Frequently Asked Questions (FAQ)](https://odin-lang.org/docs/faq)
+
+Answers to common questions about Odin.
+
+#### [Packages](https://pkg.odin-lang.org/)
+
+Documentation for all the official packages part of the [core](https://pkg.odin-lang.org/core/) and [vendor](https://pkg.odin-lang.org/vendor/) library collections.
+
+#### [The Odin Wiki](https://github.com/odin-lang/Odin/wiki)
+
+A wiki maintained by the Odin community.
+
+#### [Odin Discord](https://discord.gg/sVBPHEv)
+
+Get live support and talk with other odiners on the Odin Discord.
+
+### Articles
+
+#### [The Odin Blog](https://odin-lang.org/news/)
+
+The official blog of the Odin programming language, featuring announcements, news, and in-depth articles by the Odin team and guests.
+
+## Warnings
+
+* The Odin compiler is still in development.
diff --git a/tests/core/assets/Shoco/README.md.shoco b/tests/core/assets/Shoco/README.md.shoco
new file mode 100644
index 000000000..013f4f469
--- /dev/null
+++ b/tests/core/assets/Shoco/README.md.shoco
Binary files differ
diff --git a/tests/core/compress/test_core_compress.odin b/tests/core/compress/test_core_compress.odin
index 51952a568..ee7233e52 100644
--- a/tests/core/compress/test_core_compress.odin
+++ b/tests/core/compress/test_core_compress.odin
@@ -7,13 +7,14 @@ package test_core_compress
List of contributors:
Jeroen van Rijn: Initial implementation.
- A test suite for ZLIB, GZIP.
+ A test suite for ZLIB, GZIP and Shoco.
*/
import "core:testing"
import "core:compress/zlib"
import "core:compress/gzip"
+import "core:compress/shoco"
import "core:bytes"
import "core:fmt"
@@ -48,6 +49,7 @@ main :: proc() {
t := testing.T{w=w}
zlib_test(&t)
gzip_test(&t)
+ shoco_test(&t)
fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
if TEST_fail > 0 {
@@ -134,3 +136,56 @@ gzip_test :: proc(t: ^testing.T) {
expect(t, false, error)
}
}
+
+@test
+shoco_test :: proc(t: ^testing.T) {
+
+	// Each entry pairs a reference compressed file with its raw counterpart, plus two
+	// truncation lengths that must make `decompress` report `Stream_Too_Short`.
+	Shoco_Tests :: []struct{
+		compressed:     []u8,
+		raw:            []u8,
+		short_pack:     int,
+		short_sentinel: int,
+	}{
+		{ #load("../assets/Shoco/README.md.shoco"), #load("../assets/Shoco/README.md"), 10, 1006 },
+		{ #load("../assets/Shoco/LICENSE.shoco"),   #load("../assets/Shoco/LICENSE"),   25, 68   },
+	}
+
+	for test_case in Shoco_Tests {
+		raw_len    := len(test_case.raw)
+		packed_len := len(test_case.compressed)
+
+		// One scratch buffer, large enough for either direction.
+		scratch_len := max(shoco.compress_bound(raw_len), shoco.decompress_bound(packed_len))
+		scratch     := make([]u8, scratch_len)
+		defer delete(scratch)
+
+		// The reference stream decompresses to the raw file.
+		n, status := shoco.decompress(test_case.compressed, scratch[:])
+		expect(t, status == nil, fmt.tprintf("Expected `decompress` to return `nil`, got %v", status))
+		expect(t, n == raw_len, fmt.tprintf("Decompressed %v bytes into %v. Expected to decompress into %v bytes.", packed_len, n, raw_len))
+		expect(t, string(scratch[:n]) == string(test_case.raw), "Decompressed contents don't match.")
+
+		// The raw file compresses to the size of the reference stream.
+		n, status = shoco.compress(string(test_case.raw), scratch[:])
+		expect(t, status == nil, "Expected `compress` to return `nil`.")
+		expect(t, n == packed_len, fmt.tprintf("Compressed %v bytes into %v. Expected to compress into %v bytes.", raw_len, n, packed_len))
+
+		// Output buffers that are 10 bytes too small are reported as such.
+		n, status = shoco.decompress(test_case.compressed, scratch[:raw_len - 10])
+		expect(t, status == .Output_Too_Short, fmt.tprintf("Decompressing into too small a buffer returned %v, expected `.Output_Too_Short`", status))
+
+		n, status = shoco.compress(string(test_case.raw), scratch[:packed_len - 10])
+		expect(t, status == .Output_Too_Short, fmt.tprintf("Compressing into too small a buffer returned %v, expected `.Output_Too_Short`", status))
+
+		// Truncated streams: cut off inside a pack, then right after a non-ASCII sentinel.
+		n, status = shoco.decompress(test_case.compressed[:test_case.short_pack], scratch[:])
+		expect(t, status == .Stream_Too_Short, "Expected `decompress` to return `Stream_Too_Short` because there was no more data after selecting a pack.")
+
+		n, status = shoco.decompress(test_case.compressed[:test_case.short_sentinel], scratch[:])
+		expect(t, status == .Stream_Too_Short, "Expected `decompress` to return `Stream_Too_Short` because there was no more data after non-ASCII sentinel.")
+	}
+} \ No newline at end of file