aboutsummaryrefslogtreecommitdiff
path: root/core/compress
diff options
context:
space:
mode:
authorJeroen van Rijn <Kelimion@users.noreply.github.com>2022-04-22 16:55:47 +0200
committerJeroen van Rijn <Kelimion@users.noreply.github.com>2022-04-22 16:55:47 +0200
commite799476f90537ca173205a64da1fbf87b894b42e (patch)
treee8234bbba2c4e3d9e0608079a57a9112a1054dcf /core/compress
parentb4f8efcbe6cbf2d9b17dce0758ecbb29f4bd6451 (diff)
[compress/shoco] Add short string compressor.
Diffstat (limited to 'core/compress')
-rw-r--r--core/compress/shoco/model.odin148
-rw-r--r--core/compress/shoco/shoco.odin318
2 files changed, 466 insertions, 0 deletions
diff --git a/core/compress/shoco/model.odin b/core/compress/shoco/model.odin
new file mode 100644
index 000000000..49e3dd97e
--- /dev/null
+++ b/core/compress/shoco/model.odin
@@ -0,0 +1,148 @@
+/*
+ This file was generated, so don't edit this by hand.
+ Transliterated from https://github.com/Ed-von-Schleck/shoco/blob/master/shoco_model.h,
+ which is an English word model.
+*/
+
+// package shoco is an implementation of the shoco short string compressor
+package shoco
+
+DEFAULT_MODEL :: Shoco_Model {
+ min_char = 39,
+ max_char = 122,
+ characters_by_id = {
+ 'e', 'a', 'i', 'o', 't', 'h', 'n', 'r', 's', 'l', 'u', 'c', 'w', 'm', 'd', 'b', 'p', 'f', 'g', 'v', 'y', 'k', '-', 'H', 'M', 'T', '\'', 'B', 'x', 'I', 'W', 'L',
+ },
+ ids_by_character = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, -1, -1, -1, -1, -1, 22, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 27, -1, -1, -1, -1, -1, 23, 29, -1, -1, 31, 24, -1, -1, -1, -1, -1, -1, 25, -1, -1, 30, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 15, 11, 14, 0, 17, 18, 5, 2, -1, 21, 9, 13, 6, 3, 16, -1, 7, 8, 4, 10, 19, 12, 28, 20, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ },
+ successors_by_bigram = {
+ {7, 4, 12, -1, 6, -1, 1, 0, 3, 5, -1, 9, -1, 8, 2, -1, 15, 14, -1, 10, 11, -1, -1, -1, -1, -1, -1, -1, 13, -1, -1, -1},
+ {-1, -1, 6, -1, 1, -1, 0, 3, 2, 4, 15, 11, -1, 9, 5, 10, 13, -1, 12, 8, 7, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {9, 11, -1, 4, 2, -1, 0, 8, 1, 5, -1, 6, -1, 3, 7, 15, -1, 12, 10, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, -1, 14, 7, 5, -1, 1, 2, 8, 9, 0, 15, 6, 4, 11, -1, 12, 3, -1, 10, -1, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {2, 4, 3, 1, 5, 0, -1, 6, 10, 9, 7, 12, 11, -1, -1, -1, -1, 13, -1, -1, 8, -1, 15, -1, -1, -1, 14, -1, -1, -1, -1, -1},
+ {0, 1, 2, 3, 4, -1, -1, 5, 9, 10, 6, -1, -1, 8, 15, 11, -1, 14, -1, -1, 7, -1, 13, -1, -1, -1, 12, -1, -1, -1, -1, -1},
+ {2, 8, 7, 4, 3, -1, 9, -1, 6, 11, -1, 5, -1, -1, 0, -1, -1, 14, 1, 15, 10, 12, -1, -1, -1, -1, 13, -1, -1, -1, -1, -1},
+ {0, 3, 1, 2, 6, -1, 9, 8, 4, 12, 13, 10, -1, 11, 7, -1, -1, 15, 14, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {0, 6, 3, 4, 1, 2, -1, -1, 5, 10, 7, 9, 11, 12, -1, -1, 8, 14, -1, -1, 15, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {0, 6, 2, 5, 9, -1, -1, -1, 10, 1, 8, -1, 12, 14, 4, -1, 15, 7, -1, 13, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {8, 10, 9, 15, 1, -1, 4, 0, 3, 2, -1, 6, -1, 12, 11, 13, 7, 14, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {1, 3, 6, 0, 4, 2, -1, 7, 13, 8, 9, 11, -1, -1, 15, -1, -1, -1, -1, -1, 10, 5, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {3, 0, 1, 4, -1, 2, 5, 6, 7, 8, -1, 14, -1, -1, 9, 15, -1, 12, -1, -1, -1, 10, 11, -1, -1, -1, 13, -1, -1, -1, -1, -1},
+ {0, 1, 3, 2, 15, -1, 12, -1, 7, 14, 4, -1, -1, 9, -1, 8, 5, 10, -1, -1, 6, -1, 13, -1, -1, -1, 11, -1, -1, -1, -1, -1},
+ {0, 3, 1, 2, -1, -1, 12, 6, 4, 9, 7, -1, -1, 14, 8, -1, -1, 15, 11, 13, 5, -1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {0, 5, 7, 2, 10, 13, -1, 6, 8, 1, 3, -1, -1, 14, 15, 11, -1, -1, -1, 12, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {0, 2, 6, 3, 7, 10, -1, 1, 9, 4, 8, -1, -1, 15, -1, 12, 5, -1, -1, -1, 11, -1, 13, -1, -1, -1, 14, -1, -1, -1, -1, -1},
+ {1, 3, 4, 0, 7, -1, 12, 2, 11, 8, 6, 13, -1, -1, -1, -1, -1, 5, -1, -1, 10, 15, 9, -1, -1, -1, 14, -1, -1, -1, -1, -1},
+ {1, 3, 5, 2, 13, 0, 9, 4, 7, 6, 8, -1, -1, 15, -1, 11, -1, -1, 10, -1, 14, -1, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {0, 2, 1, 3, -1, -1, -1, 6, -1, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {1, 11, 4, 0, 3, -1, 13, 12, 2, 7, -1, -1, 15, 10, 5, 8, 14, -1, -1, -1, -1, -1, 9, -1, -1, -1, 6, -1, -1, -1, -1, -1},
+ {0, 9, 2, 14, 15, 4, 1, 13, 3, 5, -1, -1, 10, -1, -1, -1, -1, 6, 12, -1, 7, -1, 8, -1, -1, -1, 11, -1, -1, -1, -1, -1},
+ {-1, 2, 14, -1, 1, 5, 8, 7, 4, 12, -1, 6, 9, 11, 13, 3, 10, 15, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {0, 1, 3, 2, -1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {4, 3, 1, 5, -1, -1, -1, 0, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {2, 8, 4, 1, -1, 0, -1, 6, -1, -1, 5, -1, 7, -1, -1, -1, -1, -1, -1, -1, 10, -1, -1, 9, -1, -1, -1, -1, -1, -1, -1, -1},
+ {12, 5, -1, -1, 1, -1, -1, 7, 0, 3, -1, 2, -1, 4, 6, -1, -1, -1, -1, 8, -1, -1, 15, -1, 13, 9, -1, -1, -1, -1, -1, 11},
+ {1, 3, 2, 4, -1, -1, -1, 5, -1, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, 8, -1, -1},
+ {5, 3, 4, 12, 1, 6, -1, -1, -1, -1, 8, 2, -1, -1, -1, -1, 0, 9, -1, -1, 11, -1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, -1, -1, -1, 0, -1, 1, 12, 3, -1, -1, -1, -1, 5, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1, 6, -1, 10},
+ {2, 3, 1, 4, -1, 0, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, -1},
+ {5, 1, 3, 0, -1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, 9, -1, -1, 6, -1, 7},
+ },
+ successors_reversed = {
+ {'s', 't', 'c', 'l', 'm', 'a', 'd', 'r', 'v', 'T', 'A', 'L', 'e', 'M', 'Y', '-'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'-', 't', 'a', 'b', 's', 'h', 'c', 'r', 'n', 'w', 'p', 'm', 'l', 'd', 'i', 'f'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'u', 'e', 'i', 'a', 'o', 'r', 'y', 'l', 'I', 'E', 'R', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'e', 'a', 'o', 'i', 'u', 'A', 'y', 'E', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'t', 'n', 'f', 's', '\'', 'm', 'I', 'N', 'A', 'E', 'L', 'Z', 'r', 'V', 'R', 'C'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'o', 'a', 'y', 'i', 'u', 'e', 'I', 'L', 'D', '\'', 'E', 'Y', '\x00', '\x00', '\x00', '\x00'},
+ {'r', 'i', 'y', 'a', 'e', 'o', 'u', 'Y', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'h', 'o', 'e', 'E', 'i', 'u', 'r', 'w', 'a', 'H', 'y', 'R', 'Z', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'h', 'i', 'e', 'a', 'o', 'r', 'I', 'y', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'n', 't', 's', 'r', 'l', 'd', 'i', 'y', 'v', 'm', 'b', 'c', 'g', 'p', 'k', 'u'},
+ {'e', 'l', 'o', 'u', 'y', 'a', 'r', 'i', 's', 'j', 't', 'b', 'v', 'h', 'm', 'd'},
+ {'o', 'e', 'h', 'a', 't', 'k', 'i', 'r', 'l', 'u', 'y', 'c', 'q', 's', '-', 'd'},
+ {'e', 'i', 'o', 'a', 's', 'y', 'r', 'u', 'd', 'l', '-', 'g', 'n', 'v', 'm', 'f'},
+ {'r', 'n', 'd', 's', 'a', 'l', 't', 'e', 'm', 'c', 'v', 'y', 'i', 'x', 'f', 'p'},
+ {'o', 'e', 'r', 'a', 'i', 'f', 'u', 't', 'l', '-', 'y', 's', 'n', 'c', '\'', 'k'},
+ {'h', 'e', 'o', 'a', 'r', 'i', 'l', 's', 'u', 'n', 'g', 'b', '-', 't', 'y', 'm'},
+ {'e', 'a', 'i', 'o', 't', 'r', 'u', 'y', 'm', 's', 'l', 'b', '\'', '-', 'f', 'd'},
+ {'n', 's', 't', 'm', 'o', 'l', 'c', 'd', 'r', 'e', 'g', 'a', 'f', 'v', 'z', 'b'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'e', 'n', 'i', 's', 'h', 'l', 'f', 'y', '-', 'a', 'w', '\'', 'g', 'r', 'o', 't'},
+ {'e', 'l', 'i', 'y', 'd', 'o', 'a', 'f', 'u', 't', 's', 'k', 'w', 'v', 'm', 'p'},
+ {'e', 'a', 'o', 'i', 'u', 'p', 'y', 's', 'b', 'm', 'f', '\'', 'n', '-', 'l', 't'},
+ {'d', 'g', 'e', 't', 'o', 'c', 's', 'i', 'a', 'n', 'y', 'l', 'k', '\'', 'f', 'v'},
+ {'u', 'n', 'r', 'f', 'm', 't', 'w', 'o', 's', 'l', 'v', 'd', 'p', 'k', 'i', 'c'},
+ {'e', 'r', 'a', 'o', 'l', 'p', 'i', 't', 'u', 's', 'h', 'y', 'b', '-', '\'', 'm'},
+ {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'e', 'i', 'o', 'a', 's', 'y', 't', 'd', 'r', 'n', 'c', 'm', 'l', 'u', 'g', 'f'},
+ {'e', 't', 'h', 'i', 'o', 's', 'a', 'u', 'p', 'c', 'l', 'w', 'm', 'k', 'f', 'y'},
+ {'h', 'o', 'e', 'i', 'a', 't', 'r', 'u', 'y', 'l', 's', 'w', 'c', 'f', '\'', '-'},
+ {'r', 't', 'l', 's', 'n', 'g', 'c', 'p', 'e', 'i', 'a', 'd', 'm', 'b', 'f', 'o'},
+ {'e', 'i', 'a', 'o', 'y', 'u', 'r', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'},
+ {'a', 'i', 'h', 'e', 'o', 'n', 'r', 's', 'l', 'd', 'k', '-', 'f', '\'', 'c', 'b'},
+ {'p', 't', 'c', 'a', 'i', 'e', 'h', 'q', 'u', 'f', '-', 'y', 'o', '\x00', '\x00', '\x00'},
+ {'o', 'e', 's', 't', 'i', 'd', '\'', 'l', 'b', '-', 'm', 'a', 'r', 'n', 'p', 'w'},
+ },
+
+ character_count = 32,
+ successor_count = 16,
+
+ max_successor_n = 7,
+ packs = {
+ { 0x80000000, 1, 2, { 26, 24, 24, 24, 24, 24, 24, 24 }, { 15, 3, 0, 0, 0, 0, 0, 0 }, 0xc0, 0x80 },
+ { 0xc0000000, 2, 4, { 25, 22, 19, 16, 16, 16, 16, 16 }, { 15, 7, 7, 7, 0, 0, 0, 0 }, 0xe0, 0xc0 },
+ { 0xe0000000, 4, 8, { 23, 19, 15, 11, 8, 5, 2, 0 }, { 31, 15, 15, 15, 7, 7, 7, 3 }, 0xf0, 0xe0 },
+ },
+} \ No newline at end of file
diff --git a/core/compress/shoco/shoco.odin b/core/compress/shoco/shoco.odin
new file mode 100644
index 000000000..9c5008f5d
--- /dev/null
+++ b/core/compress/shoco/shoco.odin
@@ -0,0 +1,318 @@
+/*
+ Copyright 2022 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-3 license.
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation.
+
+ An implementation of [shoco](https://github.com/Ed-von-Schleck/shoco) by Christian Schramm.
+*/
+
+// package shoco is an implementation of the shoco short string compressor
+package shoco
+
+import "core:intrinsics"
+import "core:compress"
+
+Shoco_Pack :: struct {
+ word: u32,
+ bytes_packed: i8,
+ bytes_unpacked: i8,
+ offsets: [8]u16,
+ masks: [8]i16,
+ header_mask: u8,
+ header: u8,
+}
+
+Shoco_Model :: struct {
+ min_char: u8,
+ max_char: u8,
+ characters_by_id: []u8,
+ ids_by_character: [256]i16,
+ successors_by_bigram: [][]i8,
+ successors_reversed: [][]u8,
+
+ character_count: u8,
+ successor_count: u8,
+ max_successor_n: i8,
+ packs: []Shoco_Pack,
+}
+
+compress_bound :: proc(uncompressed_size: int) -> (worst_case_compressed_size: int) {
+ // Worst case compression happens when input is non-ASCII (128-255)
+ // Encoded as 0x00 + the byte in question.
+ return uncompressed_size * 2
+}
+
+decompress_bound :: proc(compressed_size: int, model := DEFAULT_MODEL) -> (maximum_decompressed_size: int) {
+ // Best case compression is 2:1
+ most: f64
+ for pack in model.packs {
+ val := f64(compressed_size) / f64(pack.bytes_packed) * f64(pack.bytes_unpacked)
+ most = max(most, val)
+ }
+ return int(most)
+}
+
+find_best_encoding :: proc(indices: []i16, n_consecutive: i8, model := DEFAULT_MODEL) -> (res: int) {
+ for p := len(model.packs); p > 0; p -= 1 {
+ pack := model.packs[p - 1]
+ if n_consecutive >= pack.bytes_unpacked {
+ have_index := true
+ for i := 0; i < int(pack.bytes_unpacked); i += 1 {
+ if indices[i] > pack.masks[i] {
+ have_index = false
+ break
+ }
+ }
+ if have_index {
+ return p - 1
+ }
+ }
+ }
+ return -1
+}
+
+validate_model :: proc(model: Shoco_Model) -> (int, compress.Error) {
+ if len(model.successors_reversed) != int(model.max_char - model.min_char) {
+ return 0, .Unknown_Compression_Method
+ }
+
+ if len(model.characters_by_id) != int(model.character_count) {
+ return 0, .Unknown_Compression_Method
+ }
+
+ if len(model.successors_by_bigram) != int(model.character_count) || len(model.successors_by_bigram[0]) != int(model.character_count) {
+ return 0, .Unknown_Compression_Method
+ }
+
+ if len(model.successors_reversed[0]) != int(model.successor_count) {
+ return 0, .Unknown_Compression_Method
+ }
+
+ // Model seems legit.
+ return 0, nil
+}
+
+// Decompresses into provided buffer.
+decompress_slice_to_output_buffer :: proc(input: []u8, output: []u8, model := DEFAULT_MODEL) -> (size: int, err: compress.Error) {
+ inp, inp_end := 0, len(input)
+ out, out_end := 0, len(output)
+
+ validate_model(model) or_return
+
+ for inp < inp_end {
+ val := transmute(i8)input[inp]
+ mark := int(-1)
+
+ for val < 0 {
+ val <<= 1
+ mark += 1
+ }
+
+ if mark > len(model.packs) {
+ return out, .Unknown_Compression_Method
+ }
+
+ if mark < 0 {
+ if out >= out_end {
+ return out, .Output_Too_Short
+ }
+
+ // Ignore the sentinel value for non-ASCII chars
+ if input[inp] == 0x00 {
+ inp += 1
+ if inp >= inp_end {
+ return out, .Stream_Too_Short
+ }
+ }
+ output[out] = input[inp]
+ inp, out = inp + 1, out + 1
+
+ } else {
+ pack := model.packs[mark]
+
+ if out + int(pack.bytes_unpacked) > out_end {
+ return out, .Output_Too_Short
+ } else if inp + int(pack.bytes_packed) > inp_end {
+ return out, .Stream_Too_Short
+ }
+
+ code := intrinsics.unaligned_load((^u32)(&input[inp]))
+ when ODIN_ENDIAN == .Little {
+ code = intrinsics.byte_swap(code)
+ }
+
+ // Unpack the leading char
+ offset := pack.offsets[0]
+ mask := pack.masks[0]
+
+ last_chr := model.characters_by_id[(code >> offset) & u32(mask)]
+ output[out] = last_chr
+
+ // Unpack the successor chars
+ for i := 1; i < int(pack.bytes_unpacked); i += 1 {
+ offset = pack.offsets[i]
+ mask = pack.masks[i]
+
+ last_chr = model.successors_reversed[last_chr - model.min_char][(code >> offset) & u32(mask)]
+ output[out + i] = last_chr
+ }
+
+ out += int(pack.bytes_unpacked)
+ inp += int(pack.bytes_packed)
+ }
+ }
+
+ return out, nil
+}
+
+decompress_slice_to_string :: proc(input: []u8, model := DEFAULT_MODEL, allocator := context.allocator) -> (res: string, err: compress.Error) {
+ context.allocator = allocator
+
+ if len(input) == 0 {
+ return "", .Stream_Too_Short
+ }
+
+ max_output_size := decompress_bound(len(input), model)
+
+ buf: [dynamic]u8
+ if !resize(&buf, max_output_size) {
+ return "", .Out_Of_Memory
+ }
+
+ length, result := decompress_slice_to_output_buffer(input, buf[:])
+ resize(&buf, length)
+ return string(buf[:]), result
+}
+decompress :: proc{decompress_slice_to_output_buffer, decompress_slice_to_string}
+
+compress_string_to_buffer :: proc(input: string, output: []u8, model := DEFAULT_MODEL, allocator := context.allocator) -> (size: int, err: compress.Error) {
+ inp, inp_end := 0, len(input)
+ out, out_end := 0, len(output)
+ output := output
+
+ validate_model(model) or_return
+
+ indices := make([]i16, model.max_successor_n + 1)
+ defer delete(indices)
+
+ last_resort := false
+
+ encode: for inp < inp_end {
+ if last_resort {
+ last_resort = false
+
+ if input[inp] & 0x80 == 0x80 {
+ // Non-ASCII case
+ if out + 2 > out_end {
+ return out, .Output_Too_Short
+ }
+
+ // Put in a sentinel byte
+ output[out] = 0x00
+ out += 1
+ } else {
+ // An ASCII byte
+ if out + 1 > out_end {
+ return out, .Output_Too_Short
+ }
+ }
+ output[out] = input[inp]
+ out, inp = out + 1, inp + 1
+ } else {
+ // Find the longest string of known successors
+ indices[0] = model.ids_by_character[input[inp]]
+ last_chr_index := indices[0]
+
+ if last_chr_index < 0 {
+ last_resort = true
+ continue encode
+ }
+
+ rest := inp_end - inp
+ n_consecutive: i8 = 1
+ for ; n_consecutive <= model.max_successor_n; n_consecutive += 1 {
+ if inp_end > 0 && int(n_consecutive) == rest {
+ break
+ }
+
+ current_index := model.ids_by_character[input[inp + int(n_consecutive)]]
+ if current_index < 0 { // '\0' is always -1
+ break
+ }
+
+ successor_index := model.successors_by_bigram[last_chr_index][current_index]
+ if successor_index < 0 {
+ break
+ }
+
+ indices[n_consecutive] = i16(successor_index)
+ last_chr_index = current_index
+ }
+
+ if n_consecutive < 2 {
+ last_resort = true
+ continue encode
+ }
+
+ pack_n := find_best_encoding(indices, n_consecutive)
+ if pack_n >= 0 {
+ if out + int(model.packs[pack_n].bytes_packed) > out_end {
+ return out, .Output_Too_Short
+ }
+
+ pack := model.packs[pack_n]
+ code := pack.word
+
+ for i := 0; i < int(pack.bytes_unpacked); i += 1 {
+ code |= u32(indices[i]) << pack.offsets[i]
+ }
+
+ // In the little-endian world, we need to swap what's in the register to match the memory representation.
+ when ODIN_ENDIAN == .Little {
+ code = intrinsics.byte_swap(code)
+ }
+ out_ptr := raw_data(output[out:])
+
+ switch pack.bytes_packed {
+ case 4:
+ intrinsics.unaligned_store(transmute(^u32)out_ptr, code)
+ case 2:
+ intrinsics.unaligned_store(transmute(^u16)out_ptr, u16(code))
+ case 1:
+ intrinsics.unaligned_store(transmute(^u8)out_ptr, u8(code))
+ case:
+ return out, .Unknown_Compression_Method
+ }
+
+ out += int(pack.bytes_packed)
+ inp += int(pack.bytes_unpacked)
+ } else {
+ last_resort = true
+ continue encode
+ }
+ }
+ }
+ return out, nil
+}
+
+compress_string :: proc(input: string, model := DEFAULT_MODEL, allocator := context.allocator) -> (output: []u8, err: compress.Error) {
+ context.allocator = allocator
+
+ if len(input) == 0 {
+ return {}, .Stream_Too_Short
+ }
+
+ max_output_size := compress_bound(len(input))
+
+ buf: [dynamic]u8
+ if !resize(&buf, max_output_size) {
+ return {}, .Out_Of_Memory
+ }
+
+ length, result := compress_string_to_buffer(input, buf[:])
+ resize(&buf, length)
+ return buf[:length], result
+}
+compress :: proc{compress_string_to_buffer, compress_string} \ No newline at end of file