aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeroen van Rijn <Kelimion@users.noreply.github.com>2022-04-28 18:58:49 +0200
committerJeroen van Rijn <Kelimion@users.noreply.github.com>2022-04-28 18:58:49 +0200
commit2fae6eda2321881ccf8d942e2c27e6a7c29aebfd (patch)
tree859b4a8b949c6c5de3250ce8617a06ce7f62b381
parente53ba3b11612db5c52ecf9b523e4d0ed87f7b1ad (diff)
[i18n] Initial i18n support.
- Add initial GetText .MO parser - Add translation struct and helpers - Pluralized lookup TODO: - Support for more translation catalog file formats.
-rw-r--r--core/i18n/example/i18n_example.odin64
-rw-r--r--core/i18n/example/messages.pot30
-rw-r--r--core/i18n/example/nl_NL.mobin0 -> 672 bytes
-rw-r--r--core/i18n/example/nl_NL.po33
-rw-r--r--core/i18n/gettext.odin163
-rw-r--r--core/i18n/i18n.odin116
6 files changed, 406 insertions, 0 deletions
diff --git a/core/i18n/example/i18n_example.odin b/core/i18n/example/i18n_example.odin
new file mode 100644
index 000000000..f9fb2a353
--- /dev/null
+++ b/core/i18n/example/i18n_example.odin
@@ -0,0 +1,64 @@
+package i18n_example
+
+import "core:mem"
+import "core:fmt"
+import "core:i18n"
+
+LOC :: i18n.get
+
+/*
+	Demonstrates catalog lookups: parses the embedded nl_NL.mo catalog, installs it as `i18n.ACTIVE` and prints a handful of lookups through `LOC`.
+*/
+_main :: proc() {
+	/*
+		With the parsed catalog stored in `i18n.ACTIVE`, `LOC` can be called without an explicit catalog argument.
+	*/
+	catalog, parse_err := i18n.parse_mo(#load("nl_NL.mo"))
+	i18n.ACTIVE = catalog
+	defer i18n.destroy()
+
+	if parse_err != .None {
+		return
+	}
+
+	/*
+		Keys that are in the .MO catalog. The empty key is the header entry in nl_NL.po.
+	*/
+	fmt.println("-----")
+	fmt.println(LOC(""))
+	fmt.println("-----")
+	fmt.println(LOC("There are 69,105 leaves here."))
+	fmt.println("-----")
+	fmt.println(LOC("Hellope, World!"))
+
+	/*
+		Pluralized lookup accepts either the singular or the plural form as the key for the same translation.
+	*/
+	fmt.println("-----")
+	counts := [2]int{1, 42}
+	for n in counts {
+		fmt.printf(LOC("There is %d leaf.\n", n), n)
+	}
+	for n in counts {
+		fmt.printf(LOC("There are %d leaves.\n", n), n)
+	}
+
+	/*
+		This key is not in the catalog, so the lookup hands back the key itself.
+	*/
+	fmt.println("-----")
+	fmt.println(LOC("Come visit us on Discord!"))
+}
+
+/*
+	Entry point. Runs `_main` under a tracking allocator and prints every allocation that is still live afterwards.
+*/
+main :: proc() {
+	using fmt
+
+	track: mem.Tracking_Allocator
+	mem.tracking_allocator_init(&track, context.allocator)
+	/*
+		Release the tracker's own bookkeeping once the report below has been printed.
+	*/
+	defer mem.tracking_allocator_destroy(&track)
+	context.allocator = mem.tracking_allocator(&track)
+
+	_main()
+
+	if len(track.allocation_map) > 0 {
+		println()
+		for _, v in track.allocation_map {
+			printf("%v Leaked %v bytes.\n", v.location, v.size)
+		}
+	}
+} \ No newline at end of file
diff --git a/core/i18n/example/messages.pot b/core/i18n/example/messages.pot
new file mode 100644
index 000000000..53d521b6b
--- /dev/null
+++ b/core/i18n/example/messages.pot
@@ -0,0 +1,30 @@
+# Odin i18n Example
+# Copyright (C) 2021 Jeroen van Rijn
+# This file is distributed under the same license as the PACKAGE package.
+# Jeroen van Rijn <Kelimion@users.noreply.github.com>, 2021.
+#
+#, fuzzy
+msgid ""
+msgstr "Project-Id-Version: Example 0.0.1\n"
+ "Report-Msgid-Bugs-To: Jeroen van Rijn <Kelimion@users.noreply.github.com>\n"
+ "POT-Creation-Date: 2021-11-27 19:23+0100\n"
+ "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+ "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+ "Language: en-GB\n"
+ "MIME-Version: 1.0\n"
+ "Content-Type: text/plain; charset=UTF-8\n"
+ "Content-Transfer-Encoding: 8bit\n"
+
+#: i18n_example.odin:28
+msgid "There are 69,105 leaves here."
+msgstr "Er zijn hier 69.105 bladeren."
+
+#: i18n_example.odin:30
+msgid "Hellope, World!"
+msgstr "Hallo, Wereld!"
+
+#: i18n_example.odin:36
+msgid "There is %d leaf.\n"
+msgid_plural "There are %d leaves.\n"
+msgstr[0] "Er is %d blad.\n"
+msgstr[1] "Er zijn %d bladeren.\n" \ No newline at end of file
diff --git a/core/i18n/example/nl_NL.mo b/core/i18n/example/nl_NL.mo
new file mode 100644
index 000000000..0b1a668f4
--- /dev/null
+++ b/core/i18n/example/nl_NL.mo
Binary files differ
diff --git a/core/i18n/example/nl_NL.po b/core/i18n/example/nl_NL.po
new file mode 100644
index 000000000..1b8acbcc1
--- /dev/null
+++ b/core/i18n/example/nl_NL.po
@@ -0,0 +1,33 @@
+# Odin i18n Example
+# Copyright (C) 2021 Jeroen van Rijn
+# This file is distributed under the same license as the PACKAGE package.
+# Jeroen van Rijn <Kelimion@users.noreply.github.com>, 2021.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: Example 0.0.1\n"
+"Report-Msgid-Bugs-To: Jeroen van Rijn <Kelimion@users.noreply.github.com>\n"
+"POT-Creation-Date: 2021-11-27 19:23+0100\n"
+"PO-Revision-Date: 2021-11-28 02:56+0100\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Language-Team: Odin Language Team\n"
+"X-Generator: Poedit 3.0\n"
+"Last-Translator: Jeroen van Rijn\n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+"Language: nl_NL\n"
+
+#: i18n_example.odin:28
+msgid "There are 69,105 leaves here."
+msgstr "Er zijn hier 69.105 bladeren."
+
+#: i18n_example.odin:30
+msgid "Hellope, World!"
+msgstr "Hallo, Wereld!"
+
+#: i18n_example.odin:36
+msgid "There is %d leaf.\n"
+msgid_plural "There are %d leaves.\n"
+msgstr[0] "Er is %d blad.\n"
+msgstr[1] "Er zijn %d bladeren.\n"
diff --git a/core/i18n/gettext.odin b/core/i18n/gettext.odin
new file mode 100644
index 000000000..7918e217e
--- /dev/null
+++ b/core/i18n/gettext.odin
@@ -0,0 +1,163 @@
+package i18n
+/*
+ A parser for GNU GetText .MO files.
+
+ Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-3 license.
+
+ A from-scratch implementation based on the specification found here:
+ https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation.
+*/
+import "core:os"
+import "core:strings"
+import "core:bytes"
+
+/*
+	Parses a GNU GetText .MO catalog held in memory.
+
+	Inputs:
+	- data:       the raw contents of the .MO file, in either byte order.
+	- pluralizer: optional rule that maps a number to a plural slot; stored on the catalog as-is.
+	- allocator:  used for the catalog, its string interner and temporary split buffers.
+
+	Returns:
+	- translation: the parsed catalog. Errors detected in the header return nil; errors detected
+	  while reading entries return the partially filled catalog, which the caller should still `destroy`.
+	- err: `.None` on success.
+*/
+parse_mo_from_slice :: proc(data: []u8, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
+	context.allocator = allocator
+	/*
+		An MO file should have at least a 4-byte magic, a 4-byte revision,
+		a 4-byte number of strings value, and 2 x 4-byte offsets.
+	*/
+	if len(data) < 20 {
+		return {}, .MO_File_Invalid
+	}
+
+	/*
+		Check magic. Should be 0x950412de in the file's byte order, which is either ours or swapped.
+	*/
+	native := true
+	magic := read_u32(data, native) or_return
+
+	if magic != 0x950412de {
+		native = false
+		magic = read_u32(data, native) or_return
+
+		if magic != 0x950412de { return {}, .MO_File_Invalid_Signature }
+	}
+
+	/*
+		The revision word holds the major version in its upper 16 bits and the minor version in
+		its lower 16 bits. Only the major version decides whether we can read the file.
+		Every header field has to be read in the byte order detected above.
+	*/
+	revision := read_u32(data[4:], native) or_return
+	if (revision >> 16) > 1 { return {}, .MO_File_Unsupported_Version }
+
+	count             := read_u32(data[ 8:], native) or_return
+	original_offset   := read_u32(data[12:], native) or_return
+	translated_offset := read_u32(data[16:], native) or_return
+
+	if count == 0 { return {}, .Empty_Translation_Catalog }
+
+	/*
+		Initialize Translation, interner and optional pluralizer.
+	*/
+	translation = new(Translation)
+	translation.pluralize = pluralizer
+	strings.intern_init(&translation.intern, allocator, allocator)
+
+	zero := []byte{0}
+
+	/*
+		Bounds are checked in 64 bits so that large offsets in a malformed file cannot wrap around
+		and slip past the checks.
+	*/
+	size := u64(len(data))
+
+	for n := u64(0); n < u64(count); n += 1 {
+		/*
+			Each table entry is a (length, offset) pair of 32-bit values.
+		*/
+		o_entry := u64(original_offset)   + 8 * n
+		t_entry := u64(translated_offset) + 8 * n
+		if size < o_entry + 8 || size < t_entry + 8 { return translation, .MO_File_Invalid }
+
+		o_length := read_u32(data[o_entry    :], native) or_return
+		o_offset := read_u32(data[o_entry + 4:], native) or_return
+
+		t_length := read_u32(data[t_entry    :], native) or_return
+		t_offset := read_u32(data[t_entry + 4:], native) or_return
+
+		/*
+			The + 1 accounts for the NUL terminator that follows each string.
+		*/
+		o_end := u64(o_offset) + u64(o_length) + 1
+		t_end := u64(t_offset) + u64(t_length) + 1
+		if size < max(o_end, t_end) { return translation, .Premature_EOF }
+
+		key := data[o_offset:][:o_length]
+		val := data[t_offset:][:t_length]
+
+		/*
+			Could be a pluralized string, in which case the forms are separated by NUL bytes.
+			The split buffers are temporary and are released on every way out of this iteration.
+		*/
+		keys := bytes.split(key, zero)
+		defer delete(keys)
+
+		vals := bytes.split(val, zero)
+		defer delete(vals)
+
+		if len(keys) != len(vals) || len(vals) > MAX_PLURALS {
+			return translation, .MO_File_Incorrect_Plural_Count
+		}
+
+		/*
+			Intern the values once; every key of this entry maps to the same set of slots.
+			Slots beyond the last value repeat that last value.
+		*/
+		interned_vals: [MAX_PLURALS]string
+		last_val: string
+
+		for v, i in vals {
+			last_val = strings.intern_get(&translation.intern, string(v))
+			interned_vals[i] = last_val
+		}
+		for i in len(vals)..<MAX_PLURALS {
+			interned_vals[i] = last_val
+		}
+
+		for k in keys {
+			interned_key := strings.intern_get(&translation.intern, string(k))
+			translation.k_v[interned_key] = interned_vals
+		}
+	}
+	return
+}
+
+/*
+	Reads `filename` in full and parses it as a GNU GetText .MO catalog.
+
+	Returns `.File_Error` when the file cannot be read; otherwise whatever `parse_mo_from_slice` returns.
+	The file contents are released again before returning.
+*/
+parse_mo_file :: proc(filename: string, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
+	context.allocator = allocator
+
+	contents, read_ok := os.read_entire_file(filename)
+	defer delete(contents)
+
+	if read_ok {
+		return parse_mo_from_slice(contents, pluralizer)
+	}
+	return {}, .File_Error
+}
+
+parse_mo :: proc { parse_mo_file, parse_mo_from_slice }
+
+/*
+ Helpers.
+*/
+/*
+	Reads a `u32` from the first four bytes of `data`.
+
+	With `native_endian` set the bytes are interpreted in the host's byte order, otherwise in the opposite one.
+	Returns `.Premature_EOF` when `data` holds fewer than four bytes.
+*/
+read_u32 :: proc(data: []u8, native_endian := true) -> (res: u32, err: Error) {
+	if len(data) < size_of(u32) {
+		return 0, .Premature_EOF
+	}
+
+	raw := (^u32)(raw_data(data))^
+
+	if !native_endian {
+		/*
+			Reinterpret the value in whichever byte order the host does not use.
+		*/
+		when ODIN_ENDIAN == .Little {
+			raw = u32(transmute(u32be)raw)
+		} else {
+			raw = u32(transmute(u32le)raw)
+		}
+	}
+	return raw, .None
+}
+
+/*
+	Reads a `u16` from the first two bytes of `data`.
+
+	With `native_endian` set the bytes are interpreted in the host's byte order, otherwise in the opposite one.
+	Returns `.Premature_EOF` when `data` holds fewer than two bytes.
+*/
+read_u16 :: proc(data: []u8, native_endian := true) -> (res: u16, err: Error) {
+	if len(data) < size_of(u16) {
+		return 0, .Premature_EOF
+	}
+
+	raw := (^u16)(raw_data(data))^
+
+	if !native_endian {
+		/*
+			Reinterpret the value in whichever byte order the host does not use.
+		*/
+		when ODIN_ENDIAN == .Little {
+			raw = u16(transmute(u16be)raw)
+		} else {
+			raw = u16(transmute(u16le)raw)
+		}
+	}
+	return raw, .None
+} \ No newline at end of file
diff --git a/core/i18n/i18n.odin b/core/i18n/i18n.odin
new file mode 100644
index 000000000..7c72f9858
--- /dev/null
+++ b/core/i18n/i18n.odin
@@ -0,0 +1,116 @@
+package i18n
+/*
+ Internationalization helpers.
+
+ Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's BSD-3 license.
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation.
+*/
+import "core:strings"
+
+/*
+ TODO:
+ - Support for more translation catalog file formats.
+*/
+
+MAX_PLURALS :: 10
+
+/*
+ Currently active catalog.
+*/
+ACTIVE: ^Translation
+
+/*
+ The main data structure. This can be generated from various different file formats, as long as we have a parser for them.
+
+ A catalog maps each known key to `MAX_PLURALS` plural slots. `destroy` releases the interner, the map and the struct itself.
+*/
+Translation :: struct {
+ /*
+ Key -> plural slots. The .MO parser fills the slots it has no text for with the last string it read for that entry.
+ */
+ k_v: map[string][MAX_PLURALS]string,
+ /*
+ String interner the .MO parser uses to store the keys and values that `k_v` refers to.
+ */
+ intern: strings.Intern,
+
+ /*
+ Optional rule mapping a number to a plural slot. When nil, `get` picks slot 0 for the number 1 and slot 1 for every other number.
+ */
+ pluralize: proc(number: int) -> int,
+}
+
+/*
+ Error values returned by the catalog parsers and their helpers. `None` means success.
+*/
+Error :: enum {
+ /*
+ General return values.
+ `Empty_Translation_Catalog` is returned by the .MO parser when the file declares zero strings.
+ */
+ None = 0,
+ Empty_Translation_Catalog,
+
+ /*
+ Couldn't find, open or read file.
+ */
+ File_Error,
+
+ /*
+ File too short.
+ Returned when a string would extend past the end of the data, or when a read helper is handed too few bytes.
+ */
+ Premature_EOF,
+
+ /*
+ GNU Gettext *.MO file errors.
+ - `MO_File_Invalid_Signature`: the magic number matches in neither byte order.
+ - `MO_File_Unsupported_Version`: the major version is greater than 1.
+ - `MO_File_Invalid`: the file is shorter than a header, or a string table entry lies outside the data.
+ - `MO_File_Incorrect_Plural_Count`: key and translation have a different number of forms, or more than `MAX_PLURALS`.
+ */
+ MO_File_Invalid_Signature,
+ MO_File_Unsupported_Version,
+ MO_File_Invalid,
+ MO_File_Incorrect_Plural_Count,
+}
+
+/*
+	Looks up `key` and returns the plural form that belongs to `number`.
+
+	Several ways to use:
+	- get(key), which uses number = 0 and the i18n.ACTIVE catalog, or
+	- get(key, number), which returns the appropriate plural from the active catalog, or
+	- get(key, number, catalog) to grab text from a specific one.
+
+	Note that under the default rule number = 0 selects slot 1, the plural form. Entries that the
+	.MO parser read without plural forms hold the same string in every slot, so a plain get(key)
+	still returns their only translation.
+
+	Returns `key` itself when `catalog` is nil or does not know the key.
+*/
+get :: proc(key: string, number := 0, catalog: ^Translation = ACTIVE) -> (value: string) {
+	/*
+		Without a catalog there is no pluralizer to consult. Hand back the key, the same way `get_by_slot` does.
+	*/
+	if catalog == nil {
+		return key
+	}
+
+	/*
+		A lot of languages use singular for 1 item and plural for 0 or more than 1 items. This is our default pluralize rule.
+	*/
+	plural := 1 if number != 1 else 0
+
+	if catalog.pluralize != nil {
+		plural = catalog.pluralize(number)
+	}
+	return get_by_slot(key, plural, catalog)
+}
+
+/*
+	Looks up `key` and returns the text stored in a specific plural slot.
+
+	Several ways to use:
+	- get_by_slot(key), which uses slot 0 and the i18n.ACTIVE catalog, or
+	- get_by_slot(key, slot), which returns the requested plural from the active catalog, or
+	- get_by_slot(key, slot, catalog) to grab text from a specific one.
+
+	If a file format parser doesn't (yet) support plural slots, each of the slots will point at the same string.
+
+	Returns `key` itself when the catalog hasn't been initialized yet or doesn't know the key.
+	Out-of-range slots are clamped to the first or last slot.
+*/
+get_by_slot :: proc(key: string, slot := 0, catalog: ^Translation = ACTIVE) -> (value: string) {
+	if catalog != nil {
+		if entry, found := catalog.k_v[key]; found {
+			return entry[clamp(slot, 0, MAX_PLURALS - 1)]
+		}
+	}
+	return key
+}
+
+/*
+	Releases a catalog: its interned strings, its lookup map and the catalog itself.
+
+	- destroy(), to clean up the currently active catalog i18n.ACTIVE
+	- destroy(catalog), to clean up a specific catalog.
+
+	Passing nil is a no-op. When the destroyed catalog is the active one, `ACTIVE` is reset to nil
+	so that later lookups fall back to returning the key instead of reading freed memory.
+*/
+destroy :: proc(catalog: ^Translation = ACTIVE) {
+	if catalog == nil {
+		return
+	}
+
+	if catalog == ACTIVE {
+		ACTIVE = nil
+	}
+
+	strings.intern_destroy(&catalog.intern)
+	delete(catalog.k_v)
+	free(catalog)
+} \ No newline at end of file