aboutsummaryrefslogtreecommitdiff
path: root/core/text/i18n/gettext.odin
blob: b0e3dae677e89b460a838895f5afcdd02c5d3330 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
package i18n
/*
	A parser for GNU GetText .MO files.

	Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>.
	Made available under Odin's license.

	A from-scratch implementation based on the specification found here:
		https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html

	Of the parse options, only `merge_sections` is consulted by this parser.
	The options parameter is part of the signature for consistency with other catalog formats.

	List of contributors:
		Jeroen van Rijn: Initial implementation.
*/
import "core:strings"
import "core:bytes"

/*
	Parses a GNU GetText .MO catalog held in memory into a `Translation`.

	Inputs:
	- data:       The complete contents of the .MO file.
	- options:    Parse options. Only `merge_sections` is consulted; when set, every entry is
	              stored under the "" section regardless of its `<context>EOT<key>` prefix.
	- pluralizer: Optional procedure stored on the returned translation as `pluralize`.
	- allocator:  Used for the translation, its string interner and its maps.

	Returns:
	- translation: The parsed catalog. When an error is detected after the catalog has been
	               allocated, the partially filled catalog is returned alongside the error,
	               so the caller remains responsible for releasing it.
	- err:         `.MO_File_Invalid`, `.MO_File_Invalid_Signature`, `.MO_File_Unsupported_Version`,
	               `.Empty_Translation_Catalog`, `.Premature_EOF`, `.MO_File_Incorrect_Plural_Count`
	               or `.Duplicate_Key` on failure.
*/
parse_mo_from_bytes :: proc(data: []byte, options := DEFAULT_PARSE_OPTIONS, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
	context.allocator = allocator
	/*
		An MO file should have at least a 4-byte magic, a 4-byte revision (major and minor),
		a 4-byte number of strings value, and 2 x 4-byte offsets.
	*/
	if len(data) < 20 {
		return {}, .MO_File_Invalid
	}

	// Check magic. Should be 0x950412de in native Endianness.
	native := true
	magic  := read_u32(data, native) or_return

	if magic != 0x950412de {
		// Retry byte-swapped: the file may have been written on a machine of the other endianness.
		native = false
		magic  = read_u32(data, native) or_return
		if magic != 0x950412de { return {}, .MO_File_Invalid_Signature }
	}

	// Every remaining header field must be read with the byte order detected above.
	// The revision is a single 32-bit value: major in the upper 16 bits, minor (ignored) in the lower.
	revision := read_u32(data[4:], native) or_return
	if (revision >> 16) > 1 { return {}, .MO_File_Unsupported_Version }

	count             := read_u32(data[ 8:], native) or_return
	original_offset   := read_u32(data[12:], native) or_return
	translated_offset := read_u32(data[16:], native) or_return

	if count == 0 { return {}, .Empty_Translation_Catalog }

	// Initialize Translation, interner and optional pluralizer.
	translation = new(Translation)
	translation.pluralize = pluralizer
	strings.intern_init(&translation.intern, allocator, allocator)

	// Bounds are computed in 64 bits so that hostile offsets/lengths cannot wrap around
	// and slip past the checks below.
	data_len := u64(len(data))

	for n := u32(0); n < count; n += 1 {
		// Grab string's original length and offset.
		o_entry := u64(original_offset) + 8 * u64(n)
		if data_len < o_entry + 8 { return translation, .MO_File_Invalid }

		o_length := read_u32(data[int(o_entry)    :], native) or_return
		o_offset := read_u32(data[int(o_entry) + 4:], native) or_return

		// Same for the translated string's table entry.
		t_entry := u64(translated_offset) + 8 * u64(n)
		if data_len < t_entry + 8 { return translation, .MO_File_Invalid }

		t_length := read_u32(data[int(t_entry)    :], native) or_return
		t_offset := read_u32(data[int(t_entry) + 4:], native) or_return

		// Each string is followed by a NUL terminator that isn't part of its length, hence the + 1.
		o_end := u64(o_offset) + u64(o_length) + 1
		t_end := u64(t_offset) + u64(t_length) + 1
		if data_len < max(o_end, t_end) { return translation, .Premature_EOF }

		key_data := data[o_offset:][:o_length]
		val_data := data[t_offset:][:t_length]

		// Could be a pluralized string: singular and plural forms are NUL-separated.
		zero := []byte{0}
		keys := bytes.split(key_data, zero); defer delete(keys)
		vals := bytes.split(val_data, zero); defer delete(vals)

		if (len(keys) != 1 && len(keys) != 2) || len(vals) > MAX_PLURALS {
			return translation, .MO_File_Incorrect_Plural_Count
		}

		for k in keys {
			section_name := ""
			key          := string(k)

			// Scan for <context>EOT<key>
			for ch, i in k {
				if ch == 0x04 {
					section_name = string(k[:i])
					key          = string(k[i+1:])
					break
				}
			}

			// If we merge sections, then all entries end in the "" context.
			if options.merge_sections {
				section_name = ""
			}

			section_name, _ = strings.intern_get(&translation.intern, section_name)
			if section_name not_in translation.k_v {
				translation.k_v[section_name] = {}
			}

			section         := &translation.k_v[section_name]
			interned_key, _ := strings.intern_get(&translation.intern, key)

			// Duplicate key should not be allowed.
			if interned_key in section {
				return translation, .Duplicate_Key
			}

			// Every key of this entry (singular and, if present, plural) maps to the full list of values.
			interned_vals := make([]string, len(vals))
			for v, i in vals {
				interned_vals[i], _ = strings.intern_get(&translation.intern, string(v))
			}
			section[interned_key] = interned_vals
		}
	}
	return
}

/*
	Reads a `u32` from the first four bytes of `data`.

	Inputs:
	- data:          Buffer to read from; only the first four bytes are used.
	- native_endian: When `true` the bytes are interpreted in the byte order of the target
	                 this package is compiled for, otherwise in the opposite byte order.

	Returns:
	- res: The decoded value, or 0 on error.
	- err: `.Premature_EOF` if `data` holds fewer than four bytes, `.None` otherwise.
*/
@(private)
read_u32 :: proc(data: []u8, native_endian := true) -> (res: u32, err: Error) {
	if len(data) < size_of(u32) { return 0, .Premature_EOF }

	// Assemble the value byte by byte instead of dereferencing a `^u32`:
	// `data` may start at any offset within the file, so it is not guaranteed to be 4-byte aligned.
	le := u32(data[0]) | (u32(data[1]) << 8) | (u32(data[2]) << 16) | (u32(data[3]) << 24)
	be := (u32(data[0]) << 24) | (u32(data[1]) << 16) | (u32(data[2]) << 8) | u32(data[3])

	// On a little-endian target "native" means little-endian; on any other target it is flipped.
	want_little := native_endian
	when ODIN_ENDIAN != .Little {
		want_little = !native_endian
	}

	if want_little {
		return le, .None
	}
	return be, .None
}

/*
	Reads a `u16` from the first two bytes of `data`.

	Inputs:
	- data:          Buffer to read from; only the first two bytes are used.
	- native_endian: When `true` the bytes are interpreted in the byte order of the target
	                 this package is compiled for, otherwise in the opposite byte order.

	Returns:
	- res: The decoded value, or 0 on error.
	- err: `.Premature_EOF` if `data` holds fewer than two bytes, `.None` otherwise.
*/
@(private)
read_u16 :: proc(data: []u8, native_endian := true) -> (res: u16, err: Error) {
	if len(data) < size_of(u16) { return 0, .Premature_EOF }

	// Assemble the value byte by byte instead of dereferencing a `^u16`:
	// `data` may start at any offset within the file, so it is not guaranteed to be 2-byte aligned.
	le := u16(data[0]) | (u16(data[1]) << 8)
	be := (u16(data[0]) << 8) | u16(data[1])

	// On a little-endian target "native" means little-endian; on any other target it is flipped.
	want_little := native_endian
	when ODIN_ENDIAN != .Little {
		want_little = !native_endian
	}

	if want_little {
		return le, .None
	}
	return be, .None
}