1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
|
package i18n
/*
A parser for GNU GetText .MO files.
Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's license.
A from-scratch implementation based on the specification found here:
https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
Options are ignored as they're not applicable to this format.
They're part of the signature for consistency with other catalog formats.
List of contributors:
Jeroen van Rijn: Initial implementation.
*/
import "core:os"
import "core:strings"
import "core:bytes"
/*
	Parses an in-memory GNU gettext `.mo` catalog into a newly allocated `Translation`.

	Inputs:
	- data:       The raw contents of the `.mo` file.
	- options:    Only `options.merge_sections` is consulted here; when set, every entry is stored in the "" section.
	- pluralizer: Optional procedure stored verbatim in `translation.pluralize`.
	- allocator:  Used for the `Translation`, its string interner and all value slices.

	Returns:
	- translation: `nil` if the header is rejected. Once the header has been accepted, the
	               (possibly partially filled) translation is returned even when `err` is set.
	- err:         The first problem encountered while parsing.
*/
parse_mo_from_bytes :: proc(data: []byte, options := DEFAULT_PARSE_OPTIONS, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
	context.allocator = allocator

	/*
		An MO file should have at least a 4-byte magic, 2 x 2 byte version info,
		a 4-byte number of strings value, and 2 x 4-byte offsets.
	*/
	if len(data) < 20 {
		return {}, .MO_File_Invalid
	}

	/*
		Check magic. Should be 0x950412de, either in native byte order or byte-swapped.
		`native` records which one it was, and every later multi-byte read must honour it.
	*/
	native := true
	magic  := read_u32(data, native) or_return

	if magic != 0x950412de {
		native = false
		magic  = read_u32(data, native) or_return

		if magic != 0x950412de { return {}, .MO_File_Invalid_Signature }
	}

	/*
		The revision is a single 32-bit word stored in the file's byte order.
		The major revision lives in its upper 16 bits; the minor revision (lower 16 bits) can be ignored.
	*/
	revision      := read_u32(data[4:], native) or_return
	version_major := revision >> 16
	if version_major > 1 { return {}, .MO_File_Unsupported_Version }

	count             := read_u32(data[ 8:], native) or_return
	original_offset   := read_u32(data[12:], native) or_return
	translated_offset := read_u32(data[16:], native) or_return

	if count == 0 { return {}, .Empty_Translation_Catalog }

	/*
		Initialize Translation, interner and optional pluralizer.
	*/
	translation = new(Translation)
	translation.pluralize = pluralizer
	strings.intern_init(&translation.intern, allocator, allocator)

	// All bounds arithmetic is done in i64 so that hostile u32 offsets/lengths cannot wrap around.
	data_len := i64(len(data))

	for n := u32(0); n < count; n += 1 {
		/*
			Locate this string's 8-byte (length, offset) descriptors in both tables.
		*/
		o_entry := i64(original_offset)   + 8 * i64(n)
		t_entry := i64(translated_offset) + 8 * i64(n)

		if data_len < o_entry + 8 || data_len < t_entry + 8 {
			return translation, .MO_File_Invalid
		}

		o_length := read_u32(data[int(o_entry)    :], native) or_return
		o_offset := read_u32(data[int(o_entry) + 4:], native) or_return

		t_length := read_u32(data[int(t_entry)    :], native) or_return
		t_offset := read_u32(data[int(t_entry) + 4:], native) or_return

		// The `+ 1` accounts for the NUL terminator that follows each string.
		o_end := i64(o_offset) + i64(o_length) + 1
		t_end := i64(t_offset) + i64(t_length) + 1
		if data_len < max(o_end, t_end) { return translation, .Premature_EOF }

		key_data := data[o_offset:][:o_length]
		val_data := data[t_offset:][:t_length]

		/*
			Could be a pluralized string: forms are separated by NUL bytes.
		*/
		zero := []byte{0}
		keys := bytes.split(key_data, zero); defer delete(keys)
		vals := bytes.split(val_data, zero); defer delete(vals)

		if (len(keys) != 1 && len(keys) != 2) || len(vals) > MAX_PLURALS {
			return translation, .MO_File_Incorrect_Plural_Count
		}

		for k in keys {
			section_name := ""
			key          := string(k)

			// Scan for <context>EOT<key>
			for ch, i in k {
				if ch == 0x04 {
					section_name = string(k[:i])
					key          = string(k[i+1:])
					break
				}
			}

			// If we merge sections, then all entries end in the "" context.
			if options.merge_sections {
				section_name = ""
			}

			section_name, _ = strings.intern_get(&translation.intern, section_name)
			if section_name not_in translation.k_v {
				translation.k_v[section_name] = {}
			}
			section := &translation.k_v[section_name]

			interned_key, _ := strings.intern_get(&translation.intern, string(key))

			// Duplicate key should not be allowed.
			if interned_key in section {
				return translation, .Duplicate_Key
			}

			// Every key of this entry maps to the same list of translated forms.
			interned_vals := make([]string, len(vals))
			for v, i in vals {
				interned_vals[i], _ = strings.intern_get(&translation.intern, string(v))
			}
			section[interned_key] = interned_vals
		}
	}
	return
}
/*
	Loads the file at `filename` into memory and hands its bytes to `parse_mo_from_bytes`.

	The temporary file buffer is allocated with `allocator` and released before returning.
	Yields `.File_Error` (and a nil translation) when the file could not be read;
	otherwise returns whatever `parse_mo_from_bytes` returns.
*/
parse_mo_file :: proc(filename: string, options := DEFAULT_PARSE_OPTIONS, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
	context.allocator = allocator

	contents, read_ok := os.read_entire_file(filename)
	defer delete(contents)

	if read_ok {
		translation, err = parse_mo_from_bytes(contents, options, pluralizer, allocator)
	} else {
		err = .File_Error
	}
	return
}
/*
	Procedure group: a call to `parse_mo` resolves to `parse_mo_file` when the first argument
	is a filename string, and to `parse_mo_from_bytes` when it is a byte slice.
*/
parse_mo :: proc { parse_mo_file, parse_mo_from_bytes }
/*
Helpers.
*/
/*
	Reads a `u32` from the first four bytes of `data`.

	Inputs:
	- data:          Source bytes; no particular alignment is required.
	- native_endian: When true the bytes are interpreted in the host's byte order,
	                 otherwise in the opposite byte order.

	Returns:
	- res: The decoded value, or 0 on error.
	- err: `.Premature_EOF` if `data` holds fewer than four bytes, `.None` otherwise.
*/
read_u32 :: proc(data: []u8, native_endian := true) -> (res: u32, err: Error) {
	if len(data) < size_of(u32) { return 0, .Premature_EOF }

	// Callers slice `data` at offsets taken from the file, so it may not be 4-byte aligned.
	// Copy the bytes into a local instead of dereferencing a casted pointer.
	buf: [size_of(u32)]u8
	copy(buf[:], data[:size_of(u32)])
	val := transmute(u32)buf

	if native_endian {
		return val, .None
	} else {
		// Reinterpret the same bytes in the non-host byte order.
		when ODIN_ENDIAN == .Little {
			return u32(transmute(u32be)val), .None
		} else {
			return u32(transmute(u32le)val), .None
		}
	}
}
/*
	Reads a `u16` from the first two bytes of `data`.

	Inputs:
	- data:          Source bytes; no particular alignment is required.
	- native_endian: When true the bytes are interpreted in the host's byte order,
	                 otherwise in the opposite byte order.

	Returns:
	- res: The decoded value, or 0 on error.
	- err: `.Premature_EOF` if `data` holds fewer than two bytes, `.None` otherwise.
*/
read_u16 :: proc(data: []u8, native_endian := true) -> (res: u16, err: Error) {
	if len(data) < size_of(u16) { return 0, .Premature_EOF }

	// `data` may start at an odd address, so copy the bytes into a local
	// instead of dereferencing a casted pointer.
	buf: [size_of(u16)]u8
	copy(buf[:], data[:size_of(u16)])
	val := transmute(u16)buf

	if native_endian {
		return val, .None
	} else {
		// Reinterpret the same bytes in the non-host byte order.
		when ODIN_ENDIAN == .Little {
			return u16(transmute(u16be)val), .None
		} else {
			return u16(transmute(u16le)val), .None
		}
	}
}
|