aboutsummaryrefslogtreecommitdiff
path: root/core/unicode/utf16/utf16.odin
blob: 4c76956ccbf87fb92468d6f0d9621190ae7a1770 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
package utf16

// REPLACEMENT_CHAR (U+FFFD) is what the procedures in this package return or
// write in place of a value they cannot encode or decode.
REPLACEMENT_CHAR :: '\ufffd';
// MAX_RUNE (U+10FFFF) is the largest code point the encoders accept.
MAX_RUNE         :: '\U0010ffff';

// Surrogate range boundaries:
//   [_surr1, _surr2) - first (high) unit of a surrogate pair
//   [_surr2, _surr3) - second (low) unit of a surrogate pair
//   [_surr1, _surr3) - the whole surrogate range
_surr1           :: 0xd800;
_surr2           :: 0xdc00;
_surr3           :: 0xe000;
// _surr_self is the first code point that must be encoded as a surrogate pair;
// everything below it (outside the surrogate range) is a single 16-bit unit.
_surr_self       :: 0x10000;


// is_surrogate reports whether `r` falls inside the surrogate range
// [_surr1, _surr3), i.e. it is either half of a surrogate pair.
is_surrogate :: proc(r: rune) -> bool {
	if r < _surr1 {
		return false;
	}
	return r < _surr3;
}

// decode_surrogate_pair combines a high surrogate `r1` and a low surrogate `r2`
// into the code point they represent.
//
// Returns REPLACEMENT_CHAR when `r1` is not in [_surr1, _surr2) or `r2` is not
// in [_surr2, _surr3).
decode_surrogate_pair :: proc(r1, r2: rune) -> rune {
	high_ok := _surr1 <= r1 && r1 < _surr2;
	low_ok  := _surr2 <= r2 && r2 < _surr3;
	if !high_ok || !low_ok {
		return REPLACEMENT_CHAR;
	}

	// Each surrogate carries 10 payload bits; the high half goes on top.
	upper := (r1 - _surr1) << 10;
	lower := r2 - _surr2;
	return (upper | lower) + _surr_self;
}


// encode_surrogate_pair splits the code point `c` into its high (`r1`) and
// low (`r2`) surrogate halves.
//
// Returns (REPLACEMENT_CHAR, REPLACEMENT_CHAR) when `c` is below _surr_self or
// above MAX_RUNE, i.e. when it does not need or cannot have a surrogate pair.
encode_surrogate_pair :: proc(c: rune) -> (r1, r2: rune) {
	if c < _surr_self || c > MAX_RUNE {
		return REPLACEMENT_CHAR, REPLACEMENT_CHAR;
	}

	// Remove the _surr_self bias, then split the remainder into two 10-bit halves.
	offset    := c - _surr_self;
	high_bits := (offset >> 10) & 0x3ff;
	low_bits  := offset & 0x3ff;
	return _surr1 + high_bits, _surr2 + low_bits;
}

// encode writes the UTF-16 encoding of the runes in `s` into `d` and returns
// the number of 16-bit units written.
//
// - Runes in [0, _surr1) or [_surr3, _surr_self) are written as one unit.
// - Runes in [_surr_self, MAX_RUNE] are written as a surrogate pair (two units).
// - Any other rune (a surrogate value or an out-of-range value) is written as
//   REPLACEMENT_CHAR.
//
// Encoding stops at the first rune whose encoding does not fit in the remaining
// space of `d`; a surrogate pair is never written partially.
encode :: proc(d: []u16, s: []rune) -> int {
	n, m := 0, len(d);
	loop: for r in s {
		switch r {
		case 0..<_surr1, _surr3 ..< _surr_self:
			// Need room for one unit at index n.
			if n+1 > m { break loop; }
			d[n] = u16(r);
			n += 1;

		case _surr_self ..= MAX_RUNE:
			// Need room for two units at indices n and n+1.
			if n+2 > m { break loop; }
			r1, r2 := encode_surrogate_pair(r);
			d[n]    = u16(r1);
			d[n+1]  = u16(r2);
			n += 2;

		case:
			// Need room for one unit at index n.
			if n+1 > m { break loop; }
			d[n] = u16(REPLACEMENT_CHAR);
			n += 1;
		}
	}
	return n;
}


// encode_string writes the UTF-16 encoding of the runes of the string `s` into
// `d` and returns the number of 16-bit units written.
//
// - Runes in [0, _surr1) or [_surr3, _surr_self) are written as one unit.
// - Runes in [_surr_self, MAX_RUNE] are written as a surrogate pair (two units).
// - Any other rune (a surrogate value or an out-of-range value) is written as
//   REPLACEMENT_CHAR.
//
// Encoding stops at the first rune whose encoding does not fit in the remaining
// space of `d`; a surrogate pair is never written partially.
encode_string :: proc(d: []u16, s: string) -> int {
	n, m := 0, len(d);
	loop: for r in s {
		switch r {
		case 0..<_surr1, _surr3 ..< _surr_self:
			// Need room for one unit at index n.
			if n+1 > m { break loop; }
			d[n] = u16(r);
			n += 1;

		case _surr_self ..= MAX_RUNE:
			// Need room for two units at indices n and n+1.
			if n+2 > m { break loop; }
			r1, r2 := encode_surrogate_pair(r);
			d[n]    = u16(r1);
			d[n+1]  = u16(r2);
			n += 2;

		case:
			// Need room for one unit at index n.
			if n+1 > m { break loop; }
			d[n] = u16(REPLACEMENT_CHAR);
			n += 1;
		}
	}
	return n;
}