<<
path:
root/public/blog.git/html/modules/uniform/rune.jai
blob: ddf426567f0123ba34b14cc8124a9ac5bdfacf48
[raw]
[clear marker]
1 * This source is adapted from the utf-8 library by Rob Pike and Ken Thompson.
2 * Copyright (c) 2002 by Lucent Technologies.
3 * Permission to use, copy, modify, and distribute this software for any
4 * purpose without fee is hereby granted, provided that this entire notice
5 * is included in all copies of any software which is or includes a copy
6 * or modification of this software and in all copies of the supporting
7 * documentation for such software.
8 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
9 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
10 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
11 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
16// @ToDo: Why is this signed!?
// Limits and sentinel values shared by the UTF-8 <-> Rune conversion routines in this file.
19UTFmax :: 4; /* maximum bytes per rune; also the size of the byte array returned by bytes_from_rune */
20Runesync :: 0x80; /* values below this cannot be part of a multi-byte UTF sequence */
21Runeself :: 0x80; /* values below this are the same as a rune and as a (single-byte) UTF sequence */
22Runeerror :: 0xFFFD; /* rune returned by rune_from_string when decoding fails */
23Runemax : s32 : 0x10FFFF; /* maximum rune value; bytes_from_rune compares against this for its out-of-range check */
25// @ToDo: Move to unicode module?
26rune_from_string :: (str: string) -> Rune, len: int {
28 * one character sequence
38 * two character sequence
41 if str.count < 2 return Runeerror, 0;
43 c1 := cast(s32)(str[1]) ^ Tx;
44 if c1 & Testx return Runeerror, 0;
47 if c < T2 return Runeerror, 0;
49 result := ((c << Bitx) | c1) & Rune2;
50 if result <= Rune1 return Runeerror, 0;
56 * three character sequence
57 * 0800-FFFF => T3 Tx Tx
59 if str.count < 3 return Runeerror, 0;
61 c2 := cast(s32)(str[2]) ^ Tx;
62 if c2 & Testx return Runeerror, 0;
65 result := ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
66 if result <= Rune2 return Runeerror, 0;
72 * four character sequence (21-bit value)
73 * 10000-1FFFFF => T4 Tx Tx Tx
75 if str.count < 4 return Runeerror, 0;
77 c3 := cast(s32)(str[3]) ^ Tx;
78 if c3 & Testx return Runeerror, 0;
81 result := ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
82 if result <= Rune3 return Runeerror, 0;
90bytes_from_rune :: (rune: Rune) -> [UTFmax] u8, len: int {
91 /* Runes are signed, so convert to unsigned for range check. */
96 * one character sequence
97 * 00000-0007F => 00-7F
100 result[0] = cast(u8) c;
105 * two character sequence
109 result[0] = T2 | cast(u8)(c >> (1*Bitx));
110 result[1] = Tx | cast(u8)(c & Maskx);
115 * If the Rune is out of range, convert it to the error rune.
116 * Do this test here because the error rune encodes to three bytes.
117 * Doing it earlier would duplicate work, since an out of range
118 * Rune wouldn't have fit in one or two bytes.
120 if c > cast(u32)Runemax {
125 * three character sequence
126 * 0800-FFFF => T3 Tx Tx
129 result[0] = T3 | cast(u8)(c >> (2*Bitx));
130 result[1] = Tx | cast(u8)((c >> (1*Bitx)) & Maskx);
131 result[2] = Tx | cast(u8)(c & Maskx);
136 * four character sequence (21-bit value)
137 * 10000-1FFFFF => T4 Tx Tx Tx
139 result[0] = T4 | cast(u8)(c >> (3*Bitx));
140 result[1] = Tx | cast(u8)((c >> (2*Bitx)) & Maskx);
141 result[2] = Tx | cast(u8)((c >> (1*Bitx)) & Maskx);
142 result[3] = Tx | cast(u8)(c & Maskx);
146string_from_runes :: (runes: [] Rune) -> string {
147 builder: String_Builder;
148 defer free_buffers(*builder);
150 bytes, len := bytes_from_rune(it);
151 append(*builder, bytes.data, len);
153 return builder_to_string(*builder);
// Tag bits: the fixed high-order bit pattern that identifies each kind of UTF-8 byte.
// The encoder ORs these into the payload bits; the decoder compares lead bytes against them.
165T1 : u8 : 0b0000_0000; // NOTE(review): no visible use; presumably the (empty) tag of a one-byte sequence -- confirm
166Tx : u8 : 0b1000_0000; // tag of a continuation byte (every byte after the first)
167T2 : u8 : 0b1100_0000; // tag of the lead byte of a two-byte sequence
168T3 : u8 : 0b1110_0000; // tag of the lead byte of a three-byte sequence
169T4 : u8 : 0b1111_0000; // tag of the lead byte of a four-byte sequence
170T5 : u8 : 0b1111_1000; // NOTE(review): no visible use; presumably the first lead-byte pattern past four-byte sequences -- confirm
// Masks applied to decoded values of 1..4-byte sequences. The decoder also uses the
// next-smaller mask as a lower bound: a decoded N-byte value that is <= Rune(N-1)
// is rejected with Runeerror.
172Rune1 :: 0b0000_0000_0111_1111;
173Rune2 :: 0b0000_0111_1111_1111;
174Rune3 :: 0b1111_1111_1111_1111;
175Rune4 :: 0b0001_1111_1111_1111_1111_1111;
// Maskx selects the six payload bits carried by a continuation byte.
177Maskx : u8 : 0b0011_1111;
// Testx selects the top two bits. The decoder XORs a byte with Tx and then ANDs with
// Testx; a non-zero result means the byte did not start with the bits 10 and so is
// not a valid continuation byte.
178Testx : u8 : 0b1100_0000;