Logo

index : blog

---

  • summary
  • about
  • tree
  • log
  • branches
<< path: root/public/blog.git/html/modules/uniform/unicode_casefold.jai blob: be23566fc4d17ab35e7931d47e6413e53b30f5c2 [raw] [clear marker]

        
0#scope_module
1
// Sentinel values for CaseFold.delta. apply_fold interprets these as
// parity rules instead of adding them to the rune as a literal offset.
EvenOdd     ::  1;               // even <-> odd
OddEven     :: -1;               // odd <-> even
EvenOddSkip :: 1 << 30;          // even <-> odd, but only every other rune in the range
OddEvenSkip :: EvenOddSkip + 1;  // odd <-> even, but only every other rune in the range
6
// One row of a folding table: every rune r with lo <= r <= hi is mapped
// according to delta (see apply_fold).
CaseFold :: struct {
    lo, hi: Rune;  // Inclusive bounds of the range this row covers.
    delta:  s32;   // Literal offset, or one of the EvenOdd/OddEven(+Skip) sentinels.
}
12
// Searches the case folding table `folds` and returns the CaseFold* whose
// [lo, hi] range contains r.
// If there isn't one, returns the CaseFold* with the smallest lo bigger than r.
// If there isn't one of those either, returns null.
//
// `folds` must be sorted by lo with non-overlapping ranges; the search is a
// binary search and relies on that ordering.
lookup_casefold :: (folds: [] CaseFold, r: Rune) -> *CaseFold {
    // The live search window is folds[base .. base + n - 1].
    base: s64 = 0;
    n := folds.count;

    // Binary search for the entry containing r.
    while n > 0 {
        m := n / 2;
        entry := *folds[base + m];
        if entry.lo <= r && r <= entry.hi  return entry;

        if r < entry.lo {
            // r sorts before this entry: keep the lower half.
            n = m;
        } else {
            // r sorts after this entry: keep the upper half.
            base += m + 1;
            n    -= m + 1;
        }
    }

    // No entry contains r, but `base` is the slot where it would have been.
    // Unless that slot is past the end of the table, it holds the next entry
    // after r. The window length n is always 0 at this point, so the check
    // has to be made against the whole table, not against the window.
    if base < folds.count  return *folds[base];

    // No entry contains r; no entry contains runes > r.
    return null;
}
41
// Applies the fold described by f to the rune r and returns the result.
//
// f.delta is either a literal offset added to r, or one of the sentinels
// EvenOdd / OddEven / EvenOddSkip / OddEvenSkip, which swap r with its
// even/odd neighbour. The *Skip variants leave r untouched when it sits at
// an odd distance from f.lo.
apply_fold :: (f: *CaseFold, r: Rune) -> Rune {
    delta := f.delta;

    even_odd := delta == EvenOdd || delta == EvenOddSkip;
    odd_even := delta == OddEven || delta == OddEvenSkip;

    // Ordinary row: delta is a plain offset.
    if !even_odd && !odd_even  return r + delta;

    // Skip variants only apply to every other rune of the range.
    if delta == EvenOddSkip || delta == OddEvenSkip {
        if (r - f.lo) % 2  return r;
    }

    if even_odd {
        // even <-> odd
        if r % 2 == 0  return r + 1;
        return r - 1;
    }

    // odd <-> even
    if r % 2 == 1  return r + 1;
    return r - 1;
}
65
// Returns the next Rune in r's folding cycle, as described by the
// unicode_casefold table. A rune that no table row covers is returned
// unchanged.
// Examples:
//     cycle_fold_rune('A') = 'a'
//     cycle_fold_rune('a') = 'A'
//
//     cycle_fold_rune('K') = 'k'
//     cycle_fold_rune('k') = 0x212A (Kelvin)
//     cycle_fold_rune(0x212A) = 'K'
//
//     cycle_fold_rune('?') = '?'
cycle_fold_rune :: (r: Rune) -> Rune {
    fold := lookup_casefold(unicode_casefold, r);

    // Only fold when the row returned by the lookup actually covers r.
    if fold != null && fold.lo <= r  return apply_fold(fold, r);

    return r;
}
82
// Case-folding cycle table consumed by cycle_fold_rune.
// Each row is {lo, hi, delta}: runes in the inclusive range [lo, hi] are
// mapped by apply_fold using delta (a literal offset or an EvenOdd/OddEven
// sentinel). Rows must stay sorted by lo and must not overlap, because
// lookup_casefold binary-searches this table.
unicode_casefold :: CaseFold.[
    .{65, 90, 32},
    .{97, 106, -32},
    .{107, 107, 8383},
    .{108, 114, -32},
    .{115, 115, 268},
    .{116, 122, -32},
    .{181, 181, 743},
    .{192, 214, 32},
    .{216, 222, 32},
    .{223, 223, 7615},
    .{224, 228, -32},
    .{229, 229, 8262},
    .{230, 246, -32},
    .{248, 254, -32},
    .{255, 255, 121},
    .{256, 303, EvenOdd},
    .{306, 311, EvenOdd},
    .{313, 328, OddEven},
    .{330, 375, EvenOdd},
    .{376, 376, -121},
    .{377, 382, OddEven},
    .{383, 383, -300},
    .{384, 384, 195},
    .{385, 385, 210},
    .{386, 389, EvenOdd},
    .{390, 390, 206},
    .{391, 392, OddEven},
    .{393, 394, 205},
    .{395, 396, OddEven},
    .{398, 398, 79},
    .{399, 399, 202},
    .{400, 400, 203},
    .{401, 402, OddEven},
    .{403, 403, 205},
    .{404, 404, 207},
    .{405, 405, 97},
    .{406, 406, 211},
    .{407, 407, 209},
    .{408, 409, EvenOdd},
    .{410, 410, 163},
    .{412, 412, 211},
    .{413, 413, 213},
    .{414, 414, 130},
    .{415, 415, 214},
    .{416, 421, EvenOdd},
    .{422, 422, 218},
    .{423, 424, OddEven},
    .{425, 425, 218},
    .{428, 429, EvenOdd},
    .{430, 430, 218},
    .{431, 432, OddEven},
    .{433, 434, 217},
    .{435, 438, OddEven},
    .{439, 439, 219},
    .{440, 441, EvenOdd},
    .{444, 445, EvenOdd},
    .{447, 447, 56},
    .{452, 452, EvenOdd},
    .{453, 453, OddEven},
    .{454, 454, -2},
    .{455, 455, OddEven},
    .{456, 456, EvenOdd},
    .{457, 457, -2},
    .{458, 458, EvenOdd},
    .{459, 459, OddEven},
    .{460, 460, -2},
    .{461, 476, OddEven},
    .{477, 477, -79},
    .{478, 495, EvenOdd},
    .{497, 497, OddEven},
    .{498, 498, EvenOdd},
    .{499, 499, -2},
    .{500, 501, EvenOdd},
    .{502, 502, -97},
    .{503, 503, -56},
    .{504, 543, EvenOdd},
    .{544, 544, -130},
    .{546, 563, EvenOdd},
    .{570, 570, 10795},
    .{571, 572, OddEven},
    .{573, 573, -163},
    .{574, 574, 10792},
    .{575, 576, 10815},
    .{577, 578, OddEven},
    .{579, 579, -195},
    .{580, 580, 69},
    .{581, 581, 71},
    .{582, 591, EvenOdd},
    .{592, 592, 10783},
    .{593, 593, 10780},
    .{594, 594, 10782},
    .{595, 595, -210},
    .{596, 596, -206},
    .{598, 599, -205},
    .{601, 601, -202},
    .{603, 603, -203},
    .{604, 604, 42319},
    .{608, 608, -205},
    .{609, 609, 42315},
    .{611, 611, -207},
    .{613, 613, 42280},
    .{614, 614, 42308},
    .{616, 616, -209},
    .{617, 617, -211},
    .{618, 618, 42308},
    .{619, 619, 10743},
    .{620, 620, 42305},
    .{623, 623, -211},
    .{625, 625, 10749},
    .{626, 626, -213},
    .{629, 629, -214},
    .{637, 637, 10727},
    .{640, 640, -218},
    .{642, 642, 42307},
    .{643, 643, -218},
    .{647, 647, 42282},
    .{648, 648, -218},
    .{649, 649, -69},
    .{650, 651, -217},
    .{652, 652, -71},
    .{658, 658, -219},
    .{669, 669, 42261},
    .{670, 670, 42258},
    .{837, 837, 84},
    .{880, 883, EvenOdd},
    .{886, 887, EvenOdd},
    .{891, 893, 130},
    .{895, 895, 116},
    .{902, 902, 38},
    .{904, 906, 37},
    .{908, 908, 64},
    .{910, 911, 63},
    .{913, 929, 32},
    .{931, 931, 31},
    .{932, 939, 32},
    .{940, 940, -38},
    .{941, 943, -37},
    .{945, 945, -32},
    .{946, 946, 30},
    .{947, 948, -32},
    .{949, 949, 64},
    .{950, 951, -32},
    .{952, 952, 25},
    .{953, 953, 7173},
    .{954, 954, 54},
    .{955, 955, -32},
    .{956, 956, -775},
    .{957, 959, -32},
    .{960, 960, 22},
    .{961, 961, 48},
    .{962, 962, EvenOdd},
    .{963, 965, -32},
    .{966, 966, 15},
    .{967, 968, -32},
    .{969, 969, 7517},
    .{970, 971, -32},
    .{972, 972, -64},
    .{973, 974, -63},
    .{975, 975, 8},
    .{976, 976, -62},
    .{977, 977, 35},
    .{981, 981, -47},
    .{982, 982, -54},
    .{983, 983, -8},
    .{984, 1007, EvenOdd},
    .{1008, 1008, -86},
    .{1009, 1009, -80},
    .{1010, 1010, 7},
    .{1011, 1011, -116},
    .{1012, 1012, -92},
    .{1013, 1013, -96},
    .{1015, 1016, OddEven},
    .{1017, 1017, -7},
    .{1018, 1019, EvenOdd},
    .{1021, 1023, -130},
    .{1024, 1039, 80},
    .{1040, 1071, 32},
    .{1072, 1073, -32},
    .{1074, 1074, 6222},
    .{1075, 1075, -32},
    .{1076, 1076, 6221},
    .{1077, 1085, -32},
    .{1086, 1086, 6212},
    .{1087, 1088, -32},
    .{1089, 1090, 6210},
    .{1091, 1097, -32},
    .{1098, 1098, 6204},
    .{1099, 1103, -32},
    .{1104, 1119, -80},
    .{1120, 1122, EvenOdd},
    .{1123, 1123, 6180},
    .{1124, 1153, EvenOdd},
    .{1162, 1215, EvenOdd},
    .{1216, 1216, 15},
    .{1217, 1230, OddEven},
    .{1231, 1231, -15},
    .{1232, 1327, EvenOdd},
    .{1329, 1366, 48},
    .{1377, 1414, -48},
    .{4256, 4293, 7264},
    .{4295, 4295, 7264},
    .{4301, 4301, 7264},
    .{4304, 4346, 3008},
    .{4349, 4351, 3008},
    .{5024, 5103, 38864},
    .{5104, 5109, 8},
    .{5112, 5117, -8},
    .{7296, 7296, -6254},
    .{7297, 7297, -6253},
    .{7298, 7298, -6244},
    .{7299, 7299, -6242},
    .{7300, 7300, EvenOdd},
    .{7301, 7301, -6243},
    .{7302, 7302, -6236},
    .{7303, 7303, -6181},
    .{7304, 7304, 35266},
    .{7312, 7354, -3008},
    .{7357, 7359, -3008},
    .{7545, 7545, 35332},
    .{7549, 7549, 3814},
    .{7566, 7566, 35384},
    .{7680, 7776, EvenOdd},
    .{7777, 7777, 58},
    .{7778, 7829, EvenOdd},
    .{7835, 7835, -59},
    .{7838, 7838, -7615},
    .{7840, 7935, EvenOdd},
    .{7936, 7943, 8},
    .{7944, 7951, -8},
    .{7952, 7957, 8},
    .{7960, 7965, -8},
    .{7968, 7975, 8},
    .{7976, 7983, -8},
    .{7984, 7991, 8},
    .{7992, 7999, -8},
    .{8000, 8005, 8},
    .{8008, 8013, -8},
    .{8017, 8017, 8},
    .{8019, 8019, 8},
    .{8021, 8021, 8},
    .{8023, 8023, 8},
    .{8025, 8025, -8},
    .{8027, 8027, -8},
    .{8029, 8029, -8},
    .{8031, 8031, -8},
    .{8032, 8039, 8},
    .{8040, 8047, -8},
    .{8048, 8049, 74},
    .{8050, 8053, 86},
    .{8054, 8055, 100},
    .{8056, 8057, 128},
    .{8058, 8059, 112},
    .{8060, 8061, 126},
    .{8064, 8071, 8},
    .{8072, 8079, -8},
    .{8080, 8087, 8},
    .{8088, 8095, -8},
    .{8096, 8103, 8},
    .{8104, 8111, -8},
    .{8112, 8113, 8},
    .{8115, 8115, 9},
    .{8120, 8121, -8},
    .{8122, 8123, -74},
    .{8124, 8124, -9},
    .{8126, 8126, -7289},
    .{8131, 8131, 9},
    .{8136, 8139, -86},
    .{8140, 8140, -9},
    .{8144, 8145, 8},
    .{8152, 8153, -8},
    .{8154, 8155, -100},
    .{8160, 8161, 8},
    .{8165, 8165, 7},
    .{8168, 8169, -8},
    .{8170, 8171, -112},
    .{8172, 8172, -7},
    .{8179, 8179, 9},
    .{8184, 8185, -128},
    .{8186, 8187, -126},
    .{8188, 8188, -9},
    .{8486, 8486, -7549},
    .{8490, 8490, -8415},
    .{8491, 8491, -8294},
    .{8498, 8498, 28},
    .{8526, 8526, -28},
    .{8544, 8559, 16},
    .{8560, 8575, -16},
    .{8579, 8580, OddEven},
    .{9398, 9423, 26},
    .{9424, 9449, -26},
    .{11264, 11310, 48},
    .{11312, 11358, -48},
    .{11360, 11361, EvenOdd},
    .{11362, 11362, -10743},
    .{11363, 11363, -3814},
    .{11364, 11364, -10727},
    .{11365, 11365, -10795},
    .{11366, 11366, -10792},
    .{11367, 11372, OddEven},
    .{11373, 11373, -10780},
    .{11374, 11374, -10749},
    .{11375, 11375, -10783},
    .{11376, 11376, -10782},
    .{11378, 11379, EvenOdd},
    .{11381, 11382, OddEven},
    .{11390, 11391, -10815},
    .{11392, 11491, EvenOdd},
    .{11499, 11502, OddEven},
    .{11506, 11507, EvenOdd},
    .{11520, 11557, -7264},
    .{11559, 11559, -7264},
    .{11565, 11565, -7264},
    .{42560, 42570, EvenOdd},
    .{42571, 42571, -35267},
    .{42572, 42605, EvenOdd},
    .{42624, 42651, EvenOdd},
    .{42786, 42799, EvenOdd},
    .{42802, 42863, EvenOdd},
    .{42873, 42876, OddEven},
    .{42877, 42877, -35332},
    .{42878, 42887, EvenOdd},
    .{42891, 42892, OddEven},
    .{42893, 42893, -42280},
    .{42896, 42899, EvenOdd},
    .{42900, 42900, 48},
    .{42902, 42921, EvenOdd},
    .{42922, 42922, -42308},
    .{42923, 42923, -42319},
    .{42924, 42924, -42315},
    .{42925, 42925, -42305},
    .{42926, 42926, -42308},
    .{42928, 42928, -42258},
    .{42929, 42929, -42282},
    .{42930, 42930, -42261},
    .{42931, 42931, 928},
    .{42932, 42943, EvenOdd},
    .{42946, 42947, EvenOdd},
    .{42948, 42948, -48},
    .{42949, 42949, -42307},
    .{42950, 42950, -35384},
    .{42951, 42954, OddEven},
    .{42997, 42998, OddEven},
    .{43859, 43859, -928},
    .{43888, 43967, -38864},
    .{65313, 65338, 32},
    .{65345, 65370, -32},
    .{66560, 66599, 40},
    .{66600, 66639, -40},
    .{66736, 66771, 40},
    .{66776, 66811, -40},
    .{68736, 68786, 64},
    .{68800, 68850, -64},
    .{71840, 71871, 32},
    .{71872, 71903, -32},
    .{93760, 93791, 32},
    .{93792, 93823, -32},
    .{125184, 125217, 34},
    .{125218, 125251, -34},
];
443
// To-lower mapping table, in the same {lo, hi, delta} row format as
// unicode_casefold. Rows are sorted by lo and do not overlap. Ranges in
// which only every other rune has a mapping use the EvenOddSkip /
// OddEvenSkip sentinels (see apply_fold).
// 1384 groups, 1414 pairs, 200 ranges
unicode_tolower :: CaseFold.[
    .{65, 90, 32},
    .{181, 181, 775},
    .{192, 214, 32},
    .{216, 222, 32},
    .{256, 302, EvenOddSkip},
    .{306, 310, EvenOddSkip},
    .{313, 327, OddEvenSkip},
    .{330, 374, EvenOddSkip},
    .{376, 376, -121},
    .{377, 381, OddEvenSkip},
    .{383, 383, -268},
    .{385, 385, 210},
    .{386, 388, EvenOddSkip},
    .{390, 390, 206},
    .{391, 391, OddEven},
    .{393, 394, 205},
    .{395, 395, OddEven},
    .{398, 398, 79},
    .{399, 399, 202},
    .{400, 400, 203},
    .{401, 401, OddEven},
    .{403, 403, 205},
    .{404, 404, 207},
    .{406, 406, 211},
    .{407, 407, 209},
    .{408, 408, EvenOdd},
    .{412, 412, 211},
    .{413, 413, 213},
    .{415, 415, 214},
    .{416, 420, EvenOddSkip},
    .{422, 422, 218},
    .{423, 423, OddEven},
    .{425, 425, 218},
    .{428, 428, EvenOdd},
    .{430, 430, 218},
    .{431, 431, OddEven},
    .{433, 434, 217},
    .{435, 437, OddEvenSkip},
    .{439, 439, 219},
    .{440, 440, EvenOdd},
    .{444, 444, EvenOdd},
    .{452, 452, 2},
    .{453, 453, OddEven},
    .{455, 455, 2},
    .{456, 456, EvenOdd},
    .{458, 458, 2},
    .{459, 475, OddEvenSkip},
    .{478, 494, EvenOddSkip},
    .{497, 497, 2},
    .{498, 500, EvenOddSkip},
    .{502, 502, -97},
    .{503, 503, -56},
    .{504, 542, EvenOddSkip},
    .{544, 544, -130},
    .{546, 562, EvenOddSkip},
    .{570, 570, 10795},
    .{571, 571, OddEven},
    .{573, 573, -163},
    .{574, 574, 10792},
    .{577, 577, OddEven},
    .{579, 579, -195},
    .{580, 580, 69},
    .{581, 581, 71},
    .{582, 590, EvenOddSkip},
    .{837, 837, 116},
    .{880, 882, EvenOddSkip},
    .{886, 886, EvenOdd},
    .{895, 895, 116},
    .{902, 902, 38},
    .{904, 906, 37},
    .{908, 908, 64},
    .{910, 911, 63},
    .{913, 929, 32},
    .{931, 939, 32},
    .{962, 962, EvenOdd},
    .{975, 975, 8},
    .{976, 976, -30},
    .{977, 977, -25},
    .{981, 981, -15},
    .{982, 982, -22},
    .{984, 1006, EvenOddSkip},
    .{1008, 1008, -54},
    .{1009, 1009, -48},
    .{1012, 1012, -60},
    .{1013, 1013, -64},
    .{1015, 1015, OddEven},
    .{1017, 1017, -7},
    .{1018, 1018, EvenOdd},
    .{1021, 1023, -130},
    .{1024, 1039, 80},
    .{1040, 1071, 32},
    .{1120, 1152, EvenOddSkip},
    .{1162, 1214, EvenOddSkip},
    .{1216, 1216, 15},
    .{1217, 1229, OddEvenSkip},
    .{1232, 1326, EvenOddSkip},
    .{1329, 1366, 48},
    .{4256, 4293, 7264},
    .{4295, 4295, 7264},
    .{4301, 4301, 7264},
    .{5112, 5117, -8},
    .{7296, 7296, -6222},
    .{7297, 7297, -6221},
    .{7298, 7298, -6212},
    .{7299, 7300, -6210},
    .{7301, 7301, -6211},
    .{7302, 7302, -6204},
    .{7303, 7303, -6180},
    .{7304, 7304, 35267},
    .{7312, 7354, -3008},
    .{7357, 7359, -3008},
    .{7680, 7828, EvenOddSkip},
    .{7835, 7835, -58},
    .{7838, 7838, -7615},
    .{7840, 7934, EvenOddSkip},
    .{7944, 7951, -8},
    .{7960, 7965, -8},
    .{7976, 7983, -8},
    .{7992, 7999, -8},
    .{8008, 8013, -8},
    .{8025, 8025, -8},
    .{8027, 8027, -8},
    .{8029, 8029, -8},
    .{8031, 8031, -8},
    .{8040, 8047, -8},
    .{8072, 8079, -8},
    .{8088, 8095, -8},
    .{8104, 8111, -8},
    .{8120, 8121, -8},
    .{8122, 8123, -74},
    .{8124, 8124, -9},
    .{8126, 8126, -7173},
    .{8136, 8139, -86},
    .{8140, 8140, -9},
    .{8152, 8153, -8},
    .{8154, 8155, -100},
    .{8168, 8169, -8},
    .{8170, 8171, -112},
    .{8172, 8172, -7},
    .{8184, 8185, -128},
    .{8186, 8187, -126},
    .{8188, 8188, -9},
    .{8486, 8486, -7517},
    .{8490, 8490, -8383},
    .{8491, 8491, -8262},
    .{8498, 8498, 28},
    .{8544, 8559, 16},
    .{8579, 8579, OddEven},
    .{9398, 9423, 26},
    .{11264, 11310, 48},
    .{11360, 11360, EvenOdd},
    .{11362, 11362, -10743},
    .{11363, 11363, -3814},
    .{11364, 11364, -10727},
    .{11367, 11371, OddEvenSkip},
    .{11373, 11373, -10780},
    .{11374, 11374, -10749},
    .{11375, 11375, -10783},
    .{11376, 11376, -10782},
    .{11378, 11378, EvenOdd},
    .{11381, 11381, OddEven},
    .{11390, 11391, -10815},
    .{11392, 11490, EvenOddSkip},
    .{11499, 11501, OddEvenSkip},
    .{11506, 11506, EvenOdd},
    .{42560, 42604, EvenOddSkip},
    .{42624, 42650, EvenOddSkip},
    .{42786, 42798, EvenOddSkip},
    .{42802, 42862, EvenOddSkip},
    .{42873, 42875, OddEvenSkip},
    .{42877, 42877, -35332},
    .{42878, 42886, EvenOddSkip},
    .{42891, 42891, OddEven},
    .{42893, 42893, -42280},
    .{42896, 42898, EvenOddSkip},
    .{42902, 42920, EvenOddSkip},
    .{42922, 42922, -42308},
    .{42923, 42923, -42319},
    .{42924, 42924, -42315},
    .{42925, 42925, -42305},
    .{42926, 42926, -42308},
    .{42928, 42928, -42258},
    .{42929, 42929, -42282},
    .{42930, 42930, -42261},
    .{42931, 42931, 928},
    .{42932, 42942, EvenOddSkip},
    .{42946, 42946, EvenOdd},
    .{42948, 42948, -48},
    .{42949, 42949, -42307},
    .{42950, 42950, -35384},
    .{42951, 42953, OddEvenSkip},
    .{42997, 42997, OddEven},
    .{43888, 43967, -38864},
    .{65313, 65338, 32},
    .{66560, 66599, 40},
    .{66736, 66771, 40},
    .{68736, 68786, 64},
    .{71840, 71871, 32},
    .{93760, 93791, 32},
    .{125184, 125217, 34},
];
647
Copyright 2026  E766CB298A6D1E64 | Git-Thing heavily inspired by cgit