<<
path:
root/public/blog.git/html/modules/uniform/char_class.jai
blob: 01647b76c645116de30fbfd5949da31eca424399
[raw]
[clear marker]
// Mask with the low 26 bits set: one bit per letter, matching the layout of the
// `upper` (A-Z) and `lower` (a-z) bitmaps.
2ALPHA_MASK :: (1<<26) - 1;
5 upper: u32; // bitmap of A-Z
6 lower: u32; // bitmap of a-z
8 // @ToDo. @Speed: Do we need a faster data structure?
9 // TODO: Clean up the builders properly!!
10 ranges: [..] RuneRange;
14uninit :: (using cc: *CharClass) {
// Allocates a new CharClass with New and fills it from `cc`, then replaces the
// copied `ranges` field with a freshly built array holding the same entries.
18copy :: (using cc: *CharClass) -> *CharClass {
19 new_cc := New(CharClass);
 // Raw copy of every field of the struct, including the `ranges` field.
20 memcpy(new_cc, cc, size_of(CharClass));
 // Overwrite the copied `ranges` with a separately populated array
 // (presumably so the two classes do not share one buffer -- TODO confirm).
21 new_ranges: [..] RuneRange;
22 array_copy(*new_ranges, cc.ranges);
23 new_cc.ranges = new_ranges;
32less :: (a: RuneRange, b: RuneRange) -> bool {
// Looks for `r` in `ranges` by repeatedly probing the midpoint of [start, end]
// and comparing with `less`.
// Returns:
//   on a hit  -- the probed range, its index, and true;
//   on a miss -- a default-initialized range, the final `start`, and false.
// NOTE(review): the midpoint search presumably relies on `ranges` being sorted
// and non-overlapping -- confirm against the callers that insert into it.
36find_range :: (ranges: [] RuneRange, r: RuneRange) -> RuneRange, index: int, found: bool {
38 end := ranges.count - 1;
41 middle := (end + start) / 2;
42 range := ranges[middle];
45 } else if less(r, range) {
 // Hit: report the stored range and where it lives.
48 return range, middle, true;
 // Miss: `start` is the index where the search ended.
52 return .{}, start, false;
55make_range :: (lo: Rune, hi: Rune) -> RuneRange {
// Adds the inclusive rune range [lo, hi] to the class.
// Returns false when hi < lo, or when an existing range found at `lo` already
// spans [lo, hi].
// While adding, it keeps three pieces of state in sync:
//   - `upper` / `lower`: bitmaps of which ASCII letters are in the set;
//   - `nrunes`: decremented by the size of every range taken out and
//     incremented by the size of the range finally inserted;
//   - `ranges`: existing ranges that abut or fall inside [lo, hi] are removed
//     before the new range is inserted.
62add_range :: (using ccb: *CharClass, lo: Rune, hi: Rune) -> bool {
63 if hi < lo return false;
65 if lo <= #char "z" && hi >= #char "A" {
66 // Overlaps some alpha, maybe not all.
67 // Update bitmaps telling which ASCII letters are in the set.
 // Clamp [lo, hi] to the upper-case letters.
68 lo1 := max(lo, #char "A");
69 hi1 := min(hi, #char "Z");
 // (1 << n) - 1 is n set bits, n being the number of letters in [lo1, hi1];
 // the outer shift moves them to lo1's offset from 'A'.
 // NOTE(review): if hi1 < lo1 - 1 the shift count is negative (e.g. lo = 'a');
 // confirm a `lo1 <= hi1` guard precedes this statement.
71 upper |= cast(u32)(((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - #char "A"));
 // Same clamp-and-set for the lower-case letters.
74 lo1 = max(lo, #char "a");
75 hi1 = min(hi, #char "z");
77 lower |= cast(u32)(((1 << (hi1 - lo1 + 1)) - 1) << (lo1 - #char "a"));
81 { // Check whether lo, hi is already in the class.
82 range, index, found := find_range(ranges, make_range(lo, lo));
83 if found && range.lo <= lo && hi <= range.hi return false;
86 // Look for a range abutting lo on the left.
87 // If it exists, take it out and increase our range.
 // NOTE(review): `lo - 1` needs lo > 0 to be a valid rune -- confirm this is guarded.
89 range, index, found := find_range(ranges, make_range(lo - 1, lo - 1));
95 nrunes -= range.hi - range.lo + 1;
96 array_ordered_remove_by_index(*ranges, index);
100 // Look for a range abutting hi on the right.
101 // If it exists, take it out and increase our range.
 // NOTE(review): `hi + 1` needs hi < Runemax -- confirm this is guarded.
103 range, index, found := find_range(ranges, make_range(hi + 1, hi + 1));
106 nrunes -= range.hi - range.lo + 1;
107 array_ordered_remove_by_index(*ranges, index);
111 // Look for ranges between lo and hi. Take them out.
112 // This is only safe because the set has no overlapping ranges.
113 // We've already removed any ranges abutting lo and hi, so
114 // any that overlap [lo, hi] must be contained within it.
115 new_range := make_range(lo, hi);
120 range, index, found = find_range(ranges, new_range);
124 nrunes -= range.hi - range.lo + 1;
125 array_ordered_remove_by_index(*ranges, index);
128 // Finally, add [lo, hi].
129 nrunes += hi - lo + 1;
 // `index` appears to be the position reported by the last find_range call,
 // which is used here as the insertion point.
130 array_insert_at(*ranges, new_range, index);
// Adds ranges taken from `other` to this class by passing each iterated
// range's bounds to add_range. The boolean result of add_range is not used.
134add_char_class :: (using ccb: *CharClass, other: CharClass) {
136 add_range(ccb, it.lo, it.hi);
// Membership query for a single rune `r`.
140contains :: (using ccb: *CharClass, r: Rune) -> bool {
 // Search for the one-rune range [r, r] among the stored ranges.
141 range, index, found := find_range(ccb.ranges, make_range(r, r));
145// Does the character class behave the same on A-Z as on a-z?
// Returns true when `upper` and `lower` agree on all 26 letter bits, i.e. every
// letter is either present in both cases or absent in both.
146folds_ascii :: (using ccb: *CharClass) -> bool {
 // XOR leaves a 1 wherever the two bitmaps differ; ALPHA_MASK limits the
 // comparison to the letter bits.
147 return ((upper ^ lower) & ALPHA_MASK) == 0;
150// copy :: (using ccb: *CharClass) -> *CharClass {
151// CharClass* cc = new CharClass;
152// for (iterator it = begin(); it != end(); ++it)
153// cc->ranges_.insert(RuneRange(it->lo, it->hi));
154// cc->upper_ = upper_;
155// cc->lower_ = lower_;
156// cc->nrunes_ = nrunes_;
// Trims state for runes greater than `r`: masks the letter bitmaps, and rewrites
// or removes entries of `ranges` while keeping `nrunes` in step.
161remove_above :: (using ccb: *CharClass, r: Rune) {
 // Nothing lies above Runemax, so there is nothing to trim.
162 if r >= Runemax return;
 // Shifting ALPHA_MASK right by ('z' - r) keeps only the bits for 'a'..r.
 // NOTE(review): meaningful only for r within 'a'..'z' -- confirm the guards.
168 lower &= cast(u32)(ALPHA_MASK >> (#char "z" - r));
 // Same for the upper-case bitmap: keep only the bits for 'A'..r.
176 upper &= cast(u32)(ALPHA_MASK >> (#char "Z" - r));
 // Search for a stored range relative to the span (r, Runemax].
181 range, index, found := find_range(ranges, make_range(r + 1, Runemax));
 // Drop the range's old size from the count, store the updated range, and
 // add its new size back.
183 nrunes -= range.hi - range.lo + 1;
186 ranges[index] = range;
187 nrunes += range.hi - range.lo + 1;
189 array_ordered_remove_by_index(*ranges, index);
// Negates the class, rewriting `ranges` in place: each slot written gets `lo`
// from `nextlo` and `hi` from just below the next existing range (or Runemax
// for the final slot). Both letter bitmaps are inverted within ALPHA_MASK and
// `nrunes` becomes the count of runes in [0, Runemax] that were not in the set.
194negate :: (using ccb: *CharClass) {
195 // In negation, first range begins at 0, unless
196 // the current class begins at 0.
200 if ranges.count && ranges[0].lo == 0 {
201 nextlo = ranges[0].hi + 1;
204 while src < ranges.count {
 // The gap before range `r` becomes a range of the negated class.
206 ranges[dest].lo = nextlo;
207 ranges[dest].hi = r.lo - 1;
 // A trailing gap up to Runemax remains; write it as the last range,
 // growing the array by one slot if every existing slot is already in use.
212 if nextlo <= Runemax {
213 if dest == ranges.count {
214 array_reserve(*ranges, ranges.count + 1);
215 ranges.count = dest + 1;
217 ranges[dest].lo = nextlo;
218 ranges[dest].hi = Runemax;
 // Flip the letter bitmaps, keeping only the 26 letter bits.
223 upper = ALPHA_MASK & ~upper;
224 lower = ALPHA_MASK & ~lower;
 // There are Runemax+1 runes in [0, Runemax]; the complement holds the rest.
225 nrunes = Runemax+1 - nrunes;
// Records in `folds_ascii` whether `upper` and `lower` agree on all letter bits.
// NOTE(review): `folds_ascii` is also declared as a procedure in this file;
// confirm that CharClass has a field of that name for this assignment to target.
228finish_char_class :: (using ccb: *CharClass) {
229 // Does the character class behave the same on A-Z as on a-z?
230 folds_ascii = (((upper ^ lower) & ALPHA_MASK) == 0);