Logo

index : blog

---

  • summary
  • about
  • tree
  • log
  • branches
<< path: root/public/blog.git/html/modules/uniform/program.jai blob: 103161f849abdae7d79f7906f275dd34effb2366 [raw] [clear marker]

        
0#scope_module
1
2// Opcodes for Inst
3InstOp :: enum u32 {
4 kInstAlt :: 0; // choose between out_ and out1_
5 kInstAltMatch; // Alt: out_ is [00-FF] and back, out1_ is match; or vice versa.
6 kInstByteRange; // next (possible case-folded) byte must be in [lo_, hi_]
7 kInstCapture; // capturing parenthesis number cap_
8 kInstEmptyWidth; // empty-width special (^ $ ...); bit(s) set in empty_
9 kInstMatch; // found a match!
10 kInstNop; // no-op; occasionally unavoidable
11 kInstFail; // never match; occasionally unavoidable
12}
13
14// Bit flags for empty-width specials
15EmptyOp :: enum_flags {
16 kEmptyBeginLine :: 1<<0; // ^ - beginning of line
17 kEmptyEndLine :: 1<<1; // $ - end of line
18 kEmptyBeginText :: 1<<2; // \A - beginning of text
19 kEmptyEndText :: 1<<3; // \z - end of text
20 kEmptyWordBoundary :: 1<<4; // \b - word boundary
21 kEmptyNonWordBoundary :: 1<<5; // \B - not \b
22 kEmptyAllFlags :: (1<<6)-1;
23}
24
25// Single instruction in regexp program.
26Inst :: struct {
27 // // Getters
28 // int id(Prog* p) { return static_cast<int>(this - p->inst_.data()); }
29 // int out1() { DCHECK(opcode() == kInstAlt || opcode() == kInstAltMatch); return out1_; }
30 // int cap() { DCHECK_EQ(opcode(), kInstCapture); return cap_; }
31 // int lo() { DCHECK_EQ(opcode(), kInstByteRange); return lo_; }
32 // int hi() { DCHECK_EQ(opcode(), kInstByteRange); return hi_; }
33 // int foldcase() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_&1; }
34 // int hint() { DCHECK_EQ(opcode(), kInstByteRange); return hint_foldcase_>>1; }
35 // int match_id() { DCHECK_EQ(opcode(), kInstMatch); return match_id_; }
36 // EmptyOp empty() { DCHECK_EQ(opcode(), kInstEmptyWidth); return empty_; }
37
38 // Maximum instruction id.
39 // (Must fit in out_opcode_. PatchList/last steal another bit.)
40 MAX_INST :: (1<<28) - 1;
41
42 out_opcode: u32; // 28 bits: out, 1 bit: last, 3 (low) bits: opcode
43 union { // additional instruction arguments:
44 out1: u32; // opcode == kInstAlt
45 // alternate next instruction
46 cap: s32; // opcode == kInstCapture
47 // Index of capture register (holds text
48 // position recorded by capturing parentheses).
49 // For \n (the submatch for the nth parentheses),
50 // the left parenthesis captures into register 2*n
51 // and the right one captures into register 2*n+1.
52
53 match_id: s32; // opcode == kInstMatch
54 // Match ID to identify this match (for re2::Set).
55
56 struct { // opcode == kInstByteRange
57 lo: u8; // byte range is lo_-hi_ inclusive
58 hi: u8; //
59 hint_foldcase: u16; // 15 bits: hint, 1 (low) bit: foldcase
60 // hint to execution engines: the delta to the
61 // next instruction (in the current list) worth
62 // exploring iff this instruction matched; 0
63 // means there are no remaining possibilities,
64 // which is most likely for character classes.
65 // foldcase: A-Z -> a-z before checking range.
66 }
67
68 empty: EmptyOp; // opcode == kInstEmptyWidth
69 // empty_ is bitwise OR of kEmpty* flags above.
70 };
71}
72
73get_opcode :: (using inst: Inst) -> InstOp {
74 return cast(InstOp) (out_opcode & 7);
75}
76
77get_last :: (using inst: Inst) -> bool {
78 return cast(bool)((out_opcode >> 3) & 1);
79}
80
81get_out :: (using inst: Inst) -> u32 {
82 return out_opcode >> 4;
83}
84
85set_opcode :: (using inst: *Inst, opcode: InstOp) {
86 out_opcode = out_opcode & 0xFFFF_FFF8 | cast(u32)opcode;
87}
88
89set_last :: (using inst: *Inst) {
90 out_opcode = out_opcode & 0xFFFF_FFF7 | cast(u32)(1 << 3);
91}
92
93set_out :: (using inst: *Inst, out: int) {
94 out_opcode = out_opcode & 0x0F | cast(u32)(out << 4);
95}
96
97set_out_opcode :: (using inst: *Inst, out: int, opcode: InstOp) {
98 out_opcode = out_opcode & 0x08 | cast(u32)(out << 4) | cast(u32)opcode;
99}
100
101
102get_foldcase :: (using inst: Inst) -> bool {
103 assert(get_opcode(inst) == .kInstByteRange);
104 return cast(bool) (hint_foldcase & 1);
105}
106
107get_hint :: (using inst: Inst) -> int {
108 assert(get_opcode(inst) == .kInstByteRange);
109 return hint_foldcase >> 1;
110}
111
112greedy :: (using inst: Inst, p: *Prog) -> bool {
113 assert(get_opcode(inst) == .kInstAltMatch);
114 out_inst := p.inst[get_out(inst)];
115 return get_opcode(out_inst) == .kInstByteRange ||
116 (get_opcode(out_inst) == .kInstNop && get_opcode(p.inst[get_out(out_inst)]) == .kInstByteRange);
117}
118
119// Does this inst (an kInstByteRange) match c?
120// @ToDo: should this int be Rune?
121matches :: (using inst: Inst, c: int) -> bool {
122 assert(get_opcode(inst) == .kInstByteRange);
123 if (get_foldcase(inst) && #char "A" <= c && c <= #char "Z") {
124 c += #char "a" - #char "A";
125 }
126 return lo <= c && c <= hi;
127}
128
129
130// Returns whether byte c is a word character: ASCII only.
131// Used by the implementation of \b and \B.
132// This is not right for Unicode, but:
133// - it's hard to get right in a byte-at-a-time matching world
134// (the DFA has only one-byte lookahead).
135// - even if the lookahead were possible, the Progs would be huge.
136// This crude approximation is the same one PCRE uses.
137is_word_char :: (c: u8) -> bool {
138 return (#char "A" <= c && c <= #char "Z") ||
139 (#char "a" <= c && c <= #char "z") ||
140 (#char "0" <= c && c <= #char "9") ||
141 c == #char "_";
142}
143
144// Compiled form of regexp program.
145Prog :: struct {
146 // Whether to anchor the search.
147 Anchor :: enum {
148 UNANCHORED; // match anywhere
149 ANCHORED; // match only starting at beginning of text
150 }
151
152 // Kind of match to look for (for anchor != kFullMatch)
153 //
154 // kLongestMatch mode finds the overall longest
155 // match but still makes its submatch choices the way
156 // Perl would, not in the way prescribed by POSIX.
157 // The POSIX rules are much more expensive to implement,
158 // and no one has needed them.
159 //
160 // kFullMatch is not strictly necessary -- we could use
161 // kLongestMatch and then check the length of the match -- but
162 // the matching code can run faster if it knows to consider only
163 // full matches.
164 MatchKind :: enum {
165 kFirstMatch; // like Perl, PCRE
166 kLongestMatch; // like egrep or POSIX
167 kFullMatch; // match only entire text; implies anchor==ANCHORED
168 kManyMatch; // for SearchDFA, records set of matches
169 };
170
171 anchor_start: bool; // regexp has explicit start anchor
172 anchor_end: bool; // regexp has explicit end anchor
173 reversed: bool; // whether program runs backward over input
174 did_flatten: bool; // has Flatten been called?
175 did_onepass: bool; // has IsOnePass been called?
176
177 start: int; // entry point for program
178 start_unanchored: int; // unanchored entry point for program
179 bytemap_range: int; // bytemap_[x] < bytemap_range_
180 prefix_size: int; // size of prefix (0 if no prefix)
181 prefix_front: s16 = -1; // first byte of prefix (-1 if no prefix)
182 prefix_back: s16 = -1; // last byte of prefix (-1 if no prefix)
183
184 list_count: int; // count of lists (see above)
185 inst_count: [#run enum_highest_value(InstOp) + 1] int; // count of instructions by opcode
186 list_heads: [..] u16; // sparse array enumerating list heads
187 // not populated if size_ is overly large
188
189 inst: [..] Inst; // pointer to instruction array
190 onepass_nodes: [..] u8; // data for OnePass nodes
191
192 dfa_mem: s64; // Maximum memory for DFAs.
193 // dfa_first: *DFA; // DFA cached for kFirstMatch/kManyMatch
194 // dfa_longest: *DFA; // DFA cached for kLongestMatch/kFullMatch
195
196 bytemap: [256] u8; // map from input bytes to byte classes
197
198 // std::once_flag dfa_first_once_;
199 // std::once_flag dfa_longest_once_;
200}
201
202 // Inst *inst(int id) { return &inst_[id]; }
203 // int start() { return start_; }
204 // void set_start(int start) { start_ = start; }
205 // int start_unanchored() { return start_unanchored_; }
206 // void set_start_unanchored(int start) { start_unanchored_ = start; }
207 // int size() { return size_; }
208 // bool reversed() { return reversed_; }
209 // void set_reversed(bool reversed) { reversed_ = reversed; }
210 // int list_count() { return list_count_; }
211 // int inst_count(InstOp op) { return inst_count_[op]; }
212 // uint16_t* list_heads() { return list_heads_.data(); }
213 // int64_t dfa_mem() { return dfa_mem_; }
214 // void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
215 // bool anchor_start() { return anchor_start_; }
216 // void set_anchor_start(bool b) { anchor_start_ = b; }
217 // bool anchor_end() { return anchor_end_; }
218 // void set_anchor_end(bool b) { anchor_end_ = b; }
219 // int bytemap_range() { return bytemap_range_; }
220 // const uint8_t* bytemap() { return bytemap_; }
221 // bool can_prefix_accel() { return prefix_size_ != 0; }
222
223 // // Accelerates to the first likely occurrence of the prefix.
224 // // Returns a pointer to the first byte or NULL if not found.
225 // const void* PrefixAccel(const void* data, size_t size) {
226 // DCHECK_GE(prefix_size_, 1);
227 // return prefix_size_ == 1 ? memchr(data, prefix_front_, size)
228 // : PrefixAccel_FrontAndBack(data, size);
229 // }
230
231
232
233can_bit_state :: (using prog: *Prog) -> bool {
234 return list_heads.count > 0;
235}
236
237init_alt :: (inst: *Inst, out: u32, out1: u32) {
238 assert(inst.out_opcode == 0);
239 set_out_opcode(inst, out, .kInstAlt);
240 inst.out1 = out1;
241}
242
243init_byte_range :: (inst: *Inst, lo: u8, hi: u8, foldcase: bool, out: int) {
244 assert(inst.out_opcode == 0);
245 set_out_opcode(inst, out, .kInstByteRange);
246 inst.lo = lo;
247 inst.hi = hi;
248 inst.hint_foldcase = (cast(u16)foldcase)&1;
249}
250
251init_capture :: (inst: *Inst, cap: s32, out: u32) {
252 assert(inst.out_opcode == 0);
253 set_out_opcode(inst, out, .kInstCapture);
254 inst.cap = cap;
255}
256
257init_empty_width :: (inst: *Inst, empty: EmptyOp, out: u32) {
258 assert(inst.out_opcode == 0);
259 set_out_opcode(inst, out, .kInstEmptyWidth);
260 inst.empty = empty;
261}
262
263init_match :: (inst: *Inst, id: s32) {
264 assert(inst.out_opcode == 0);
265 set_opcode(inst, .kInstMatch);
266 inst.match_id = id;
267}
268
269init_nop :: (inst: *Inst) {
270 assert(inst.out_opcode == 0);
271 set_opcode(inst, .kInstNop);
272}
273
274init_fail :: (inst: *Inst) {
275 assert(inst.out_opcode == 0);
276 set_opcode(inst, .kInstFail);
277}
278
279// Peep-hole optimizer.
280optimize :: (using prog: *Prog) {
281 q: Sparse_Set;
282 init(*q, inst.count);
283
284 // Eliminate nops. Most are taken out during compilation
285 // but a few are hard to avoid.
286 q.count = 0;
287 add_if_nonzero(*q, start);
288 for id: q {
289 ip := *inst[id];
290 j := get_out(ip.*);
291 // ToDo: Why is this a pointer????
292 jp: *Inst;
293 while true {
294 if j == 0 break;
295 jp = *inst[j];
296 if get_opcode(jp.*) != .kInstNop break;
297 j = get_out(jp.*);
298 }
299 set_out(ip, j);
300 add_if_nonzero(*q, j);
301 if get_opcode(ip.*) == .kInstAlt {
302 j = ip.out1;
303 while true {
304 if j == 0 break;
305 jp = *inst[j];
306 if get_opcode(jp.*) != .kInstNop break;
307 j = get_out(jp.*);
308 }
309 ip.out1 = j;
310 add_if_nonzero(*q, j);
311 }
312 }
313
314 // Insert kInstAltMatch instructions
315 // Look for
316 // ip: Alt -> j | k
317 // j: ByteRange [00-FF] -> ip
318 // k: Match
319 // or the reverse (the above is the greedy one).
320 // Rewrite Alt to AltMatch.
321 q.count = 0;
322 add_if_nonzero(*q, start);
323 for id: q {
324 ip := *inst[id];
325 add_if_nonzero(*q, get_out(ip.*));
326 if get_opcode(ip.*) == .kInstAlt {
327 add_if_nonzero(*q, ip.out1);
328
329 // ToDo: Why are these pointers????
330 j := inst[get_out(ip.*)];
331 k := inst[ip.out1];
332 if get_opcode(j) == .kInstByteRange && get_out(j) == id && j.lo == 0x00 && j.hi == 0xFF && is_match(prog, k) {
333 set_opcode(ip, .kInstAltMatch);
334 continue;
335 }
336 if is_match(prog, j) && get_opcode(k) == .kInstByteRange && get_out(k) == id && k.lo == 0x00 && k.hi == 0xFF {
337 set_opcode(ip, .kInstAltMatch);
338 }
339 }
340 }
341}
342
343add_if_nonzero :: (set: *Sparse_Set, i: int) {
344 if i != 0 {
345 add(set, i);
346 }
347}
348
349// Is ip a guaranteed match at end of text, perhaps after some capturing?
350is_match :: (prog: *Prog, i_in: Inst) -> bool {
351 i := i_in;
352 while true {
353 if #complete get_opcode(i) == {
354 case .kInstAlt; #through;
355 case .kInstAltMatch; #through;
356 case .kInstByteRange; #through;
357 case .kInstFail; #through;
358 case .kInstEmptyWidth;
359 return false;
360
361 case .kInstCapture; #through;
362 case .kInstNop;
363 i = prog.inst[get_out(i)];
364
365 case .kInstMatch;
366 return true;
367 }
368 }
369 // Should not be needed
370 assert(false);
371 return false;
372}
373
374empty_flags :: (text: string, pos: int) -> EmptyOp {
375 flags: EmptyOp;
376
377 // ^ and \A
378 if pos == 0 {
379 flags |= EmptyOp.kEmptyBeginText | .kEmptyBeginLine;
380 } else if text[pos - 1] == #char "\n" {
381 flags |= .kEmptyBeginLine;
382 }
383
384 // $ and \z
385 if pos == text.count {
386 flags |= EmptyOp.kEmptyEndText | .kEmptyEndLine;
387 } else if text[pos] == #char "\n" { // @ToDo: I removed a pos < count check here. Can pos ever be greater than count??
388 flags |= .kEmptyEndLine;
389 }
390
391 // \b and \B
392 if pos == 0 && text.count == 0 {
393 // no word boundary here
394 } else if pos == 0 {
395 if is_word_char(text[pos]) {
396 flags |= .kEmptyWordBoundary;
397 }
398 } else if pos == text.count {
399 if is_word_char(text[pos-1]) {
400 flags |= .kEmptyWordBoundary;
401 }
402 } else {
403 if (is_word_char(text[pos-1]) != is_word_char(text[pos])) {
404 flags |= .kEmptyWordBoundary;
405 }
406 }
407 if !(flags & .kEmptyWordBoundary) {
408 flags |= .kEmptyNonWordBoundary;
409 }
410
411 return flags;
412}
413
414compute_byte_map :: (using prog: *Prog) {
415 // Fill in bytemap with byte classes for the program.
416 // Ranges of bytes that are treated indistinguishably
417 // will be mapped to a single byte class.
418 builder: ByteMapBuilder;
419 init(*builder);
420
421 // Don't repeat the work for ^ and $.
422 marked_line_boundaries := false;
423 // Don't repeat the work for \b and \B.
424 marked_word_boundaries := false;
425
426 for id: 0..inst.count - 1 {
427 ip := *inst[id];
428 if get_opcode(ip.*) == .kInstByteRange {
429 lo: s32 = ip.lo;
430 hi: s32 = ip.hi;
431 mark(*builder, lo, hi);
432 if get_foldcase(ip.*) && lo <= #char "z" && hi >= #char "a" {
433 foldlo := lo;
434 foldhi := hi;
435 if foldlo < #char "a" {
436 foldlo = #char "a";
437 }
438 if foldhi > #char "z" {
439 foldhi = #char "z";
440 }
441 if (foldlo <= foldhi) {
442 foldlo += #char "A" - #char "a";
443 foldhi += #char "A" - #char "a";
444 mark(*builder, foldlo, foldhi);
445 }
446 }
447 // If this Inst is not the last Inst in its list AND the next Inst is
448 // also a ByteRange AND the Insts have the same out, defer the merge.
449 if !get_last(ip.*) && get_opcode(inst[id + 1]) == .kInstByteRange && get_out(ip.*) == get_out(inst[id+1]) {
450 continue;
451 }
452 merge(*builder);
453 } else if get_opcode(ip.*) == .kInstEmptyWidth {
454 if ip.empty & (EmptyOp.kEmptyBeginLine|.kEmptyEndLine) && !marked_line_boundaries {
455 mark(*builder, #char "\n", #char "\n");
456 merge(*builder);
457 marked_line_boundaries = true;
458 }
459 if ip.empty & (EmptyOp.kEmptyWordBoundary|.kEmptyNonWordBoundary) && !marked_word_boundaries {
460 // We require two batches here: the first for ranges that are word
461 // characters, the second for ranges that are not word characters.
462 for isword: bool.[true, false] {
463 i := 0;
464 j: int;
465 while i < 256 {
466 j = i + 1;
467 while j < 256 && is_word_char(cast(u8)i) == is_word_char(cast(u8)j) {
468 j += 1;
469 }
470 if is_word_char(cast(u8)i) == isword {
471 mark(*builder, cast(u8)i, cast(u8)(j - 1));
472 }
473 i = j;
474 }
475 merge(*builder);
476 }
477 marked_word_boundaries = true;
478 }
479 }
480 }
481
482 bytemap, bytemap_range = build(*builder);
483}
484
485// ByteMapBuilder implements a coloring algorithm.
486//
487// The first phase is a series of "mark and merge" batches: we mark one or more
488// [lo-hi] ranges, then merge them into our internal state. Batching is not for
489// performance; rather, it means that the ranges are treated indistinguishably.
490//
491// Internally, the ranges are represented using a bitmap that stores the splits
492// and a vector that stores the colors; both of them are indexed by the ranges'
493// last bytes. Thus, in order to merge a [lo-hi] range, we split at lo-1 and at
494// hi (if not already split), then recolor each range in between. The color map
495// (i.e. from the old color to the new color) is maintained for the lifetime of
496// the batch and so underpins this somewhat obscure approach to set operations.
497//
498// The second phase builds the bytemap from our internal state: we recolor each
499// range, then store the new color (which is now the byte class) in each of the
500// corresponding array elements. Finally, we output the number of byte classes.
501ByteMapBuilder :: struct {
502 splits: Bit_Array;
503 colors: [256] int;
504 nextcolor: int;
505 colormap: [..] Color_Pair;
506 ranges: [..] Range;
507
508 Range :: struct {
509 lo: u8;
510 hi: u8;
511 }
512 Color_Pair :: struct {
513 a: int;
514 b: int;
515 }
516}
517
518// Initial state: the [0-255] range has color 256.
519// This will avoid problems during the second phase,
520// in which we assign byte classes numbered from 0.
521init :: (using b: *ByteMapBuilder) {
522 init_bit_array(*splits, 256);
523 set_bit(*splits, 255);
524 colors[255] = 256;
525 nextcolor = 257;
526}
527
528mark :: (using builder: *ByteMapBuilder, lo: s32, hi: s32) {
529 assert(lo <= hi);
530
531 // Ignore any [0-255] ranges. They cause us to recolor every range, which
532 // has no effect on the eventual result and is therefore a waste of time.
533 if lo == 0 && hi == 255 return;
534
535 r: Range;
536 r.lo = cast(u8) lo;
537 r.hi = cast(u8) hi;
538 array_add(*ranges, r);
539}
540
541find_next_set_bit :: (using set: Bit_Array, start: int) -> s16 {
542 i := start >> 6;
543 // Mask out everything below start
544 word := slots[i] & (0xFFFF_FFFF_FFFF_FFFF << (start & 63));
545 if word {
546 return cast(u8)((i << 6) + bit_scan_forward(word) - 1);
547 }
548
549 i += 1;
550 while i < slots.count {
551 if slots[i] {
552 return cast(u8)((i << 6) + bit_scan_forward(slots[i]) - 1);
553 }
554 i += 1;
555 }
556 return -1;
557}
558
559merge :: (using builder: *ByteMapBuilder) {
560 for r: ranges {
561 lo := (cast(s16)r.lo) - 1;
562 hi: s16 = r.hi;
563
564 if lo >= 0 && !splits[lo] {
565 set_bit(*splits, lo);
566 next := find_next_set_bit(splits, lo + 1);
567 colors[lo] = colors[next];
568 }
569
570 if !splits[hi] {
571 set_bit(*splits, hi);
572 next := find_next_set_bit(splits, hi + 1);
573 colors[hi] = colors[next];
574 }
575
576 c := lo + 1;
577 while c < 256 {
578 next := find_next_set_bit(splits, c);
579 colors[next] = recolor(builder, colors[next]);
580 if next == hi break;
581
582 c = next + 1;
583 }
584 }
585 colormap.count = 0;
586 ranges.count = 0;
587}
588
589build :: (using builder: *ByteMapBuilder) -> [256] u8, bytemap_range: int {
590 // Assign byte classes numbered from 0.
591 nextcolor = 0;
592
593 bytemap: [256] u8;
594 c := 0;
595 while c < 256 {
596 next := find_next_set_bit(splits, c);
597 b := cast(u8) recolor(builder, colors[next]);
598 while c <= next {
599 bytemap[c] = b;
600 c += 1;
601 }
602 }
603
604 return bytemap, nextcolor;
605}
606
607recolor :: (using builder: *ByteMapBuilder, oldcolor: int) -> int {
608 // Yes, this is a linear search. There can be at most 256
609 // colors and there will typically be far fewer than that.
610 // Also, we need to consider keys *and* values in order to
611 // avoid recoloring a given range more than once per batch.
612 for colormap {
613 if it.a == oldcolor || it.b == oldcolor return it.b;
614 }
615
616 newcolor := nextcolor;
617 nextcolor += 1;
618 p: Color_Pair;
619 p.a = oldcolor;
620 p.b = newcolor;
621 array_add(*colormap, p);
622 return newcolor;
623}
624
625
626// Prog::Flatten() implements a graph rewriting algorithm.
627//
628// The overall process is similar to epsilon removal, but retains some epsilon
629// transitions: those from Capture and EmptyWidth instructions; and those from
630// nullable subexpressions. (The latter avoids quadratic blowup in transitions
631// in the worst case.) It might be best thought of as Alt instruction elision.
632//
633// In conceptual terms, it divides the Prog into "trees" of instructions, then
634// traverses the "trees" in order to produce "lists" of instructions. A "tree"
635// is one or more instructions that grow from one "root" instruction to one or
636// more "leaf" instructions; if a "tree" has exactly one instruction, then the
637// "root" is also the "leaf". In most cases, a "root" is the successor of some
638// "leaf" (i.e. the "leaf" instruction's out() returns the "root" instruction)
639// and is considered a "successor root". A "leaf" can be a ByteRange, Capture,
640// EmptyWidth or Match instruction. However, this is insufficient for handling
641// nested nullable subexpressions correctly, so in some cases, a "root" is the
642// dominator of the instructions reachable from some "successor root" (i.e. it
643// has an unreachable predecessor) and is considered a "dominator root". Since
644// only Alt instructions can be "dominator roots" (other instructions would be
645// "leaves"), only Alt instructions are required to be marked as predecessors.
646//
647// Dividing the Prog into "trees" comprises two passes: marking the "successor
648// roots" and the predecessors; and marking the "dominator roots". Sorting the
649// "successor roots" by their bytecode offsets enables iteration in order from
650// greatest to least during the second pass; by working backwards in this case
651// and flooding the graph no further than "leaves" and already marked "roots",
652// it becomes possible to mark "dominator roots" without doing excessive work.
653//
654// Traversing the "trees" is just iterating over the "roots" in order of their
655// marking and flooding the graph no further than "leaves" and "roots". When a
656// "leaf" is reached, the instruction is copied with its successor remapped to
657// its "root" number. When a "root" is reached, a Nop instruction is generated
658// with its successor remapped similarly. As each "list" is produced, its last
659// instruction is marked as such. After all of the "lists" have been produced,
660// a pass over their instructions remaps their successors to bytecode offsets.
661flatten :: (using prog: *Prog) {
662 if did_flatten {
663 return;
664 }
665 did_flatten = true;
666
667 // Scratch structures. It's important that these are reused by functions
668 // that we call in loops because they would thrash the heap otherwise.
669 reachable: Sparse_Set;
670 stk: [..] int;
671 init(*reachable, inst.count);
672 array_reserve(*stk, inst.count);
673
674 // First pass: Marks "successor roots" and predecessors.
675 // Builds the mapping from inst-ids to root-ids.
676 rootmap, predmap, preds_list := mark_successors(prog, *reachable, *stk);
677
678 // Second pass: Marks "dominator roots".
679 sorted := array_copy(rootmap.dense);
680 sorted.count = rootmap.count;
681 intro_sort(sorted, (a: $T, b: T) -> int {
682 if a.index < b.index {
683 return -1;
684 } else {
685 return 1;
686 }
687 });
688
689 for < sorted {
690 if it.index != start_unanchored && it.index != start {
691 mark_dominator(prog, it.index, *rootmap, predmap, preds_list, *reachable, *stk);
692 }
693 }
694
695 // Third pass: Emits "lists". Remaps outs to root-ids.
696 // Builds the mapping from root-ids to flat-ids.
697 flatmap: [..] int;
698 flat: [..] Inst;
699 array_resize(*flatmap, rootmap.count, false);
700 array_reserve(*flat, inst.count);
701
702 for rootmap {
703 flatmap[it.value] = flat.count;
704 emit_list(prog, it.index, *rootmap, *flat, *reachable, *stk);
705 set_last(*flat[flat.count - 1]);
706 // We have the bounds of the "list", so this is the
707 // most convenient point at which to compute hints.
708 compute_hints(prog, *flat, flatmap[it.value], flat.count);
709 }
710
711 list_count = flatmap.count;
712 memset(inst_count.data, 0, inst_count.count * size_of(type_of(inst_count[0])));
713 // Fourth pass: Remaps outs to flat-ids.
714 // Counts instructions by opcode.
715 for * flat {
716 if (get_opcode(it.*) != .kInstAltMatch) { // handled in EmitList()
717 set_out(it, flatmap[get_out(it.*)]);
718 }
719 inst_count[get_opcode(it.*)] += 1;
720 }
721
722 total := 0;
723 for inst_count {
724 total += it;
725 }
726 assert(total == flat.count);
727
728 // Remap start_unanchored and start.
729 if start_unanchored == 0 {
730 assert(start == 0);
731 } else if (start_unanchored == start) {
732 start_unanchored = flatmap[1];
733 start = flatmap[1];
734 } else {
735 start_unanchored = flatmap[1];
736 start = flatmap[2];
737 }
738
739 // Finally, replace the old instructions with the new instructions.
740 inst = flat;
741
742 // Populate the list heads for BitState.
743 // 512 instructions limits the memory footprint to 1KiB.
744 if inst.count <= 512 {
745 array_resize(*list_heads, inst.count, false);
746 // 0xFF makes it more obvious if we try to look up a non-head.
747 memset(list_heads.data, 0xFF, inst.count * size_of(type_of(list_heads[0])));
748 for 0..list_count - 1 {
749 list_heads[flatmap[it]] = cast(u16)it;
750 }
751 }
752}
753
754mark_successors :: (using prog: *Prog, reachable: *Sparse_Set, stk: *[..] int) -> rootmap: Sparse_Array(int), predmap: Sparse_Array(int), preds_list: [..] [..] int {
755 get_or_add_preds :: (preds_list: *[..] [..] int, predmap: *Sparse_Array(int), id: int) -> *[..] int {
756 preds: * [..] int;
757 if !contains(predmap.*, id) {
758 add_unchecked(predmap, id, preds_list.count);
759 array_reserve(preds_list, preds_list.count + 1);
760 memset(preds_list.data + preds_list.count, 0, size_of(type_of(preds_list[0])));
761 preds_list.count += 1;
762 preds = *(preds_list.*)[preds_list.count - 1];
763 preds.data = null;
764 preds.count = 0;
765 } else {
766 preds = *(preds_list.*)[get(predmap.*, id)];
767 }
768 return preds;
769 }
770
771 rootmap: Sparse_Array(int);
772 predmap: Sparse_Array(int);
773 init(*rootmap, inst.count);
774 init(*predmap, inst.count);
775 preds_list: [..] [..] int;
776
777 // Mark the kInstFail instruction.
778 add_unchecked(*rootmap, 0, rootmap.count);
779
780 // Mark the start_unanchored and start instructions.
781 if !contains(rootmap, start_unanchored) {
782 add_unchecked(*rootmap, start_unanchored, rootmap.count);
783 }
784 if !contains(rootmap, start) {
785 add_unchecked(*rootmap, start, rootmap.count);
786 }
787
788 reachable.count = 0;
789 stk.count = 0;
790 array_add(stk, start_unanchored);
791 while stk.count {
792 id := pop(stk);
793 if contains(reachable.*, id) continue;
794 add_unchecked(reachable, id);
795
796 i := inst[id];
797 if get_opcode(i) == {
798 case .kInstAltMatch; #through;
799 case .kInstAlt;
800 // Mark this instruction as a predecessor of each out.
801 preds := get_or_add_preds(*preds_list, *predmap, get_out(i));
802 array_add(preds, id);
803 preds = get_or_add_preds(*preds_list, *predmap, i.out1);
804 array_add(preds, id);
805 array_add(stk, i.out1);
806 array_add(stk, get_out(i));
807
808 case .kInstByteRange; #through;
809 case .kInstCapture; #through;
810 case .kInstEmptyWidth;
811 // Mark the out of this instruction as a "root".
812 if !contains(rootmap, get_out(i)) {
813 add_unchecked(*rootmap, get_out(i), rootmap.count);
814 }
815 array_add(stk, get_out(i));
816
817 case .kInstNop;
818 array_add(stk, get_out(i));
819
820 case .kInstMatch; #through;
821 case .kInstFail;
822
823 case;
824 assert(false, "Unhandled opcode: %", get_opcode(i));
825 }
826 }
827
828 return rootmap, predmap, preds_list;
829}
830
831mark_dominator :: (using prog: *Prog, root: int, rootmap: *Sparse_Array(int), predmap: Sparse_Array(int), preds_list: [..] [..] int, reachable: *Sparse_Set, stk: *[..] int) {
832 reachable.count = 0;
833 stk.count = 0;
834 array_add(stk, root);
835
836 while stk.count {
837 id := pop(stk);
838 if contains(reachable.*, id) continue;
839 add_unchecked(reachable, id);
840
841 if id != root && contains(rootmap.*, id) {
842 // We reached another "tree" via epsilon transition.
843 continue;
844 }
845
846 i := inst[id];
847 if #complete get_opcode(i) == {
848 case .kInstAltMatch; #through;
849 case .kInstAlt;
850 array_add(stk, i.out1);
851 array_add(stk, get_out(i));
852
853 case .kInstByteRange; #through;
854 case .kInstCapture; #through;
855 case .kInstEmptyWidth;
856
857 case .kInstNop;
858 array_add(stk, get_out(i));
859
860 case .kInstMatch; #through;
861 case .kInstFail;
862 }
863 }
864
865 for id: reachable {
866 if contains(predmap, id) {
867 for pred: preds_list[get(predmap, id)] {
868 if !contains(reachable.*, pred) {
869 // id has a predecessor that cannot be reached from root!
870 // Therefore, id must be a "root" too - mark it as such.
871 if !contains(rootmap.*, id) {
872 add_unchecked(rootmap, id, rootmap.count);
873 }
874 }
875 }
876 }
877 }
878}
879
880emit_list :: (using prog: *Prog, root: int, rootmap: *Sparse_Array(int), flat: *[..] Inst, reachable: *Sparse_Set, stk: *[..] int) {
881 reachable.count = 0;
882 stk.count = 0;
883 array_add(stk, root);
884 while stk.count {
885 id := pop(stk);
886 if contains(reachable.*, id) continue;
887 add_unchecked(reachable, id);
888
889 if id != root && contains(rootmap.*, id) {
890 // We reached another "tree" via epsilon transition. Emit a kInstNop
891 // instruction so that the Prog does not become quadratically larger.
892 i: Inst;
893 set_opcode(*i, .kInstNop);
894 set_out(*i, get(rootmap.*, id));
895 array_add(flat, i);
896 continue;
897 }
898
899 i := inst[id];
900 if get_opcode(i) == {
901 case .kInstAltMatch;
902 newi: Inst;
903 set_opcode(*newi, .kInstAltMatch);
904 set_out(*newi, flat.count + 1);
905 newi.out1 = cast(u32)flat.count + 2;
906 array_add(flat, newi);
907 array_add(stk, i.out1);
908 array_add(stk, get_out(i));
909
910 case .kInstAlt;
911 array_add(stk, i.out1);
912 array_add(stk, get_out(i));
913
914 case .kInstByteRange; #through;
915 case .kInstCapture; #through;
916 case .kInstEmptyWidth;
917 newi := i;
918 set_out(*newi, get(rootmap.*, get_out(i)));
919 array_add(flat, newi);
920
921 case .kInstNop;
922 array_add(stk, get_out(i));
923
924 case .kInstMatch; #through;
925 case .kInstFail;
926 array_add(flat, i);
927
928 case;
929 assert(false, "Unhandled opcode: %", get_opcode(i));
930 }
931 }
932}
933
934// For each ByteRange instruction in [begin, end), computes a hint to execution
935// engines: the delta to the next instruction (in flat) worth exploring iff the
936// current instruction matched.
937//
938// Implements a coloring algorithm related to ByteMapBuilder, but in this case,
939// colors are instructions and recoloring ranges precisely identifies conflicts
940// between instructions. Iterating backwards over [begin, end) is guaranteed to
941// identify the nearest conflict (if any) with only linear complexity.
942compute_hints :: (using prog: *Prog, flat: *[..] Inst, begin: int, end: int) {
943 recolor :: (splits: *Bit_Array, colors: *[256] int, lo_in: int, hi: int, first: *int, id: int) {
944 // Like ByteMapBuilder, we split at lo-1 and at hi.
945 lo := lo_in - 1;
946
947 if lo >= 0 && !(splits.*)[lo] {
948 set_bit(splits, lo);
949 next := find_next_set_bit(splits.*, lo + 1);
950 (colors.*)[lo] = (colors.*)[next];
951 }
952 if !(splits.*)[hi] {
953 set_bit(splits, hi);
954 next := find_next_set_bit(splits.*, hi + 1);
955 (colors.*)[hi] = (colors.*)[next];
956 }
957
958 c := lo + 1;
959 while c < 256 {
960 next := find_next_set_bit(splits.*, c);
961 // Ratchet backwards...
962 (first.*) = min(first.*, (colors.*)[next]);
963 // Recolor with id - because it's the new nearest conflict!
964 (colors.*)[next] = id;
965 if next == hi break;
966 c = next+1;
967 }
968 }
969
970 splits: Bit_Array;
971 init_bit_array(*splits, 256);
972 colors: [256] int;
973
974 dirty := false;
975 for < id: begin..end {
976 if (id == end || get_opcode((flat.*)[id]) != .kInstByteRange) {
977 if (dirty) {
978 dirty = false;
979 clear_all_bits(*splits);
980 }
981 set_bit(*splits, 255);
982 colors[255] = id;
983 // At this point, the [0-255] range is colored with id.
984 // Thus, hints cannot point beyond id; and if id == end,
985 // hints that would have pointed to id will be 0 instead.
986 continue;
987 }
988 dirty = true;
989
990 // We recolor the [lo-hi] range with id. Note that first ratchets backwards
991 // from end to the nearest conflict (if any) during recoloring.
992 first := end;
993
994 ip := *(flat.*)[id];
995 lo: s32 = ip.lo;
996 hi: s32 = ip.hi;
997 recolor(*splits, *colors, lo, hi, *first, id);
998 if get_foldcase(ip.*) && lo <= #char "z" && hi >= #char "a" {
999 foldlo := lo;
1000 foldhi := hi;
1001 if (foldlo < #char "a") {
1002 foldlo = #char "a";
1003 }
1004 if (foldhi > #char "z") {
1005 foldhi = #char "z";
1006 }
1007 if (foldlo <= foldhi) {
1008 foldlo += #char "A" - #char "a";
1009 foldhi += #char "A" - #char "a";
1010 recolor(*splits, *colors, foldlo, foldhi, *first, id);
1011 }
1012 }
1013
1014 if first != end {
1015 hint := cast(u16) min(first - id, 32767);
1016 ip.hint_foldcase |= hint << 1;
1017 }
1018 }
1019}
1020
1021// Accelerates to the first likely occurrence of the prefix.
1022// Returns position of the first byte or -1 if not found.
1023prefix_accel :: (using prog: *Prog, text: string, pos: int) -> int {
1024 assert(prefix_size >= 1);
1025 if prefix_size == 1 {
1026 result, found := index_of_char_2(text, cast(u8)prefix_front, pos);
1027 if !found return -1;
1028
1029 return result;
1030 } else {
1031 return prefix_accel_front_and_back(prog, text, pos);
1032 }
1033}
1034
1035prefix_accel_front_and_back :: (using prog: *Prog, text: string, pos: int) -> int {
1036 assert(prefix_size >= 2);
1037 if text.count - pos < prefix_size {
1038 return -1;
1039 }
1040
1041 // Don't bother searching the last prefix_size_-1 bytes for prefix_front_.
1042 // This also means that probing for prefix_back_ doesn't go out of bounds.
1043 size := text.count - prefix_size + 1;
1044
1045 // @ToDo Port AVX2 fast path once jai has intrinsics/assembler
1046
1047 for i: pos..size-1 {
1048 if text[i] == prefix_front && text[i + prefix_size - 1] == prefix_back {
1049 return i;
1050 }
1051 }
1052
1053 return -1;
1054}
1055
1056append :: (builder: *String_Builder, inst: Inst) {
1057 if get_opcode(inst) == {
1058 case;
1059 print_to_builder(builder, "opcode %", get_opcode(inst));
1060
1061 case .kInstAlt;
1062 print_to_builder(builder, "alt -> % | %", get_out(inst), inst.out1);
1063
1064 case .kInstAltMatch;
1065 print_to_builder(builder, "altmatch -> % | %", get_out(inst), inst.out1);
1066
1067 case .kInstByteRange;
1068 append(builder, "byte");
1069 if get_foldcase(inst) {
1070 append(builder, "/i");
1071 }
1072 print_to_builder(builder, " [%-%] % -> %", format_byte(inst.lo), format_byte(inst.hi), get_hint(inst), get_out(inst));
1073
1074 case .kInstCapture;
1075 print_to_builder(builder, "capture % -> %", inst.cap, get_out(inst));
1076
1077 case .kInstEmptyWidth;
1078 print_to_builder(builder, "emptywidth % -> %", inst.empty, get_out(inst));
1079
1080 case .kInstMatch;
1081 print_to_builder(builder, "match! %", inst.match_id);
1082
1083 case .kInstNop;
1084 print_to_builder(builder, "nop -> %", get_out(inst));
1085
1086 case .kInstFail;
1087 append(builder, "fail");
1088 }
1089}
1090
1091format_byte :: #bake_arguments formatInt(base = 16, minimum_digits = 2);
1092
1093to_string :: (using prog: Prog) -> string {
1094 if did_flatten {
1095 return to_string_flattened(prog);
1096 } else {
1097 return to_string_unflattened(prog);
1098 }
1099}
1100
1101to_string_flattened :: (using prog: Prog) -> string {
1102 builder: String_Builder;
1103 defer free_buffers(*builder);
1104 for inst {
1105 if get_last(it) {
1106 print_to_builder(*builder, "%. ", it_index);
1107 } else {
1108 print_to_builder(*builder, "%1+ ", it_index);
1109 }
1110 append(*builder, it);
1111 append(*builder, #char "\n");
1112 }
1113 return builder_to_string(*builder);
1114}
1115
1116to_string_unflattened :: (using prog: Prog) -> string {
1117 builder: String_Builder;
1118 defer free_buffers(*builder);
1119
1120 q: Sparse_Set;
1121 init(*q, prog.inst.count);
1122 add_if_nonzero(*q, start);
1123 for q {
1124 i := inst[it];
1125 print_to_builder(*builder, "%. ", it);
1126 append(*builder, i);
1127 append(*builder, #char "\n");
1128 add_if_nonzero(*q, get_out(i));
1129 if get_opcode(i) == .kInstAlt || get_opcode(i) == .kInstAltMatch {
1130 add_if_nonzero(*q, i.out1);
1131 }
1132 }
1133 return builder_to_string(*builder);
1134}
1135
1136#scope_file
1137
1138#import "Bit_Array";
1139#import "IntroSort";
1140#import "Bit_Operations";
1141
1142// @ToDo: Use assembler replacement for memchr instead, once jai supports it
1143index_of_char_2 :: (str: string, char: u8, start := 0) -> int, found:bool {
1144 for start..str.count-1 {
1145 if str[it] == char return it, true;
1146 }
1147 return -1, false;
1148}
1149
Copyright 2026  E766CB298A6D1E64 | Git-Thing heavily inspired by cgit