Logo

index : blog

---

  • summary
  • about
  • tree
  • log
  • branches
<< path: root/public/blog.git/html/modules/uniform/match.jai blob: 8a878f1a9d28749e5f8e4ab403df60bb0cdc0136 [raw] [clear marker]

        
0MAX_ONE_PASS_CAPTURES :: 5;
1
2// You need to free the captures array, but not its contents (which just references the input text)
3match :: (text: string, pattern: string, anchor: Anchor = .UNANCHORED) -> matched: bool, captures: [] string {
4 regexp, success := compile(pattern);
5 if !success {
6 return false, .[];
7 }
8 defer uninit(*regexp);
9
10 matched, captures: = match(text, regexp, anchor);
11 return matched, captures;
12}
13
14// You need to free the captures array, but not its contents (which just references the input text)
15match :: (text: string, re: Regexp, re_anchor_in: Anchor = .UNANCHORED) -> matched: bool, captures: [] string {
16 num_captures := 1 + re.num_capture_groups;
17
18 re_anchor := re_anchor_in;
19 if re.prog.anchor_start && re.prog.anchor_end {
20 re_anchor = .ANCHOR_BOTH;
21 } else if re.prog.anchor_start && re_anchor != .ANCHOR_BOTH {
22 re_anchor = .ANCHOR_START;
23 }
24
25 subtext := text;
26 if re.prefix.count {
27 if re.prefix.count > text.count {
28 return false, .[];
29 }
30
31 if re.prefix_foldcase {
32 if ascii_strcasecmp(re.prefix, subtext) != 0 return false, .[];
33 } else {
34 if memcmp(re.prefix.data, subtext.data, re.prefix.count) != 0 return false, .[];
35 }
36
37 subtext = slice(subtext, re.prefix.count, subtext.count - re.prefix.count);
38
39 // If there is a required prefix, the anchor must be at least Anchor_Start.
40 if re_anchor != .ANCHOR_BOTH {
41 re_anchor = .ANCHOR_START;
42 }
43 }
44
45 anchor := Prog.Anchor.UNANCHORED;
46 kind := Prog.MatchKind.kFirstMatch;
47
48 // @ToDo: For one-pass NFA
49 // can_one_pass := is_one_pass && num_captures <= MAX_ONE_PASS_CAPTURES;
50 // can_bit_state := can_bit_state(prog);
51
52 if #complete re_anchor == {
53 case .UNANCHORED;
54 // @ToDo: Implement!
55 case .ANCHOR_BOTH; #through;
56 case .ANCHOR_START;
57 if re_anchor == .ANCHOR_BOTH {
58 kind = .kFullMatch;
59 }
60 anchor = .ANCHORED;
61 // @ToDo: Implement fast-passes
62 }
63
64 // @ToDo: Implement fast-passes
65
66 matched, submatches := match_nfa(re.prog, subtext, text, re.prefix.count, anchor, kind, num_captures);
67
68 if re.prefix.count && matched {
69 // Prepend prefix to full match
70 if submatches[0].count {
71 submatches[0].data -= re.prefix.count;
72 submatches[0].count += re.prefix.count;
73 } else {
74 submatches[0] = slice(text, 0, re.prefix.count);
75 }
76 }
77
78 return matched, submatches;
79}
80
81// Avoid possible locale nonsense in standard strcasecmp.
82// The string a is known to be all lowercase.
83ascii_strcasecmp :: (a: string, b: string) -> int {
84 for i: 0..a.count-1 {
85 x := a[i];
86 y := b[i];
87 if #char "A" <= y && y <= #char "Z" {
88 y += #char "a" - #char "A";
89 }
90 if x != y {
91 return x - y;
92 }
93 }
94 return 0;
95}
96
97// Execution engines. They all search for the regexp (run the prog)
98// in text, which is in the larger context (used for ^ $ \b etc).
99// Anchor and kind control the kind of search.
100// Returns true if match found, false if not.
101// If match found, fills match[0..nmatch-1] with submatch info.
102// match[0] is overall match, match[1] is first set of parens, etc.
103// If a particular submatch is not matched during the regexp match,
104// it is set to NULL.
105//
106// Matching text == StringPiece(NULL, 0) is treated as any other empty
107// string, but note that on return, it will not be possible to distinguish
108// submatches that matched that empty string from submatches that didn't
109// match anything. Either way, match[i] == NULL.
110
111
112// // Search using DFA: much faster than NFA but only finds
113// // end of match and can use a lot more memory.
114// // Returns whether a match was found.
115// // If the DFA runs out of memory, sets *failed to true and returns false.
116// // If matches != NULL and kind == kManyMatch and there is a match,
117// // SearchDFA fills matches with the match IDs of the final matching state.
118// bool SearchDFA(const StringPiece& text, const StringPiece& context,
119// Anchor anchor, MatchKind kind, StringPiece* match0,
120// bool* failed, SparseSet* matches);
121
122// // The callback issued after building each DFA state with BuildEntireDFA().
123// // If next is null, then the memory budget has been exhausted and building
124// // will halt. Otherwise, the state has been built and next points to an array
125// // of bytemap_range()+1 slots holding the next states as per the bytemap and
126// // kByteEndText. The number of the state is implied by the callback sequence:
127// // the first callback is for state 0, the second callback is for state 1, ...
128// // match indicates whether the state is a matching state.
129// using DFAStateCallback = std::function<void(const int* next, bool match)>;
130
131// // Build the entire DFA for the given match kind.
132// // Usually the DFA is built out incrementally, as needed, which
133// // avoids lots of unnecessary work.
134// // If cb is not empty, it receives one callback per state built.
135// // Returns the number of states built.
136// // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY.
137// int BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb);
138
139// // Controls whether the DFA should bail out early if the NFA would be faster.
140// // FOR TESTING ONLY.
141// static void TEST_dfa_should_bail_when_slow(bool b);
142
143
144compute_num_captures :: (re: *Regexp_Node) -> int {
145 count_pre_visit :: (num: *int, re: *Regexp_Node, v: *void) -> *void, bool {
146 if re.op == .Capture {
147 num.* += 1;
148 }
149 return null, false;
150 }
151
152 num_captures := 0;
153
154 w := Walk(*int, *void).{pre_visit = count_pre_visit};
155 walk(*num_captures, re, null, w);
156
157 return num_captures;
158}
159
160
Copyright 2026  E766CB298A6D1E64 | Git-Thing heavily inspired by cgit