git ptrace.dev


        
            
            
        
        0MAX_ONE_PASS_CAPTURES :: 5;
1
2// You need to free the captures array, but not its contents (which just references the input text)
3match :: (text: string, pattern: string, anchor: Anchor = .UNANCHORED) -> matched: bool, captures: [] string {
4	regexp, success := compile(pattern);
5	if !success {
6		return false, .[];
7	}
8	defer uninit(*regexp);
9
10	matched, captures: = match(text, regexp, anchor);
11	return matched, captures;
12}
13
14// You need to free the captures array, but not its contents (which just references the input text)
15match :: (text: string, re: Regexp, re_anchor_in: Anchor = .UNANCHORED) -> matched: bool, captures: [] string {
16	num_captures := 1 + re.num_capture_groups;
17
18	re_anchor := re_anchor_in;
19	if re.prog.anchor_start && re.prog.anchor_end {
20		re_anchor = .ANCHOR_BOTH;
21	} else if re.prog.anchor_start && re_anchor != .ANCHOR_BOTH {
22		re_anchor = .ANCHOR_START;
23	}
24
25	subtext := text;
26	if re.prefix.count {
27		if re.prefix.count > text.count {
28			return false, .[];
29		}
30
31		if re.prefix_foldcase {
32			if ascii_strcasecmp(re.prefix, subtext) != 0	return false, .[];
33		} else {
34			if memcmp(re.prefix.data, subtext.data, re.prefix.count) != 0		return false, .[];
35		}
36
37		subtext = slice(subtext, re.prefix.count, subtext.count - re.prefix.count);
38
39		// If there is a required prefix, the anchor must be at least Anchor_Start.
40		if re_anchor != .ANCHOR_BOTH {
41			re_anchor = .ANCHOR_START;
42		}
43	}
44
45	anchor := Prog.Anchor.UNANCHORED;
46	kind := Prog.MatchKind.kFirstMatch;
47
48	// @ToDo: For one-pass NFA
49	// can_one_pass := is_one_pass && num_captures <= MAX_ONE_PASS_CAPTURES;
50	// can_bit_state := can_bit_state(prog);
51
52	if #complete re_anchor == {
53		case .UNANCHORED;
54			//	@ToDo: Implement!
55		case .ANCHOR_BOTH; #through;
56		case .ANCHOR_START;
57			if re_anchor == .ANCHOR_BOTH {
58				kind = .kFullMatch;
59			}
60			anchor = .ANCHORED;
61			// @ToDo: Implement fast-passes
62	}
63
64	// @ToDo: Implement fast-passes
65
66	matched, submatches := match_nfa(re.prog, subtext, text, re.prefix.count, anchor, kind, num_captures);
67
68	if re.prefix.count && matched {
69		// Prepend prefix to full match
70		if submatches[0].count {
71			submatches[0].data -= re.prefix.count;
72			submatches[0].count += re.prefix.count;
73		} else {
74			submatches[0] = slice(text, 0, re.prefix.count);
75		}
76	}
77
78	return matched, submatches;
79}
80
81// Avoid possible locale nonsense in standard strcasecmp.
82// The string a is known to be all lowercase.
83ascii_strcasecmp :: (a: string, b: string) -> int {
84	for i: 0..a.count-1 {
85		x := a[i];
86		y := b[i];
87		if #char "A" <= y && y <= #char "Z" {
88			y += #char "a" - #char "A";
89		}
90		if x != y {
91			return x - y;
92		}
93	}
94	return 0;
95}
96
97// Execution engines.  They all search for the regexp (run the prog)
98// in text, which is in the larger context (used for ^ $ \b etc).
99// Anchor and kind control the kind of search.
100// Returns true if match found, false if not.
101// If match found, fills match[0..nmatch-1] with submatch info.
102// match[0] is overall match, match[1] is first set of parens, etc.
103// If a particular submatch is not matched during the regexp match,
104// it is set to NULL.
105//
106// Matching text == StringPiece(NULL, 0) is treated as any other empty
107// string, but note that on return, it will not be possible to distinguish
108// submatches that matched that empty string from submatches that didn't
109// match anything.  Either way, match[i] == NULL.
110
111
112//   // Search using DFA: much faster than NFA but only finds
113//   // end of match and can use a lot more memory.
114//   // Returns whether a match was found.
115//   // If the DFA runs out of memory, sets *failed to true and returns false.
116//   // If matches != NULL and kind == kManyMatch and there is a match,
117//   // SearchDFA fills matches with the match IDs of the final matching state.
118//   bool SearchDFA(const StringPiece& text, const StringPiece& context,
119//                  Anchor anchor, MatchKind kind, StringPiece* match0,
120//                  bool* failed, SparseSet* matches);
121
122//   // The callback issued after building each DFA state with BuildEntireDFA().
123//   // If next is null, then the memory budget has been exhausted and building
124//   // will halt. Otherwise, the state has been built and next points to an array
125//   // of bytemap_range()+1 slots holding the next states as per the bytemap and
126//   // kByteEndText. The number of the state is implied by the callback sequence:
127//   // the first callback is for state 0, the second callback is for state 1, ...
128//   // match indicates whether the state is a matching state.
129//   using DFAStateCallback = std::function<void(const int* next, bool match)>;
130
131//   // Build the entire DFA for the given match kind.
132//   // Usually the DFA is built out incrementally, as needed, which
133//   // avoids lots of unnecessary work.
134//   // If cb is not empty, it receives one callback per state built.
135//   // Returns the number of states built.
136//   // FOR TESTING OR EXPERIMENTAL PURPOSES ONLY.
137//   int BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb);
138
139//   // Controls whether the DFA should bail out early if the NFA would be faster.
140//   // FOR TESTING ONLY.
141//   static void TEST_dfa_should_bail_when_slow(bool b);
142
143
144compute_num_captures :: (re: *Regexp_Node) -> int {
145	count_pre_visit :: (num: *int, re: *Regexp_Node, v: *void) -> *void, bool {
146		if re.op == .Capture {
147			num.* += 1;
148		}
149		return null, false;
150	}
151
152	num_captures := 0;
153
154	w := Walk(*int, *void).{pre_visit = count_pre_visit};
155	walk(*num_captures, re, null, w);
156
157	return num_captures;
158}
159
160
index : blog