// Building blocks of the comment-tag regular expression.
// Each fragment is wrapped in its own capture group; together with the optional
// "(author, priority)" group they produce five groups after the whole-match
// capture has been removed (see search_for):
//   groups[0] comment prefix, groups[1] keyword, groups[2] the whole
//   parenthesised part, groups[3] author, groups[4] priority.
// NOTE(review): this index mapping assumes `Re.match` returns captures in
// group order with the full match first -- confirm against the Re module.
COMMENT_PREFIXES :: "(///?|/\\*{1,9}|#{1,9}|\\-\\-)";
KEYWORDS         :: "(TODO|NOTE|IDEA|CONTINUE)";
AUTHOR           :: "([@\\-_\\s\\d\\w]+)";
PRIORITY         :: "([-+]?\\d+)";

// The full pattern, assembled at compile time from the fragments above.
RE_PATTERN :: #run -> string {
    return tprint("%1\\s*%2:?(\\(%3[\\s]*,?[\\s]*%4?\\))?:?",
        COMMENT_PREFIXES, KEYWORDS, AUTHOR, PRIORITY
    );
}

// Kind of tag found in a comment. NONE is what identifier_determine returns
// for a keyword it does not recognise.
Token_Identifier :: enum {
    NONE :: 0;
    TODO;
    NOTE;
    IDEA;
    CONTINUE;
}

// One tagged comment extracted from a source text.
Token :: struct {
    identifier: Token_Identifier;
    priority:   int    = 0;   // stays 0 when the comment carries no priority
    author:     string = "";  // stays empty when the comment carries no author
    comment:    string;       // collected comment text (see consume_till)
    line:       int;          // 1-based line of the match start (see lines_count)
}

// One regex hit inside the scanned text.
Match :: struct {
    match:    string;    // the full matched text
    position: int;       // byte offset of the match inside the scanned text
    length:   int;       // byte length of the full match
    groups:   []string;  // capture groups without the full-match capture
}

// Compiles RE_PATTERN into the file-scoped `re`. Has to run before
// matches_collect / search_for, which read `re`.
init_pattern :: (mode: Re.ParseFlags = .FoldCase | .LikePerl) {
    re = Re.compile(RE_PATTERN, mode);
}

// Turns the raw regex matches into tokens.
//
// source:  the text the matches were collected from.
// matches: hits as produced by matches_collect.
// Returns one Token per match whose non-empty capture count is 2 (keyword
// only), 4 (keyword + author) or 5 (keyword + author + priority). Empty
// matches and every other capture count are skipped.
matches_process :: (source: string, matches: [..]Match) -> [..]Token {
    // Collects the comment text that follows the current match.
    // Macro: reads `it` and `source` from the scope it is expanded in.
    text_parse :: () -> string #expand {
        terminator := terminator_get(`it.groups[0]);
        text := consume_till(`source, terminator, `it.position + `it.length);
        return text;
    }

    // Parses an integer; yields the sentinel 690000 when parsing fails.
    parse_int_try :: (s: *string) -> int {
        v, success := parse_int(s);
        if !success return 690000;
        return v;
    }

    /**
        Counts lines from the start of the text for every single match.
        This is inefficient, but since the ordering of [..]Match isn't always
        chronological, an incremental count would produce wrong line numbers.
        Plus I don't expect that people have amalgamations in their projects.
    */
    lines_count :: (s: string, pos_till: int) -> int {
        assert(pos_till < s.count-1, "Ooohhps");
        lines: int = 1;
        for i: 0..pos_till {
            if s[i] == "\n" then lines += 1;
        }
        return lines;
    }

    // Number of capture groups that actually captured something.
    count_non_empty_elements :: inline (groups: []string) -> count: int {
        c: int;
        for * groups {
            if it.* then c += 1;
        }
        return c;
    }

    tokens: [..]Token;
    for matches {
        if it.match.count == 0 then continue;

        capture_count := count_non_empty_elements(it.groups);
        has_author    := capture_count == 4 || capture_count == 5;
        has_priority  := capture_count == 5;

        // 2 == only KEYWORD; anything that is neither that nor an
        // author/priority shape is ignored.
        if capture_count != 2 && !has_author then continue;

        token: Token;
        token.comment    = text_parse();
        token.identifier = identifier_determine(it.groups[1]);
        token.line       = lines_count(source, it.position);

        if has_author then token.author = trim(it.groups[3]);

        if has_priority {
            // parse_int_try wants a pointer, so keep the trimmed text in a local.
            priority_text := trim(it.groups[4]);
            token.priority = parse_int_try(*priority_text);
        }

        array_add(*tokens, token);
    }
    return tokens;
}

// Runs the compiled pattern over `source`.
// Returns whether at least one match was found, plus all collected matches.
matches_collect :: (source: string) -> (success: bool, matches: [..]Match) {
    matches: [..]Match;
    has_match := search_for(source, *matches);
    return has_match, matches;
}

#scope_file

// Compiled pattern; assigned by init_pattern.
re: Re.Regexp;

// True when index `n` lies past the last byte of `source`.
// Macro: `source` is taken from the scope it is expanded in.
is_at_end :: (n: int) -> bool #expand {
    return n > `source.count-1;
}

// Index of the first byte at or after `start_from` that is not a space, tab,
// carriage return or vertical tab. Newlines are NOT skipped.
// Returns `start_from` unchanged when no such byte exists.
skip_space_return_index :: inline (s: string, start_from: int) -> int {
    for i: start_from..s.count-1 {
        if s[i] != " " && s[i] != "\t" && s[i] != "\r" && s[i] != 0x0B then return i;
    }
    return start_from;
}

// Collects the comment text that starts at byte `pos` of `source` and returns
// it trimmed.
// terminator: "*/" for block comments, otherwise the comment prefix itself
//             (see terminator_get).
consume_till :: (source: string, terminator: string, pos: int) -> string {
    buf: String_Builder;
    init_string_builder(*buf);

    // TODO(adam, 3): Quite a bad name, so the intent isn't really clear!
    no_multiline_comment: bool = !(terminator == "*/");
    consume_backend(
        *buf, pos, source,
        no_multiline_comment, terminator
    );

    s := builder_to_string(*buf);
    return trim(s);
}

// Appends the comment text found in `source` from `pos` onwards to `buf`.
//
// no_multiline_comment: true for line comments, false for block comments.
// terminator:           see consume_till. Expected to be non-empty, its first
//                       byte is read unconditionally in the default case.
consume_backend :: inline (
    buf: *String_Builder,
    pos: int,
    source: string,
    no_multiline_comment: bool,
    terminator: string
) {
    /**
        Don't append tabs or whitespace, if it's an empty area like this:
        # TODO: abc
                fooo
                baaar
    */
    more_than_one_space := false;
    skip_next_iteration := false;

    for i: pos..source.count-1 {
        if skip_next_iteration {
            skip_next_iteration = false;
            continue;
        }

        if source[i] == {
            case "\t"; #through;
            case "\r"; #through;
            case 0x0B; #through; // vertical tab
            case " ";
                // Only the first byte of a whitespace run is kept.
                if !more_than_one_space {
                    more_than_one_space = true;
                    append(buf, source[i]);
                }

            case "\n";
                // Line comment:  stop unless the next line starts (after
                //                blanks) with the terminator, i.e. the prefix.
                // Block comment: stop once the next line starts with it.
                if no_multiline_comment ^ is_terminator(source, terminator, i+1) {
                    break;
                }
                // An empty line ends a line comment.
                if is_next_character(source, "\n", i+1) && no_multiline_comment {
                    break;
                }

            case "*";
                // "*" followed (after blanks) by "/" ends the text. Any other
                // "*" is dropped without being appended.
                if is_next_character(source, "/", i+1) {
                    break;
                }

            case;
                more_than_one_space = false;

                // Bytes belonging to the terminator are not copied. A
                // two-byte terminator is replaced by a single space and its
                // second byte is skipped on the next iteration.
                cond := true;
                cond &= source[i] == terminator[0];
                if terminator.count == 2 {
                    cond &= is_next_character(source, terminator[1], i+1);
                    if cond {
                        append(buf, " ");
                        skip_next_iteration = true;
                    }
                }
                if cond then continue;

                append(buf, source[i]);
        }
    }
}

// True when the first non-blank byte at or after `idx` equals terminator[0]
// and, for a two-byte terminator, the byte right after it equals terminator[1].
is_terminator :: (source: string, terminator: string, idx: int) -> bool {
    offset := skip_space_return_index(source, idx);

    cond := true;
    cond &= !is_at_end(offset) && source[offset] == terminator[0];
    if terminator.count == 2 {
        cond &= !is_at_end(offset + 1) && source[offset + 1] == terminator[1];
    }
    return cond;
}

// True when the first non-blank byte at or after `idx` equals `terminator`.
is_next_character :: inline (source: string, terminator: u8, idx: int) -> bool {
    offset := skip_space_return_index(source, idx);
    return !is_at_end(offset) && source[offset] == terminator;
}

// Maps a comment prefix to the text that ends the comment: the block-comment
// closer for prefixes starting with "/*", otherwise the prefix itself.
terminator_get :: (starter: string) -> string {
    if starts_with(starter, "/*") {
        // TS "fix" *//*
        return "*/";
    }
    return starter;
}

// Case-insensitive mapping from keyword text to Token_Identifier;
// .NONE for anything unknown.
identifier_determine :: (id: string) -> Token_Identifier {
    if to_lower_copy(id) == {
        case "todo";     return .TODO;
        case "note";     return .NOTE;
        case "idea";     return .IDEA;
        case "continue"; return .CONTINUE;
        case;            return .NONE;
    }
}

// Repeatedly applies `re` to the not-yet-consumed tail of `s` and appends
// every hit to `matches`.
// Returns true when at least one match was found.
search_for :: (
    s: string,
    matches: *[..]Match
) -> has_match: bool {
    subject := s;
    has_match := false;

    while subject.count > 0 {
        // Number of bytes of `s` that have already been consumed.
        diff := s.count - subject.count;

        matched, captures := Re.match(subject, re);
        has_match |= matched;
        if !matched break;

        c := captures[0];
        offset := c.data - subject.data;
        count  := c.count;
        if offset < 0 || offset >= subject.count break;

        match: Match;
        match.match    = c;
        match.position = offset + diff;
        match.length   = count;

        // Capture 0 is the full match; only the groups are stored.
        array_ordered_remove_by_index(*captures, 0);
        match.groups = NewArray(captures.count, string);
        for captures match.groups[it_index] = it;
        array_add(matches, match);

        // Always move forward by at least one byte. A zero-length match at
        // offset 0 would otherwise be found again forever.
        advance := offset + count;
        if advance < 1 then advance = 1;
        subject.data  += advance;
        subject.count -= advance;
    }
    return has_match;
}