// Regex fragments that RE_PATTERN stitches together. Each one is a single
// capture group; backslashes are doubled because these are string literals.

// Comment opener: two or three slashes, a slash followed by 1-9 asterisks,
// 1-9 hash signs, or a double dash.
COMMENT_PREFIXES :: "(///?|/\\*{1,9}|#{1,9}|\\-\\-)";
// The keywords that mark a comment as a token (see Token_Identifier).
KEYWORDS         :: "(TODO|NOTE|IDEA|CONTINUE)";
// Author name: one or more of '@', '-', '_', whitespace, digits or word characters.
AUTHOR           :: "([@\\-_\\s\\d\\w]+)";
// Priority: an integer with an optional leading sign.
PRIORITY         :: "([-+]?\\d+)";

// The complete search pattern, assembled at compile time from the fragments above:
//
//     <prefix> <blanks> <keyword> [:] [ "(" <author> [,] [<priority>] ")" ] [:]
//
// The parenthesised part is a capture group of its own, so a full match yields
// five groups: prefix, keyword, the whole "(...)" part, author and priority.
RE_PATTERN :: #run -> string {
    FORMAT :: "%1\\s*%2:?(\\(%3[\\s]*,?[\\s]*%4?\\))?:?";

    pattern := tprint(FORMAT, COMMENT_PREFIXES, KEYWORDS, AUTHOR, PRIORITY);
    return pattern;
}


// Kind of annotation found in a comment. NONE is the zero value and is what
// identifier_determine returns for an unrecognised keyword.
Token_Identifier :: enum {
    NONE     :: 0;
    TODO     :: 1;
    NOTE     :: 2;
    IDEA     :: 3;
    CONTINUE :: 4;
}

// One annotation extracted from a source text by matches_process.
Token :: struct {
    // Keyword kind, as resolved by identifier_determine.
    identifier: Token_Identifier;
    // Number given after the author, e.g. the 3 in "TODO(adam, 3)"; stays 0 when absent.
    priority: int = 0;
    // Trimmed author name from the parentheses; stays empty when absent.
    author: string = "";
    // Comment text following the keyword, as collected by consume_till.
    comment: string;
    // Line of the match: 1 plus the number of newlines up to the match position.
    line: int;
}

// One regex hit as recorded by search_for.
Match :: struct {
    // Text of the whole match (capture 0); search_for stores the capture itself, not a copy.
    match: string;
    // Offset of the match from the start of the string handed to search_for.
    position: int;
    // Length of the whole match.
    length: int;
    // The remaining captures (everything after capture 0), in pattern order.
    groups: []string;
}


// Compiles RE_PATTERN with the given parse flags and stores the result in the
// file-scope `re`, which search_for matches against.
init_pattern :: (mode: Re.ParseFlags = .FoldCase | .LikePerl) {
    compiled := Re.compile(RE_PATTERN, mode);
    re = compiled;
}

// Turns regex matches into tokens.
//
// `source`  - the text the matches were found in (needed for the comment text
//             and for the line numbers).
// `matches` - hits as produced by search_for; `groups` is expected to hold
//             [0] comment prefix, [1] keyword, [2] the "(...)" part,
//             [3] author, [4] priority.
//
// Returns a newly built array with one Token per usable match. Empty matches
// and matches with an unexpected number of non-empty groups are skipped.
matches_process :: (source: string, matches: [..]Match) -> [..]Token {

    // Parses the integer at the front of `s`; yields the sentinel 690000 when
    // parsing fails.
    parse_int_try :: (s: *string) -> int {
        v, success := parse_int(s);
        if !success return 690000;
        return v;
    }


    /** Counts the line that byte offset `pos_till` lies on (first line is 1).

        This rescans from the start for every match, which is inefficient,
        but the ordering of [..]Match isn't always chronological, so an
        incremental counter would give wrong line numbers.

        Plus I don't expect that people have amalgamations in their projects.
    */
    lines_count :: (s: string, pos_till: int) -> int {
        // The loop below reads s[pos_till], so the last valid index is fine too.
        assert(pos_till < s.count, "Ooohhps");

        lines: int = 1;
        for i: 0..pos_till {
            if s[i] == "\n" then lines += 1;
        }

        return lines;
    }

    // Number of capture groups that actually captured something.
    count_non_empty_elements :: inline (groups: []string) -> count: int {
        c: int;
        for * groups {
            if it.* then c += 1;
        }
        return c;
    }


    tokens: [..]Token;

    for matches {
        if it.match.count == 0 then continue;

        // 2 -> prefix + keyword only
        // 4 -> ... plus "(author)"            (paren group and author)
        // 5 -> ... plus "(author, priority)"
        capture_count := count_non_empty_elements(it.groups);
        if capture_count != 2 && capture_count != 4 && capture_count != 5 then continue;

        token: Token;
        token.identifier = identifier_determine(it.groups[1]);
        token.comment    = consume_till(source, terminator_get(it.groups[0]), it.position + it.length);
        token.line       = lines_count(source, it.position);

        if capture_count >= 4 {
            token.author = trim(it.groups[3]);
        }

        if capture_count == 5 {
            // parse_int wants a pointer, so the trimmed text needs a named local.
            priority_text := trim(it.groups[4]);
            token.priority = parse_int_try(*priority_text);
        }

        array_add(*tokens, token);
    }

    return tokens;
}

// Runs the compiled pattern over `source` and hands back every hit.
// `success` is whatever search_for reports; `matches` is a fresh array.
matches_collect :: (source: string) -> (success: bool, matches: [..]Match) {
    collected: [..]Match;
    found := search_for(source, *collected);
    return found, collected;
}


#scope_file


// Compiled pattern shared by this file: written by init_pattern, read by search_for.
re: Re.Regexp;


// Macro: true when index `n` lies past the last byte of the caller's `source`.
// The caller must have a string named `source` in scope.
is_at_end :: (n: int) -> bool #expand {
    return n >= `source.count;
}

// Returns the index of the first byte at or after `start_from` that is not a
// space, tab, carriage return or vertical tab. Newlines are not skipped.
// When no such byte exists, `start_from` itself is returned.
skip_space_return_index :: inline (s: string, start_from: int) -> int {
    cursor := start_from;

    while cursor < s.count {
        ch := s[cursor];
        is_blank := ch == " " || ch == "\t" || ch == "\r" || ch == 0x0B;
        if !is_blank return cursor;
        cursor += 1;
    }

    return start_from;
}

// Collects the comment text that starts at `pos` in `source` and returns it
// trimmed. `terminator` is the value produced by terminator_get for the
// comment's prefix; the actual scanning is done by consume_backend.
consume_till :: (source: string, terminator: string, pos: int) -> string {

    // TODO(adam, 3): `no_multiline_comment` (the consume_backend parameter
    // this feeds) is quite a bad name, so the intent isn't really clear!
    is_line_comment := terminator != "*/";

    builder: String_Builder;
    init_string_builder(*builder);

    consume_backend(*builder, pos, source, is_line_comment, terminator);

    return trim(builder_to_string(*builder));
}

// Walks `source` from index `pos` and appends the comment text to `buf` until
// one of the `break`s below ends the scan or the string runs out.
//
// `no_multiline_comment` - true for line-style comments, false for block comments.
// `terminator`           - for line comments the comment prefix itself, for block
//                          comments the block closer (see terminator_get).
consume_backend :: inline (
    buf: *String_Builder,
    pos: int,
    source: string,
    no_multiline_comment: bool,
    terminator: string
) {
    /** Don't append tabs or whitespace, if it's an empty area like this:

        #        TODO:      abc     fooo    baaar

        Only the first blank of such a run is appended; the flag is cleared
        again by the next character that reaches the default case.
    */
    more_than_one_space := false;
    // Set when a two-character terminator was recognised, so that the loop
    // steps over the following index as well.
    skip_next_iteration := false;

    for i: pos..source.count-1 {
        if skip_next_iteration {
            skip_next_iteration = false;
            continue;
        }

        if source[i] == {
        case "\t"; #through;
        case "\r"; #through;
        case 0x0B; #through;  // vertical tab
        case " ";
            if !more_than_one_space {
                more_than_one_space = true;
                append(buf, source[i]);
            }
        case "\n";
            // Line comment: stop unless the next line (after blanks) starts with
            // the prefix again, i.e. the comment continues.
            // Block comment: stop when the next line starts with the closer.
            if no_multiline_comment ^ is_terminator(source, terminator, i+1) {
                break;
            }
            // Line comment followed by an empty line: stop.
            // NOTE(review): for line comments this is only reached when the next
            // line starts with the prefix, so it looks unreachable - confirm.
            if is_next_character(source, "\n", i+1) && no_multiline_comment {
                break;
            }
            // Otherwise the newline is dropped and scanning continues.
        case "*";
            // An asterisk whose next non-blank neighbour is a slash ends the scan.
            // Any other asterisk is silently dropped (nothing is appended here).
            if is_next_character(source, "/", i+1) {
                break;
            }
        case;
            more_than_one_space = false;

            // Does this character start the terminator?
            cond := true;
            cond &= source[i] == terminator[0];

            if terminator.count == 2 {
                cond &= is_next_character(source, terminator[1], i+1);
                if cond {
                    // Replace the two-character terminator with a single blank.
                    // NOTE(review): is_next_character skips blanks, so the second
                    // character may not sit at i+1 - confirm this is intended.
                    append(buf, " ");
                    skip_next_iteration = true;
                }
            }

            // NOTE(review): for terminators that are not exactly two characters
            // long, only the first character is compared, so every occurrence of
            // it inside the comment text is dropped - confirm this is intended.
            if cond then continue;
            append(buf, source[i]);
        }
    }
}

// True when, after skipping blanks from `idx`, `source` continues with
// `terminator`. Only the first character is compared unless the terminator is
// exactly two characters long, in which case the second must follow directly.
is_terminator :: (source: string, terminator: string, idx: int) -> bool {
    first := skip_space_return_index(source, idx);

    if is_at_end(first)                 return false;
    if source[first] != terminator[0]   return false;
    if terminator.count != 2            return true;

    second := first + 1;
    return !is_at_end(second) && source[second] == terminator[1];
}

// True when the first non-blank byte at or after `idx` equals `terminator`.
is_next_character :: inline (source: string, terminator: u8, idx: int) -> bool {
    probe := skip_space_return_index(source, idx);
    if is_at_end(probe) return false;
    return source[probe] == terminator;
}

// Maps a comment prefix to the text that ends or continues the comment:
// block-comment openers yield the block closer, every other prefix yields itself.
terminator_get :: (starter: string) -> string {
    is_block_comment := starts_with(starter, "/*");
    return ifx is_block_comment then "*/" else starter;
}

// Maps a matched keyword to its Token_Identifier, ignoring case.
// Returns .NONE for anything that is not one of the known keywords.
identifier_determine :: (id: string) -> Token_Identifier {
    // to_lower_copy allocates; release the copy again on every exit path,
    // otherwise each processed match leaks one small string.
    lowered := to_lower_copy(id);
    defer free(lowered.data);

    if lowered == {
    case "todo";     return .TODO;
    case "note";     return .NOTE;
    case "idea";     return .IDEA;
    case "continue"; return .CONTINUE;
    case;
        return .NONE;
    }
}

// Repeatedly applies the compiled pattern `re` to `s`, appending one Match per
// hit to `matches`. After each hit the search resumes right behind it.
//
// Returns true when at least one Match was appended.
search_for :: (
    s: string,
    matches: *[..]Match
)
    -> has_match: bool
{
    subject := s;
    has_match := false;

    while subject.count > 0 {
        // How far the remaining `subject` is from the start of `s`.
        consumed := s.count - subject.count;

        matched, captures := Re.match(subject, re);
        if !matched break;

        // Nothing to read capture 0 from; treat it like "no match".
        if captures.count == 0 break;

        whole  := captures[0];
        offset := whole.data - subject.data;

        // The capture must point into the part that was just searched.
        if offset < 0 || offset >= subject.count break;

        // An empty match at the very start would never move the cursor and
        // the loop would spin forever.
        advance := offset + whole.count;
        if advance <= 0 break;

        match: Match;
        match.match    = whole;
        match.position = offset + consumed;
        match.length   = whole.count;

        // Keep every capture except the whole-match one, in order.
        // NOTE(review): `captures` itself is not released here; whether it has
        // to be depends on how Re.match allocates it - confirm.
        match.groups = NewArray(captures.count - 1, string);
        for 1..captures.count-1 match.groups[it - 1] = captures[it];

        array_add(matches, match);
        has_match = true;

        subject.data  += advance;
        subject.count -= advance;
    }

    return has_match;
}