Search_Response :: enum {
    ERROR :: 0;
    OK;
    NO_RESULT;
    VALIDATION_EMPTY;
    VALIDATION_TOO_LONG;
}


search_run :: (
    ht: *Table(string, []int), query: string
)
    -> (ok: Search_Response, idx: []int = .[])
{
    ok, validated := validate_query(query);
    if ok != .OK return ok;

    found := find_in_documents(ht, validated);

    if found return .OK, found;
    else return .NO_RESULT;
}

search_dump_search_index :: () {
    qtrace_init();
    init_my_pool();
    search_index := New(Table(string, []int));  // @OS GC
    table_ensure_space(search_index, HT_ENSURE_SPACE);
    defer this_allocation_is_not_a_leak(search_index.entries.data);

    documents_md := load_data_or_exit(FP_DUMP_ENTRIES_MD, []Entry);

    build_index(documents_md, search_index);
    assert(search_index.count > 0, "HT is empty");

    ok := marshal_and_save_to_disk(FP_DUMP_SEARCH_INDEX, search_index);
    assert(ok);
}


#scope_file


#import "Hash_Table";
#import "Sort";
#import "Flat_Pool";


is_init: bool;
pool: Flat_Pool;
pool_alloc: Allocator;


init_my_pool :: () {
    if is_init return;
    is_init = true;

    init(*pool);
    pool_alloc.proc = flat_pool_allocator_proc;
    pool_alloc.data = *pool;
}

build_index :: ($$entries: *[]Entry, ht: *Table(string, []int)) {
    data := entries.*;

    buf: [..]int;
    array_reserve(*buf, 512);
    defer this_allocation_is_not_a_leak(buf.data);

    for doc, i: data {
        post_with_title := join(doc.title, doc.post, " ",, pool_alloc);
        sanitized := replace_special_chars_with_space(post_with_title,, pool_alloc);
        lower_case := to_lower_copy(sanitized,, pool_alloc);
        words := split_by_whitespace(lower_case,, pool_alloc);
        defer reset(*pool);

        for word: words {
            trimmed := trim(word,, pool_alloc);
            if !table_contains(ht, trimmed) {
                new := NewArray(1, int);
                new[0] = i;
                new_word := copy_string(trimmed);
                table_add(ht, new_word, new);
                this_allocation_is_not_a_leak(new.data);
            } else {
                found, value := table_find(ht, trimmed);
                assert(found);

                if array_find(value, i) then continue;

                for value array_add(*buf, it);
                array_add(*buf, i);

                new_value := array_copy(buf);
                this_allocation_is_not_a_leak(new_value.data);
                array_reset_keeping_memory(*buf);

                table_set(ht, trimmed, new_value);
            }
        }
    }
}

validate_query :: (query: string) -> (status: Search_Response, []string) {

    is_only_whitespace :: (s: string) -> bool {
        for s if !is_space(it) return false;
        return true;
    }

    terms: []string;

    if query.count > MAX_QUERY_LENGTH      return .VALIDATION_TOO_LONG, terms;
    if !query || is_only_whitespace(query) return .VALIDATION_EMPTY, terms;

    terms = split(query, " ");
    return .OK, terms;
}

// @Memory: Free by caller
find_in_documents :: (ht: *Table(string, []int), query: []string) -> []int {

    Item :: struct {
        value: int;
        count: int;
    }

    compare_count :: (a: Item, b: Item) -> s64 {
        return inline compare_floats(xx b.count, xx a.count);
    }


    buf: [..]int;

    for term: query {
        found, indexes := table_find(ht, to_lower_copy(term));
        if found then { for indexes array_add(*buf, it); }
    }

    if !buf return .[];

    max := -1;
    for buf { if it > max then max = it; }
    assert(max > -1, "Something went shit");

    freq := NewArray(max+1, int);
    for i: 0..buf.count-1 { freq[buf[i]] += 1; }


    entries: [..]Item;

    for i: 0..max {
        if freq[i] > 0 {
            array_add(*entries, .{value = i, count = freq[i]});
        }
    }

    quick_sort(entries, compare_count);

    new := NewArray(entries.count, int, initialized=false);
    for i: 0..entries.count-1  new[i] = entries[i].value;

    return new;
}

/** Dear reader, if you're a professional: look away.
    If you're someone that currently discovers programming, this is
    **NOT** how you replace_special_chars_with_space data!

    I recommend reading the code from 'https://github.com/mozilla/bleach'
*/
replace_special_chars_with_space :: (s: string) -> string {

    has_special_char :: (char: u8) -> bool {
        SPECIAL_CHARACTERS :: "#,;.:_!\"\\()[]{}`*?=/&^°|<>’";

        for SPECIAL_CHARACTERS {
            if it == char return true;
        }
        return false;
    }


    buf: String_Builder;
    init_string_builder(*buf);

    for char: s {
        if has_special_char(char)
        then append(*buf, " ");
        else append(*buf, char);
    }

    return builder_to_string(*buf);
}

split_by_whitespace :: (s: string) -> []string {
    results: [..]string;
    buf: [..]u8;
    array_reset_keeping_memory(*buf);

    trimmed := trim(s);

    for trimmed {
        if it == {
        case " ";  #through;
        case "\r"; #through;
        case "\n"; #through;
        case "\t";
            word := join(xx buf);
            if !word continue;
            array_reset_keeping_memory(*buf);
            array_add(*results, word);
            continue;
        case; array_add(*buf, it);
        }
    }

    word := join(xx buf);
    if word then array_add(*results, word);

    return results;
}