<<
path:
root/public/blog.git/html/src/search/search.jai
blob: 7c0448c188ceeb28381b93940a216eab59e04f93
[raw]
[clear marker]
2Search_Response :: enum {
12 ht: *Table(string, []int), query: string
14 -> (ok: Search_Response, idx: []int = .[])
16 ok, validated := validate_query(query);
17 if ok != .OK return ok;
19 found := find_in_documents(ht, validated);
21 if found return .OK, found;
22 else return .NO_RESULT;
// Builds the search index and hands it to marshal_and_save_to_disk.
// Steps visible here: allocate a Table(string, []int), reserve space in it,
// load FP_DUMP_ENTRIES_MD as []Entry, fill the table with build_index, assert
// the table is non-empty, then save it under FP_DUMP_SEARCH_INDEX.
// NOTE(review): this listing omits lines of the original file (including
// whatever handles `ok` and the closing brace), so only the visible statements
// are described.
25search_dump_search_index :: () {
28 search_index := New(Table(string, []int)); // @OS GC
29 table_ensure_space(search_index, HT_ENSURE_SPACE);
// Flag the table's entry storage as intentionally unfreed; together with the
// `@OS GC` note above this presumably means cleanup is left to process exit
// -- confirm.
30 defer this_allocation_is_not_a_leak(search_index.entries.data);
32 documents_md := load_data_or_exit(FP_DUMP_ENTRIES_MD, []Entry);
34 build_index(documents_md, search_index);
// An empty table after indexing is treated as a programming error.
35 assert(search_index.count > 0, "HT is empty");
37 ok := marshal_and_save_to_disk(FP_DUMP_SEARCH_INDEX, search_index);
60 pool_alloc.proc = flat_pool_allocator_proc;
61 pool_alloc.data = *pool;
// Fills `ht` with a word -> []int mapping derived from each entry's title and
// post text.
// NOTE(review): the loop headers, the declarations of `buf`, `doc`, `word` and
// `i`, the statement that stores `i`, and the closing braces are not part of
// this listing; the comments below cover only the visible statements.
// `i` is presumably the index of the current document -- confirm.
64build_index :: ($$entries: *[]Entry, ht: *Table(string, []int)) {
// `buf` is a scratch array reused across words (it is reset further down).
68 array_reserve(*buf, 512);
69 defer this_allocation_is_not_a_leak(buf.data);
// Text pipeline for one document: title and post joined with a space, special
// characters blanked out, lower-cased, then split into words. Every step is
// called with `,, pool_alloc`, presumably so the intermediates come from the
// pool -- confirm.
72 post_with_title := join(doc.title, doc.post, " ",, pool_alloc);
73 sanitized := replace_special_chars_with_space(post_with_title,, pool_alloc);
74 lower_case := to_lower_copy(sanitized,, pool_alloc);
75 words := split_by_whitespace(lower_case,, pool_alloc);
79 trimmed := trim(word,, pool_alloc);
// Word not in the table yet: add a one-element array under a copy of the key.
// The key is copied without `,, pool_alloc`, unlike the temporaries above --
// presumably so it outlives the pool; confirm.
80 if !table_contains(ht, trimmed) {
81 new := NewArray(1, int);
83 new_word := copy_string(trimmed);
84 table_add(ht, new_word, new);
85 this_allocation_is_not_a_leak(new.data);
// Fetch the array currently stored for this word.
87 found, value := table_find(ht, trimmed);
// Nothing to do when `i` is already recorded for this word.
90 if array_find(value, i) then continue;
// Copy the stored values into the scratch buffer.
92 for value array_add(*buf, it);
// Snapshot the scratch buffer into a fresh array, flag it as a non-leak, and
// clear the buffer (keeping its memory) for the next word.
95 new_value := array_copy(buf);
96 this_allocation_is_not_a_leak(new_value.data);
97 array_reset_keeping_memory(*buf);
// Replace the value stored for this word with the new array.
// NOTE(review): nothing visible here releases the previous array `value` --
// confirm that is intended.
99 table_set(ht, trimmed, new_value);
// Validates a raw query string and splits it into terms.
// Returns .VALIDATION_TOO_LONG when query.count exceeds MAX_QUERY_LENGTH and
// .VALIDATION_EMPTY when the query is empty or consists only of whitespace;
// in both cases `terms` is returned as it stands at that point.
// NOTE(review): the declaration of `terms`, the tail of is_only_whitespace and
// the final return are not part of this listing.
105validate_query :: (query: string) -> (status: Search_Response, []string) {
// Local helper: bails out with false on the first non-space element.
107 is_only_whitespace :: (s: string) -> bool {
108 for s if !is_space(it) return false;
// The length check runs before the emptiness check.
114 if query.count > MAX_QUERY_LENGTH return .VALIDATION_TOO_LONG, terms;
115 if !query || is_only_whitespace(query) return .VALIDATION_EMPTY, terms;
// NOTE(review): splitting on a single " " presumably yields empty terms for
// consecutive spaces -- verify against split's behavior.
117 terms = split(query, " ");
121// @Memory: Free by caller
// Looks up every query term in `ht`, tallies how often each stored int occurs
// across the matching terms, sorts the (value, count) pairs with compare_count
// and copies the values, in sorted order, into a newly allocated array.
// NOTE(review): the declarations of `Item`, `buf`, `max` and `entries`, the
// loop headers and the return statement are not part of this listing.
122find_in_documents :: (ht: *Table(string, []int), query: []string) -> []int {
// Comparator with the operands swapped (b before a), so larger counts sort
// first -- assuming compare_floats orders its arguments ascending; confirm.
129 compare_count :: (a: Item, b: Item) -> s64 {
130 return inline compare_floats(xx b.count, xx a.count);
// Terms are lower-cased before the lookup; every value found is appended to
// `buf`, so a value matched by several terms appears several times.
// NOTE(review): the result of to_lower_copy is not visibly released here.
137 found, indexes := table_find(ht, to_lower_copy(term));
138 if found then { for indexes array_add(*buf, it); }
// The largest value in `buf` determines the size of the tally array.
144 for buf { if it > max then max = it; }
145 assert(max > -1, "Something went shit");
// freq[v] counts how many times v occurs in `buf`.
147 freq := NewArray(max+1, int);
148 for i: 0..buf.count-1 { freq[buf[i]] += 1; }
// Record value `i` together with its count (the enclosing loop and any
// condition around this call are not shown).
155 array_add(*entries, .{value = i, count = freq[i]});
159 quick_sort(entries, compare_count);
// The result array is allocated uninitialized; the loop right below writes
// every slot.
161 new := NewArray(entries.count, int, initialized=false);
162 for i: 0..entries.count-1 new[i] = entries[i].value;
167/** Dear reader, if you're a professional: look away.
168 If you're someone who is just discovering programming, this is
169 **NOT** how you sanitize data!
171 I recommend reading the code from 'https://github.com/mozilla/bleach'
// Builds a new string from `s` in which every element found in
// SPECIAL_CHARACTERS is replaced by a single space; all other elements are
// appended unchanged.
// NOTE(review): the declaration of `buf`, the loop over `s` and the closing
// braces are not part of this listing.
173replace_special_chars_with_space :: (s: string) -> string {
// Local helper: true when `char` equals any element of SPECIAL_CHARACTERS.
175 has_special_char :: (char: u8) -> bool {
// NOTE(review): the literal contains non-ASCII characters ('°' and '’') while
// the comparison below is against a single u8. If iteration is per byte, the
// individual bytes of those characters would match on their own and could
// blank out parts of other multi-byte UTF-8 characters -- confirm.
176 SPECIAL_CHARACTERS :: "#,;.:_!\"\\()[]{}`*?=/&^°|<>’";
178 for SPECIAL_CHARACTERS {
179 if it == char return true;
186 init_string_builder(*buf);
// One output element per input element: a space for special characters,
// otherwise the element itself.
189 if has_special_char(char)
190 then append(*buf, " ");
191 else append(*buf, char);
194 return builder_to_string(*buf);
// Splits `s` into words: elements are accumulated in `buf`, and a word built
// from `buf` is appended to `results`.
// NOTE(review): the declarations of `buf` and `results`, the loop header, the
// separator cases of the switch and the return statement are not part of this
// listing, so which characters count as whitespace cannot be stated here.
197split_by_whitespace :: (s: string) -> []string {
// Start from an empty scratch buffer; the reset suggests `buf` may hold
// leftovers from an earlier use -- confirm where it is declared.
200 array_reset_keeping_memory(*buf);
// Turn the accumulated elements into a word, clear the buffer and store the
// word (presumably inside a separator case -- confirm).
210 word := join(xx buf);
212 array_reset_keeping_memory(*buf);
213 array_add(*results, word);
// Default case: any other element is collected into the current word.
215 case; array_add(*buf, it);
// Flush whatever is left in the buffer; an empty word is not added.
219 word := join(xx buf);
220 if word then array_add(*results, word);