Logo

index : blog

---

  • summary
  • about
  • tree
  • log
  • branches
<< path: root/public/blog.git/html/src/search/main.jai blob: 142e0996b7aa1da372b76a2e7c1298e9d57db8da [raw] [clear marker]

        
0/**
1 Static html generator & search server.
2 Copyright (C) 2026 dev@ptrace.dev
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>.
16*/
17
18#import "Basic"()(
19 MEMORY_DEBUGGER=MEMORY_DEBUGGER_ENABLED, ENABLE_ASSERT=IS_DEVELOPER
20);
21#import "Curl"()(LINUX_USE_SYSTEM_LIBRARY=false);
22#import "File";
23#import "String";
24#import "Print_Vars";
25#import "Thread";
26
27#import "htmltemplate";
28#import "stringpad";
29
30#load "main.h";
31#load "search.jai";
32#load "http.jai";
33#load "response.jai";
34#load "reload.jai";
35#load "libseccomp.jai";
36#load "liblandlock.jai";
37#load "sec_seccomp.jai";
38#load "sec_landlock.jai";
39#load "../gen/blog.h";
40#load "../gen/entries.h";
41#load "../gen/constants.h";
42#load "../marshal.jai";
43
44#import,file "../quick_trace.jai"(ENABLE_TRACING=IS_DEVELOPER);
45
46
47
48/** This is a list of things we did, to ensure stability
49
50 - fuzzing individual functions via libfuzz
51 - E2E fuzzing via libfuzz
52 - load testing via wrk
53 - profiling under load via gprof
54 - unmapping allocator test
55 - tracing via Quick_Trace
56
57*/
58
59
60/** Security
61
62 -- Unmapping Allocator
63 This allocator unmaps memory when freed. Which allows us to detect
64 use-after-free bugs way better, since the program just crashes if something
65 wants to access invalid memory.
66
67 This is only enabled if we need it, since it's quite slow.
68
69 -- CLOEXEC on Sockets
70 Prevents a spawned process from inheriting our socket FDs, e.g. via
71 execve("/bin/sh"). exec* is already disabled via seccomp, but we use
72 this as an extra layer of security.
73
74 -- Seccomp
75 We're using seccomp as a whitelist, so every other syscall is denied.
76 We recorded the needed syscalls via strace and built an allow-list from them.
77
78 -- Landlock
79 We deny complete file access, since this program does not need it after
80 the init segment.
81
82 -- Namespaces
83 We're isolating this program from the host system. Since namespaces need
84 way more work implementing them as code, we're performing this step via
85 systemd.exec and closing remaining blind spots.
86
87
88 --------------------------------------------------------------------------
89 https://docs.kernel.org/userspace-api/landlock.html
90 https://man7.org/linux/man-pages/man2/seccomp.2.html
91 https://wiki.archlinux.org/title/Systemd/Sandboxing
92 https://www.freedesktop.org/software/systemd/man/latest/systemd.exec.html
93
94*/
95
96
97/** Perf findings
98
99 Update: 2026-04-22
100 We do not build the HT at comptime anymore, so those findings
101 do not matter now. But I leave it there for future reference.
102
103 Indexing attempts
104
105 | | build | document | result |
106 |----------|---------|----------|------------|
107 | Runtime | release | 345 MB | OOM |
108 | Runtime | release | 87 MB | 1 minute |
109 | Runtime | release | 22 MB | 20 seconds |
110 | Runtime | release | 5 MB | < 1 second |
111 | | | | |
112 | Comptime | release | 87 MB | impossible |
113 | Comptime | release | 22 MB | 4 minutes |
114 | Comptime | release | 5 MB | 45 seconds |
115
116 According to gprof, HT extend() was very slow -> table_ensure_space()
117
118 The conclusion would be: keep it at comptime for now, since my blog won't
119 have a lot of entries for the next years.
120
121 And if it reaches the ~2 MB mark, think about a new solution.
122 Best thing would be creating it at runtime separately as binary file
123 and load it at init time (don't forget seccomp then!).
124*/
125
126
// Blog data shared across the program. Both pointers are (re)assigned by
// load_blog_data() while mutex_data is held.
127documents_html: *[]Entry;
128search_index: *Table(string, []int);
// NOTE(review): these two flags are not referenced in this file; presumably
// they are the shutdown flags of the HTTP and reload servers - confirm in
// http.jai / reload.jai.
129is_server_offline: bool;
130is_ipc_offline: bool;
// Locked by load_blog_data() for the whole free-and-reload sequence.
// main() reads the data without it before any thread is started.
131mutex_data: Mutex;
132
133
// Compile-time security switches.
// When SECURITY_MODULE_ENABLED is false, main() logs loud warnings, exits with
// code 99 unless is_dev_machine() is true, and sandbox_load() does nothing.
134SECURITY_MODULE_ENABLED :: true;
// When SECCOMP_ENABLED is false, main() logs the "SECCOMP BOUNDARY" marker
// that dev/syscalls.py relies on.
135SECCOMP_ENABLED :: true;
// NOTE(review): SECCOMP_ARMED and LANDLOCK_ENABLED are not read in this file;
// presumably consumed by sec_seccomp.jai / sec_landlock.jai - confirm there.
136SECCOMP_ARMED :: true;
137LANDLOCK_ENABLED :: true;
138
139
// Path of the reload socket, chosen at compile time: a /tmp path for developer
// builds, a /run path otherwise. The literal carries an explicit trailing NUL.
// NOTE(review): presumably the NUL is there so the path can be handed to C
// socket APIs unchanged - confirm in reload.jai.
140#if IS_DEVELOPER {
 141 FP_SOCKET_RELOAD :: "/tmp/ptracedev_search.sock\0";
142} else {
 143 FP_SOCKET_RELOAD :: "/run/ptracedev/search.sock\0";
144}
145
// NOTE(review): LOCALHOST is not used in this file; presumably the bind
// address of the HTTP server - confirm in http.jai.
146LOCALHOST :: "127.0.0.1";
// Port passed to http_init() unless main() receives a valid `port <number>` argument.
147DEFAULT_PORT :u16: 8081;
148
149
// Kinds of blog data.
// NOTE(review): not referenced in this file; presumably ENTRIES maps to
// documents_html and INDEX to search_index - confirm against the users of
// this enum.
150Data_Kind :: enum {
 151 ENTRIES;
 152 INDEX;
153}
154
155
/** Program entry point.

    Handles the one-shot commands (`help`, `store`, `human`), otherwise loads
    the blog data, validates it, and runs the reload server and the search
    server on their own threads. The sandbox is loaded after the init segment
    and before the search server thread is started.

    Arguments
      help | -h | -help | --help   print ARGS_HELP and return
      store                        regenerate the search index and return
      human                        print the loaded search index and return
      port <number>                use <number> instead of DEFAULT_PORT

    Exit codes used here: 1 for invalid data / invalid port, 99 when the
    security module is compiled out.
*/
main :: () {
    #if MEMORY_DEBUGGER_ENABLED defer report_memory_leaks();
    #if IS_UA_ALLOCATOR {
        UA :: #import "Unmapping_Allocator";
        context.allocator = UA.get_unmapping_allocator();
    }

    #if !SECURITY_MODULE_ENABLED {
        for 0..9 log_error("-- SECURITY MODULE IS DISABLED! --");
        if !is_dev_machine() then exit(99);
        defer {
            for 0..9 log_error("-- SECURITY MODULE IS DISABLED! --");
            exit(99);
        }
    }


    // `args` is read by the is_arg() macros through their caller's scope.
    args := get_command_line_arguments();

    if is_arg("-h", "--help", "-help", "help") {
        log(ARGS_HELP);
        return;
    }

    if is_arg("store") {
        search_dump_search_index();
        log("> Regenerated Search Index");
        return;
    }

    load_blog_data();


    /** No threading started yet, so we can access data without mutex */

    if documents_html.count <= 0 {
        log_error("HTML entries are empty.");
        exit(1);
    }

    if search_index.count <= 0 {
        log_error("Search index is empty.");
        exit(1);
    }

    if is_arg("human") {
        for search_index.* log_error("K: % V: %", it_index, it);
        return;
    }

    port := DEFAULT_PORT;
    if args.count == 3 && args[1] == "port" {
        maybe_port := args[2];
        port=, ok := parse_int(*maybe_port, u16);
        if !ok {
            log_error("Your argument is not a valid port number. Input: %", args[2]);
            exit(1);
        }

        // Privileged ports are 0..1023; 1024 is the first unprivileged port
        // and must be accepted.
        if port < 1024 {
            log_error("Cannot use privileged ports.");
            exit(1);
        }
    }

    /** ------------ *
     * INIT SEGMENT *
     * ------------ */

    signal(SIGINT, handler_server);

    http_init(port);


    thread_reload_server := New(Thread);
    thread_search_server := New(Thread);

    thread_init(thread_reload_server, th_start_reload_server);
    thread_init(thread_search_server, th_start_search_server);

    defer {
        thread_deinit(thread_reload_server);
        thread_deinit(thread_search_server);
        free(thread_reload_server);
        free(thread_search_server);
    }

    // The reload server is started before the sandbox is loaded.
    thread_start(thread_reload_server);


    /** NOTE: Important!

        This marker is needed for dev/syscalls.py - since it determines from
        which point the syscalls should be whitelisted.
    */
    #if !SECCOMP_ENABLED then log("---- SECCOMP BOUNDARY ----");


    /** --------------- *
     * SANDBOX SEGMENT *
     * --------------- */

    sandbox_load();


    /** ------------------- *
     * RESTRICTIVE SEGMENT *
     * ------------------- */

    thread_start(thread_search_server);

    // Wait for both threads to finish before reporting shutdown.
    while !thread_is_done(thread_search_server, -1) {}
    while !thread_is_done(thread_reload_server, -1) {}
    log("Search Server offline.");
}
271
/** Activates the sandbox: Landlock first, then seccomp.
    Compiles to an empty procedure when SECURITY_MODULE_ENABLED is false.
*/
sandbox_load :: () {
    #if SECURITY_MODULE_ENABLED {
        sec_landlock_init();


        /** Every syscall after this proc is highly restricted.
            It is important to make sure that every needed syscall is
            whitelisted. Otherwise the application will crash in prod!

            # How to find operational syscalls

            1. Change seccomp to logging mode, instead of killing the
               program.

            2. Use dev/syscalls.py for recording and printing every needed syscall.
               Keep in mind to hit every code path while recording, otherwise you'll
               miss several syscalls.

            3. Pair the recording with a load test via wrk.

            4. Replace the old rules with the new ones.

            5. Make sure to truncate the audit log (make a backup if needed):
               `sudo truncate -s 0 /var/log/audit/audit.log`

            6. Repeat the test without recording syscalls.

            7. Check the audit log if you missed whitelisting syscalls via:
               `sudo ausearch -m SECCOMP -ts recent`

               If it has '<no matches>' you caught all syscalls we need for
               this program!

        */
        sec_seccomp_init();
    }
}
310
/** (Re)loads both blog data sets from disk into the file-level globals.

    mutex_data is held for the whole free-and-reload sequence. A failed load
    terminates the process (see load_data_or_exit).
*/
load_blog_data :: () {
    lock(*mutex_data);
    defer unlock(*mutex_data);

    // Release the previous data first. On the very first call both pointers
    // are null; according to "Default_Allocator" -> deallocate() freeing
    // `null` is a NOOP.
    // NOTE(review): only the outer pointers are freed here - confirm whether
    // the unmarshalled contents need a separate release on reload.
    free(documents_html);
    free(search_index);

    documents_html = load_data_or_exit(FP_DUMP_ENTRIES_HTML, []Entry);
    search_index   = load_data_or_exit(FP_DUMP_SEARCH_INDEX, Table(string, []int));
}
322
/** Writes `[prefix]: message` via write_string and guarantees a trailing newline.

    prefix   tag printed between the brackets
    message  text to print; an empty message gets no newline appended
    data     unused
    info     unused
*/
base_logger :: (prefix: string, message: string, data: *void, info: Log_Info) {
    write_string("[");
    write_string(prefix);
    write_string("]: ");
    write_string(message);

    // Only terminate the line when the message has not already done so.
    if message.count == 0 return;
    last_char := message[message.count-1];
    if last_char != #char "\n" write_string("\n");
}
330
/** Macro: true when `argument` is present in the caller's `args` array.
    The calling scope must declare a variable named `args`.
*/
is_arg :: (argument: string) -> bool #expand {
    found := array_find(`args, argument);
    return found;
}
334
/** Macro: true when at least one of the given spellings is present in the
    caller's `args` array. The calling scope must declare a variable named `args`.
*/
is_arg :: (argument: ..string) -> bool #expand {
    for candidate: argument {
        found := array_find(`args, candidate);
        if found return true;
    }
    return false;
}
339
/** Reads the file at `fp`, unmarshals it as `type`, and returns the result.

    fp    compile-time path of the dump file
    type  compile-time type to unmarshal into

    Never returns on failure: the process exits with code 1 when reading or
    unmarshalling fails, or when the result is null. The null check is done
    at runtime because assert() is compiled out when ENABLE_ASSERT is false,
    and callers dereference the returned pointer right away.
*/
load_data_or_exit :: ($fp: string, $type: Type) -> *type {
    qtrace_init(fp);
    ok, data := read_from_disk_and_unmarshal(fp, type);
    if !ok || data == null {
        // State which file failed before exiting.
        log_error("Could not load data from '%'.", fp);
        exit(1);
    }
    return data;
}
347
/** Reports the last socket error, if there is one.

    loc  source location to report; defaults to the caller's location

    Does nothing when get_last_socket_error() returns 0. Otherwise logs the
    location via log_error and prints one "Error: <description>" line to
    standard error. Error codes without a dedicated description are printed
    with their numeric value on that same line.
*/
socket_log_error :: (loc := #caller_location) {
    err := get_last_socket_error();
    if err == 0 return;

    // print() with to_standard_error baked to true.
    print_proc :: #procedure_of_call print("");
    print_err  :: #bake_arguments print_proc(to_standard_error=true);

    log_error("%", loc);
    print_err("Error: ");

    if err == {
        case ENOENT;       print_err("Cannot find Unix socket");
        case EBADF;        print_err("Socket corrupted");
        case EINVAL;       print_err("Socket corrupted");
        case EFAULT;       print_err("Invalid buffer ptr");
        case EADDRINUSE;   print_err("Socket in use");
        case EPIPE;        print_err("Broken pipe");
        case EAGAIN;       print_err("No data in pipe");
        case EINTR;        print_err("Got interrupted");
        case EAFNOSUPPORT; print_err("Address family not supported");
        // Use print_err like every other case so the message stays on the
        // "Error: " line instead of going through the logger.
        case;              print_err("Uncovered error: %", err);
    }

    print_err("\n");
}
373
/** True when the environment variable SHL_DEVELOPER is set to exactly "true". */
is_dev_machine :: () -> bool {
    return to_string(getenv("SHL_DEVELOPER")) == "true";
}
379
380
// qtrace variants with the color argument baked in: yellow for qtrace_init
// (used by load_data_or_exit), green for qtrace_live.
// NOTE(review): qtrace_live is not used in this file; presumably used by the
// request-time code in the loaded files - confirm there.
381qtrace_init :: #bake_arguments qtrace(color=.YELLOW);
382qtrace_live :: #bake_arguments qtrace(color=.GREEN);
383
384
385#scope_file
386
387
388libc :: #library,system "libc";
389
390
/** Help text printed by main() for `help`, `-h`, `-help` and `--help`.
    Lists every argument main() understands, including `port`.
*/
ARGS_HELP :: #string STR_END
Options
    store                        Serializes the search index to disk
    human                        Prints the content of the search index to stderr line by line
    port <number>                Uses <number> instead of the default port
    help, -h, -help, --help      Prints this help text

STR_END;
397
398
/** Signal handler installed for SIGINT by main().

    signal  number of the delivered signal (unused)

    Logs the interrupt, then requests a clean shutdown of the HTTP server and
    of the reload server. A context is pushed because this is a #c_call
    procedure.
    NOTE(review): log() and the shutdown helpers run in signal-handler
    context - confirm they only do async-signal-safe work.
*/
handler_server :: (signal: int) #c_call {
    push_context {
        log("Received interrupt signal.");

        http_server_shutdown_clean();
        reload_server_shutdown_clean();
    }
}
406
/** Thread entry point for the reload server.

    thread  the running thread (unused)

    Runs reload_run() and returns 0 once it returns.
*/
th_start_reload_server :: (thread: *Thread) -> s64 {
    reload_run();

    return 0;
}
411
/** Thread entry point for the search (HTTP) server.

    thread  the running thread (unused)

    Runs http_server() and returns 0 once it returns.
*/
th_start_search_server :: (thread: *Thread) -> s64 {
    http_server();

    return 0;
}
416
// Foreign binding for libc's signal(), plus the handler type it takes.
// NOTE(review): C declares `sighandler_t signal(int signum, sighandler_t handler)`
// with a 32-bit `int`, while Jai's `int` is 64 bits wide, and the previous
// handler returned by C is dropped here. Presumably harmless on the targeted
// ABI, but s32 would mirror the C prototype - confirm before porting.
417sighandler_t :: #type (signal: int) -> void #c_call;
418signal :: (signal: int, handler: sighandler_t) -> void #foreign libc;
419
420
Copyright 2026  E766CB298A6D1E64 | Git-Thing heavily inspired by cgit