<<
path:
root/public/blog.git/html/src/search/main.jai
blob: 142e0996b7aa1da372b76a2e7c1298e9d57db8da
[raw]
[clear marker]
1 Static html generator & search server.
2 Copyright (C) 2026 dev@ptrace.dev
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>.
19 MEMORY_DEBUGGER=MEMORY_DEBUGGER_ENABLED, ENABLE_ASSERT=IS_DEVELOPER
21#import "Curl"()(LINUX_USE_SYSTEM_LIBRARY=false);
27#import "htmltemplate";
35#load "libseccomp.jai";
36#load "liblandlock.jai";
37#load "sec_seccomp.jai";
38#load "sec_landlock.jai";
40#load "../gen/entries.h";
41#load "../gen/constants.h";
42#load "../marshal.jai";
44#import,file "../quick_trace.jai"(ENABLE_TRACING=IS_DEVELOPER);
48/** This is a list of things we did, to ensure stability
50 - fuzzing individual functions via libfuzz
51 - E2E fuzzing via libfuzz
52 - load testing via wrk
53 - profiling under load via gprof
54 - unmapping allocator test
55 - tracing via Quick_Trace
62 -- Unmapping Allocator
63 This allocator unmaps memory when it is freed, which lets us detect
64 use-after-free bugs far more reliably, since the program simply crashes if
65 anything tries to access the invalid memory.
67 This is only enabled if we need it, since it's quite slow.
70 Prevents the same FD table from being used when execve("/bin/sh") is called.
71 exec* is already disabled via seccomp, but we use this as
72 an extra layer of security.
75 We're using seccomp as a whitelist, so every other syscall is denied.
76 We recorded the needed syscalls via strace and built an allow-list from them.
79 We deny complete file access, since this program does not need it after
83 We're isolating this program from the host system. Since implementing
84 namespaces in code would take far more work, we're performing this step via
85 systemd.exec and closing the remaining blind spots.
88 --------------------------------------------------------------------------
89 https://docs.kernel.org/userspace-api/landlock.html
90 https://man7.org/linux/man-pages/man2/seccomp.2.html
91 https://wiki.archlinux.org/title/Systemd/Sandboxing
92 https://www.freedesktop.org/software/systemd/man/latest/systemd.exec.html
100 We do not build the HT at comptime anymore, so those findings
101 do not matter now. But I leave it there for future reference.
105 | | build | document | result |
106 |----------|---------|----------|------------|
107 | Runtime | release | 345 MB | OOM |
108 | Runtime | release | 87 MB | 1 minute |
109 | Runtime | release | 22 MB | 20 seconds |
110 | Runtime | release | 5 MB | < 1 second |
112 | Comptime | release | 87 MB | impossible |
113 | Comptime | release | 22 MB | 4 minutes |
114 | Comptime | release | 5 MB | 45 seconds |
116 According to gprof, HT extend() was very slow -> table_ensure_space()
118 The conclusion would be: keep it at comptime for now, since my blog won't
119 have a lot of entries for the next few years.
121 And if it reaches the ~2 MB mark, think about a new solution.
122 Best thing would be creating it at runtime separately as binary file
123 and load it at init time (don't forget seccomp then!).
127documents_html: *[]Entry;
128search_index: *Table(string, []int);
129is_server_offline: bool;
134SECURITY_MODULE_ENABLED :: true;
135SECCOMP_ENABLED :: true;
136SECCOMP_ARMED :: true;
137LANDLOCK_ENABLED :: true;
141 FP_SOCKET_RELOAD :: "/tmp/ptracedev_search.sock\0";
143 FP_SOCKET_RELOAD :: "/run/ptracedev/search.sock\0";
146LOCALHOST :: "127.0.0.1";
147DEFAULT_PORT :u16: 8081;
157 #if MEMORY_DEBUGGER_ENABLED defer report_memory_leaks();
158 #if IS_UA_ALLOCATOR {
159 UA :: #import "Unmapping_Allocator";
160 context.allocator = UA.get_unmapping_allocator();
163 #if !SECURITY_MODULE_ENABLED {
164 for 0..9 log_error("-- SECURITY MODULE IS DISABLED! --");
165 if !is_dev_machine() then exit(99);
167 for 0..9 log_error("-- SECURITY MODULE IS DISABLED! --");
173 args := get_command_line_arguments();
175 if is_arg("-h", "--help", "-help", "help") {
181 search_dump_search_index();
182 log("> Regenerated Search Index");
189 /** No threading started yet, so we can access data without mutex */
191 if documents_html.count <= 0 {
192 log_error("HTML entries are empty.");
196 if search_index.count <= 0 {
197 log_error("Search index is empty.");
202 for search_index.* log_error("K: % V: %", it_index, it);
206 port := DEFAULT_PORT;
207 if args.count == 3 && args[1] == "port" {
208 maybe_port := args[2];
209 port=, ok := parse_int(*maybe_port, u16);
211 log_error("Your argument is not a valid port number. Input: %", args[2]);
216 log_error("Cannot use privileged ports.");
225 signal(SIGINT, handler_server);
230 thread_reload_server := New(Thread);
231 thread_search_server := New(Thread);
233 thread_init(thread_reload_server, th_start_reload_server);
234 thread_init(thread_search_server, th_start_search_server);
237 thread_deinit(thread_reload_server);
238 thread_deinit(thread_search_server);
239 free(thread_reload_server);
240 free(thread_search_server);
243 thread_start(thread_reload_server);
248 This marker is needed for dev/syscalls.py - since it determines from
249 which point the syscalls should be whitelisted.
251 #if !SECCOMP_ENABLED then log("---- SECCOMP BOUNDARY ----");
254 /** --------------- *
261 /** ------------------- *
262 * RESTRICTIVE SEGMENT *
263 * ------------------- */
265 thread_start(thread_search_server);
267 while !thread_is_done(thread_search_server, -1) {}
268 while !thread_is_done(thread_reload_server, -1) {}
269 log("Search Server offline.");
273 #if !SECURITY_MODULE_ENABLED return;
279 /** Every syscall after this proc is highly restricted.
280 It is important to make sure that every needed syscall is
281 whitelisted. Otherwise the application will crash in prod!
283 # How to find operational syscalls
285 1. Change seccomp to logging mode, instead of killing the
288 2. Use dev/syscalls.py for recording and printing every needed syscall.
289 Keep in mind to hit every code path while recording, otherwise you'll
290 miss several syscalls.
292 3. Pair the recording with a load test via wrk.
294 4. Replace the old rules with the new ones.
296 5. Make sure to truncate the audit log (make a backup if needed):
297 `sudo truncate -s 0 /var/log/audit/audit.log`
299 6. Repeat the test without recording syscalls.
301 7. Check the audit log if you missed whitelisting syscalls via:
302 `sudo ausearch -m SECCOMP -ts recent`
304 If it has '<no matches>' you caught all the syscalls we need for
311load_blog_data :: () {
313 defer unlock(*mutex_data);
315 /** According to "Default_Allocator" -> deallocate() it is a NOOP on `null` */
316 free(documents_html);
319 documents_html = load_data_or_exit(FP_DUMP_ENTRIES_HTML, []Entry);
320 search_index = load_data_or_exit(FP_DUMP_SEARCH_INDEX, Table(string, []int));
323base_logger :: (prefix: string, message: string, data: *void, info: Log_Info) {
325 write_string(prefix);
327 write_string(message);
328 if message && message[message.count-1] != #char "\n" write_string("\n");
331is_arg :: (argument: string) -> bool #expand {
332 return array_find(`args, argument);
335is_arg :: (argument: ..string) -> bool #expand {
336 for argument if array_find(`args, it) then return true;
340load_data_or_exit :: ($fp: string, $type: Type) -> *type {
342 ok, data := read_from_disk_and_unmarshal(fp, type);
344 assert(data != null);
348socket_log_error :: (loc := #caller_location) {
349 err := get_last_socket_error();
352 print_proc :: #procedure_of_call print("");
353 print_err :: #bake_arguments print_proc(to_standard_error=true);
356 print_err("Error: ");
359 case ENOENT; print_err("Cannot find Unix socket");
360 case EBADF; print_err("Socket corrupted");
361 case EINVAL; print_err("Socket corrupted");
362 case EFAULT; print_err("Invalid buffer ptr");
363 case EADDRINUSE; print_err("Socket in use");
364 case EPIPE; print_err("Broken pipe");
365 case EAGAIN; print_err("No data in pipe");
366 case EINTR; print_err("Got interrupted");
367 case EAFNOSUPPORT; print_err("Address family not supported");
368 case; log_error("Uncovered error: %", err);
374is_dev_machine :: () -> bool {
375 shl_dev := to_string(getenv("SHL_DEVELOPER"));
376 if shl_dev && shl_dev == "true" return true;
381qtrace_init :: #bake_arguments qtrace(color=.YELLOW);
382qtrace_live :: #bake_arguments qtrace(color=.GREEN);
388libc :: #library,system "libc";
391ARGS_HELP :: #string STR_END
393 store Serializes the search index to disk
394 human Prints the content of the search index to stderr line by line
399handler_server :: (signal: int) #c_call {
401 log("Received interrupt signal.");
402 http_server_shutdown_clean();
403 reload_server_shutdown_clean();
407th_start_reload_server :: (thread: *Thread) -> s64 {
412th_start_search_server :: (thread: *Thread) -> s64 {
417sighandler_t :: #type (signal: int) -> void #c_call;
418signal :: (signal: int, handler: sighandler_t) -> void #foreign libc;