/**
    Static html generator & search server.
    Copyright (C) 2026 dev@ptrace.dev

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

#import "Basic"()( MEMORY_DEBUGGER=MEMORY_DEBUGGER_ENABLED, ENABLE_ASSERT=IS_DEVELOPER );
#import "Curl"()(LINUX_USE_SYSTEM_LIBRARY=false);
#import "File";
#import "String";
#import "Print_Vars";
#import "Thread";
#import "htmltemplate";
#import "stringpad";

#load "main.h";
#load "search.jai";
#load "http.jai";
#load "response.jai";
#load "reload.jai";
#load "libseccomp.jai";
#load "liblandlock.jai";
#load "sec_seccomp.jai";
#load "sec_landlock.jai";
#load "../gen/blog.h";
#load "../gen/entries.h";
#load "../gen/constants.h";
#load "../marshal.jai";

#import,file "../quick_trace.jai"(ENABLE_TRACING=IS_DEVELOPER);

/**
    This is a list of things we did to ensure stability:
    - fuzzing individual functions via libfuzz
    - E2E fuzzing via libfuzz
    - load testing via wrk
    - profiling under load via gprof
    - unmapping allocator test
    - tracing via Quick_Trace
*/

/**
    Security

    -- Unmapping Allocator
    This allocator unmaps memory when it is freed, which lets us detect
    use-after-free bugs much more reliably, since the program simply crashes
    if anything accesses the invalid memory. It is only enabled when we need
    it, since it's quite slow.

    -- CLOEXEC on Sockets
    Prevents our sockets from being inherited through the FD table when
    execve("/bin/sh") is used. exec* is already disabled via seccomp, but we
    use this as an extra layer of security.

    -- Seccomp
    We're using seccomp as a whitelist, so every other syscall is denied.
    We recorded the needed syscalls via strace and built an allow-list from them.

    -- Landlock
    We deny file access completely, since this program does not need it
    after the init segment.

    -- Namespaces
    We're isolating this program from the host system. Since namespaces need
    far more work to implement in code, we perform this step via
    systemd.exec and close the remaining blind spots there.

    --------------------------------------------------------------------------
    https://docs.kernel.org/userspace-api/landlock.html
    https://man7.org/linux/man-pages/man2/seccomp.2.html
    https://wiki.archlinux.org/title/Systemd/Sandboxing
    https://www.freedesktop.org/software/systemd/man/latest/systemd.exec.html
*/

/**
    Perf findings

    Update: 2026-04-22
    We do not build the HT at comptime anymore, so these findings do not
    matter now. I'm leaving them here for future reference.

    Indexing attempts

    |          | build   | document | result     |
    |----------|---------|----------|------------|
    | Runtime  | release | 345 MB   | OOM        |
    | Runtime  | release | 87 MB    | 1 minute   |
    | Runtime  | release | 22 MB    | 20 seconds |
    | Runtime  | release | 5 MB     | < 1 second |
    |          |         |          |            |
    | Comptime | release | 87 MB    | impossible |
    | Comptime | release | 22 MB    | 4 minutes  |
    | Comptime | release | 5 MB     | 45 seconds |

    According to gprof, HT extend() was very slow -> table_ensure_space()

    The conclusion would be: keep it at comptime for now, since my blog won't
    have many entries for the next few years. If it reaches the ~2 MB mark,
    think about a new solution. The best option would be to create it
    separately at runtime as a binary file and load it at init time
    (don't forget seccomp then!).
*/

/** Blog data shared between threads. Both pointers are (re)assigned by
    load_blog_data() while holding `mutex_data`. */
documents_html: *[]Entry;
search_index:   *Table(string, []int);

/** Not referenced in this part of the file.
    NOTE(review): presumably set by the http/reload code on shutdown — confirm. */
is_server_offline: bool;
is_ipc_offline:    bool;

/** Held by load_blog_data() while the two pointers above are swapped. */
mutex_data: Mutex;

/** Compile-time switches for the sandbox.
    SECURITY_MODULE_ENABLED gates sandbox_load() and the warning block in main().
    SECCOMP_ENABLED gates the "SECCOMP BOUNDARY" marker in main().
    SECCOMP_ARMED and LANDLOCK_ENABLED are not used in this part of the file.
    NOTE(review): presumably read by sec_seccomp.jai / sec_landlock.jai — confirm. */
SECURITY_MODULE_ENABLED :: true;
SECCOMP_ENABLED         :: true;
SECCOMP_ARMED           :: true;
LANDLOCK_ENABLED        :: true;

/** Path of the reload socket. The literal carries an explicit trailing NUL. */
#if IS_DEVELOPER {
    FP_SOCKET_RELOAD :: "/tmp/ptracedev_search.sock\0";
} else {
    FP_SOCKET_RELOAD :: "/run/ptracedev/search.sock\0";
}

LOCALHOST    :: "127.0.0.1";
DEFAULT_PORT :u16: 8081;

Data_Kind :: enum {
    ENTRIES;
    INDEX;
}

/** Program entry point.

    Command line (see ARGS_HELP):
      -h | --help | -help | help   print the help text and return
      store                        regenerate the search index dump and return
      human                        print every search index entry via log_error and return
      port <n>                     serve on port <n> instead of DEFAULT_PORT

    Without an early-return option the procedure loads the blog data, runs the
    init segment (signal handler, http_init, thread setup), loads the sandbox,
    starts the search server thread and waits for both threads to finish. */
main :: () {
    #if MEMORY_DEBUGGER_ENABLED defer report_memory_leaks();

    #if IS_UA_ALLOCATOR {
        UA :: #import "Unmapping_Allocator";
        context.allocator = UA.get_unmapping_allocator();
    }

    #if !SECURITY_MODULE_ENABLED {
        /** Warn at startup and again at exit; outside a dev machine, exit immediately. */
        for 0..9 log_error("-- SECURITY MODULE IS DISABLED! --");
        if !is_dev_machine() then exit(99);
        defer {
            for 0..9 log_error("-- SECURITY MODULE IS DISABLED! --");
            exit(99);
        }
    }

    /** `args` is read by the is_arg() macros through the caller's scope. */
    args := get_command_line_arguments();

    if is_arg("-h", "--help", "-help", "help") {
        log(ARGS_HELP);
        return;
    }

    if is_arg("store") {
        search_dump_search_index();
        log("> Regenerated Search Index");
        return;
    }

    load_blog_data();

    /** No threading started yet, so we can access data without mutex */
    if documents_html.count <= 0 {
        log_error("HTML entries are empty.");
        exit(1);
    }

    if search_index.count <= 0 {
        log_error("Search index is empty.");
        exit(1);
    }

    if is_arg("human") {
        for search_index.* log_error("K: % V: %", it_index, it);
        return;
    }

    port := DEFAULT_PORT;
    if args.count == 3 && args[1] == "port" {
        /** parse_int takes a pointer to the string, so parse a copy and keep
            args[2] intact for the error message. */
        maybe_port := args[2];
        port=, ok := parse_int(*maybe_port, u16);
        if !ok {
            log_error("Your argument is not a valid port number. Input: %", args[2]);
            exit(1);
        }

        /** Privileged ports are 0..1023; 1024 is the first unprivileged port
            and must be accepted. */
        if port < 1024 {
            log_error("Cannot use privileged ports.");
            exit(1);
        }
    }

    /** ------------ *
     * INIT SEGMENT  *
     * ------------ */
    signal(SIGINT, handler_server);
    http_init(port);

    thread_reload_server := New(Thread);
    thread_search_server := New(Thread);
    thread_init(thread_reload_server, th_start_reload_server);
    thread_init(thread_search_server, th_start_search_server);
    defer {
        thread_deinit(thread_reload_server);
        thread_deinit(thread_search_server);
        free(thread_reload_server);
        free(thread_search_server);
    }

    /** NOTE(review): the reload thread is already running when sandbox_load()
        is called below. Whether the restrictions installed there also cover
        this thread depends on how sec_landlock_init() / sec_seccomp_init()
        apply them, which is not visible here — confirm. */
    thread_start(thread_reload_server);

    /** NOTE: Important!
        This marker is needed for dev/syscalls.py - since it determines
        from which point the syscalls should be whitelisted. */
    #if !SECCOMP_ENABLED then log("---- SECCOMP BOUNDARY ----");

    /** --------------- *
     * SANDBOX SEGMENT  *
     * --------------- */
    sandbox_load();

    /** ------------------- *
     * RESTRICTIVE SEGMENT  *
     * ------------------- */
    thread_start(thread_search_server);

    /** Wait for both threads; -1 is passed as the timeout.
        NOTE(review): presumably -1 means "wait indefinitely" — confirm. */
    while !thread_is_done(thread_search_server, -1) {}
    while !thread_is_done(thread_reload_server, -1) {}

    log("Search Server offline.");
}

/** Applies the sandbox: Landlock first, then seccomp.
    Compiles to an immediate return when SECURITY_MODULE_ENABLED is false. */
sandbox_load :: () {
    #if !SECURITY_MODULE_ENABLED return;

    sec_landlock_init();

    /**
        Every syscall after this proc is highly restricted.
        It is important to make sure that every needed syscall is whitelisted.
        Otherwise the application will crash in prod!

        # How to find operational syscalls
        1. Change seccomp to logging mode, instead of killing the program.
        2. Use dev/syscalls.py for recording and printing every needed syscall.
           Keep in mind to hit every code path while recording, otherwise you'll
           miss several syscalls.
        3. Pair the recording with a load test via wrk.
        4. Replace the old rules with the new ones.
        5. Make sure to truncate the audit log (make a backup if needed):
           `sudo truncate -s 0 /var/log/audit/audit.log`
        6. Repeat the test without recording syscalls.
        7. Check the audit log if you missed whitelisting syscalls via:
           `sudo ausearch -m SECCOMP -ts recent`
           If it prints '<no matches>' you caught all syscalls we need for this program!
    */
    sec_seccomp_init();
}

/** (Re)loads the HTML entries and the search index from their dump files.
    Holds `mutex_data` for the whole swap. The previous pointers are freed
    first; load_data_or_exit() exits the process if a dump cannot be loaded. */
load_blog_data :: () {
    lock(*mutex_data);
    defer unlock(*mutex_data);

    /** According to "Default_Allocator" -> deallocate() it is a NOOP on `null` */
    free(documents_html);
    free(search_index);

    documents_html = load_data_or_exit(FP_DUMP_ENTRIES_HTML, []Entry);
    search_index   = load_data_or_exit(FP_DUMP_SEARCH_INDEX, Table(string, []int));
}

/** Writes "[prefix]: message" via write_string and appends a newline when the
    message is non-empty and does not already end with one.
    `data` and `info` are accepted but not used. */
base_logger :: (prefix: string, message: string, data: *void, info: Log_Info) {
    write_string("[");
    write_string(prefix);
    write_string("]: ");
    write_string(message);
    if message && message[message.count-1] != #char "\n" write_string("\n");
}

/** Macro: looks up `argument` in the `args` variable of the calling scope. */
is_arg :: (argument: string) -> bool #expand {
    return array_find(`args, argument);
}

/** Macro: true if any of the given spellings is found in the `args` variable
    of the calling scope. */
is_arg :: (argument: ..string) -> bool #expand {
    for argument if array_find(`args, it) then return true;
    return false;
}

/** Reads the file at `fp` and unmarshals it into a `type`.
    Exits the process with status 1 on failure; asserts the result is non-null. */
load_data_or_exit :: ($fp: string, $type: Type) -> *type {
    qtrace_init(fp);

    ok, data := read_from_disk_and_unmarshal(fp, type);
    if !ok then exit(1);

    assert(data != null);
    return data;
}

/** Reports the last socket error, if there is one.
    Logs the caller location, then prints "Error: <description>" to standard
    error. Error codes without a dedicated description are reported through
    log_error. Does nothing when the last socket error is 0. */
socket_log_error :: (loc := #caller_location) {
    err := get_last_socket_error();
    if err == 0 return;

    /** print() baked to write to standard error. */
    print_proc :: #procedure_of_call print("");
    print_err  :: #bake_arguments print_proc(to_standard_error=true);

    log_error("%", loc);
    print_err("Error: ");

    if err == {
        case ENOENT;       print_err("Cannot find Unix socket");
        case EBADF;        print_err("Socket corrupted");
        case EINVAL;       print_err("Socket corrupted");
        case EFAULT;       print_err("Invalid buffer ptr");
        case EADDRINUSE;   print_err("Socket in use");
        case EPIPE;        print_err("Broken pipe");
        case EAGAIN;       print_err("No data in pipe");
        case EINTR;        print_err("Got interrupted");
        case EAFNOSUPPORT; print_err("Address family not supported");
        case;              log_error("Uncovered error: %", err);
    }

    print_err("\n");
}

/** True when the environment variable SHL_DEVELOPER is set to exactly "true". */
is_dev_machine :: () -> bool {
    shl_dev := to_string(getenv("SHL_DEVELOPER"));
    if shl_dev && shl_dev == "true" return true;
    return false;
}

/** qtrace with a fixed color: yellow for init-time traces, green for live ones. */
qtrace_init :: #bake_arguments qtrace(color=.YELLOW);
qtrace_live :: #bake_arguments qtrace(color=.GREEN);

#scope_file

libc :: #library,system "libc";

/** Help text printed for -h / --help / -help / help. */
ARGS_HELP :: #string STR_END
Options
    store     Serializes the search index to disk
    human     Prints the content of the search index to stderr line by line
    port <n>  Serves on port <n> instead of the default one (privileged ports are rejected)
STR_END;

/** Signal handler installed for SIGINT in main(): logs the interrupt and asks
    both servers to shut down cleanly. */
handler_server :: (signal: int) #c_call {
    push_context {
        log("Received interrupt signal.");
        http_server_shutdown_clean();
        reload_server_shutdown_clean();
    }
}

/** Thread entry point: runs the reload server, then returns 0. */
th_start_reload_server :: (thread: *Thread) -> s64 {
    reload_run();
    return 0;
}

/** Thread entry point: runs the HTTP search server, then returns 0. */
th_start_search_server :: (thread: *Thread) -> s64 {
    http_server();
    return 0;
}

/** Binding for `signal` from libc. */
sighandler_t :: #type (signal: int) -> void #c_call;
signal :: (signal: int, handler: sighandler_t) -> void #foreign libc;