/**
    NOTE: After a high load, the server opens multiple FDs and closes them
    without serving them. See `bench/output/plot_418257_27374.png`
    Maybe it's because those clients are outside `CLIENT_TIMEOUT`.
    Anyway... For this small project, it's fine to keep it as is.
    For the bigger project, investigate!
*/

/**
    Creates the listening socket on `LOCALHOST:listen_port`, creates the epoll
    instance and registers the listening socket with it.
    Also captures the current context (with `my_logger` installed) into
    `my_context`. Every setup failure is logged and ends the process with
    `exit(1)`.
*/
http_init :: (listen_port: u16) {
    my_context = context;
    my_context.logger = my_logger;
    push_context,defer_pop my_context;

    log("Server init");
    table_ensure_space(*clients, EPOLL_MAX_EVENTS);

    /** Setup socket */
    socket_init();
    server_s = socket(AF_INET, .STREAM, 0);
    if server_s < 0 {
        socket_log_error();
        close_and_reset(*server_s);
        exit(1);
    }

    opt: u32 = 1;
    if setsockopt(server_s, SOL_SOCKET, SO_REUSEADDR, xx *opt, size_of(type_of(opt))) < 0 {
        socket_log_error();
        close_and_reset(*server_s);
        exit(1);
    }

    if bind(server_s, LOCALHOST, listen_port) < 0 {
        socket_log_error();
        close_and_reset(*server_s);
        exit(1);
    }

    if listen(server_s, BACKLOG) < 0 {
        socket_log_error();
        close_and_reset(*server_s);
        exit(1);
    }
    log("Listening to %:%", LOCALHOST, listen_port);

    /** Setup epoll */
    epoll_fd = epoll_create1(EPOLL_CLOEXEC);
    if epoll_fd < 0 {
        socket_log_error();
        close_and_reset(*server_s);
        exit(1);
    }

    ev: epoll_event;
    ev.events  = EPOLLIN;
    ev.data.fd = server_s;
    // Without this registration the server would never see a connection,
    // so a failure here is as fatal as a failed bind/listen.
    if epoll_ctl(epoll_fd, .ADD, server_s, *ev) < 0 {
        socket_log_error();
        close_and_reset(*epoll_fd);
        close_and_reset(*server_s);
        exit(1);
    }
}

/**
    The server loop. Runs until `is_server_offline` is set.

    Every iteration waits on epoll (at most `EPOLL_TIMEOUT_MS`), then either
    accepts new clients (event on the listening socket) or reads one request
    from a client, answers it and disconnects that client.
    Clients whose deadline has passed get swept when the server is idle and
    additionally every `CLIENT_SWEEP_TIMER` seconds while it is busy.
*/
http_server :: () {
    push_context,defer_pop my_context;

    log("Server ready");
    defer {
        close_and_reset(*epoll_fd);
        close_and_reset(*server_s);
        log("Server shutdown gracefully");
    }

    buffer: [BUFFER_MAX_WITH_NULL_TERMINATOR]u8;
    future_sweep: Apollo_Time;
    clients_sweep_refresh_timer(current_time_monotonic(), *future_sweep);

    /**
        Important!
        Using the TS here only works because we're running with HTTP/1.0,
        where every connection is closed as soon as the request is served.
        If we would use HTTP/1.1 or higher, TS would corrupt data.
        So keep that in mind if upgrading this ever in the future.
    */
    push_allocator(temp);

    while !is_server_offline {
        reset_temporary_storage();

        n := epoll_wait(epoll_fd, events.data, EPOLL_MAX_EVENTS, EPOLL_TIMEOUT_MS);
        if n < 0 && !is_server_offline {
            socket_log_error();
            assert(false, "Epoll error");
        }

        now := current_time_monotonic();

        /**
            Remove dead clients when the server has nothing to do.
            Also refresh the sweep timer, so it does not sweep again after this.
        */
        if n == 0 {
            log_dbg("Sweep @ downtime");
            clients_sweep(now);
            clients_sweep_refresh_timer(now, *future_sweep);
            continue;
        }

        delta_client  := to_apollo(CLIENT_TIMEOUT);
        future_client := now + delta_client;

        for i: 0..n-1 {
            fd    := events[i].data.fd;
            event := events[i].events;

            if event & (EPOLLHUP | EPOLLERR) {
                // The listening socket is not a client. If it fails there is
                // nothing left to accept on, so stop the server instead of
                // closing it behind the loop's back.
                if fd == server_s {
                    log_error("Listening socket reported EPOLLHUP/EPOLLERR, shutting down");
                    is_server_offline = true;
                    break;
                }
                client_remove(*fd);
                continue;
            }

            if fd == server_s {
                qtrace_live("Add new client to queue");

                client_s, client_addr := accept(server_s);
                if client_s < 0 {
                    socket_log_error();
                    assert(false, "Client Socket");
                    continue;
                }

                // Note: in the two error paths below `fd` is the listening
                // socket, so it is the fresh client socket that gets closed.
                ok := fcntl(client_s, F_SETFD, FD_CLOEXEC);
                if ok < 0 {
                    socket_log_error();
                    close_and_reset(*client_s);
                    assert(false, "FCNTL");
                    continue;
                }

                set_nonblocking(client_s);

                client_ev: epoll_event;
                client_ev.events  = EPOLLIN;
                client_ev.data.fd = client_s;
                ok = epoll_ctl(epoll_fd, .ADD, client_s, *client_ev);
                if ok < 0 {
                    socket_log_error();
                    close_and_reset(*client_s);
                    assert(false, "epoll ctl");
                    continue;
                }

                client_append(client_s, future_client);

                #if IS_DEVELOPER {
                    ip_buf: [INET_ADDRSTRLEN]u8;
                    ip_ok := inet_ntop(
                        AF_INET,
                        *client_addr.sin_addr,
                        ip_buf.data,
                        size_of(type_of(ip_buf))
                    );
                    assert(ip_ok != null, "IP Address conversion");
                    client_ip := string.{ INET_ADDRSTRLEN, ip_buf.data };
                    log("-- Client Connected ---");
                    log("FD: %", client_s);
                    log("Port: %", client_addr.sin_port);
                    log("IP: %", client_ip);
                }
            } else {
                qtrace_live("Send response to client");

                // HTTP/1.0: one request per connection, so the client is
                // always removed when this branch is left.
                defer {
                    client_remove(*fd);
                    log_dbg("-- Client disconnected ---");
                }

                is_client_gone, request := client_loop(fd, *buffer);
                if is_client_gone continue;

                log_dbg("-- Request --\n%", request);
                handle_request(fd, request);
            }
        }

        // Remove dead clients every n seconds
        if now > future_sweep {
            log_dbg("Sweep @ Busy");
            clients_sweep(now);
            clients_sweep_refresh_timer(now, *future_sweep);
        }
    }
}

/**
    The neat part is, that we used `defer` to close FDs.
    So we just need to end the server loop and everything is cleaned up!
    And since we're using epoll & timeouts, no client can stall our shutdown.
    Neat!
*/
http_server_shutdown_clean :: () {
    is_server_offline = true;
}

/** For testing / fuzzing */
test__http_handle_request :: #bake_arguments handle_request(test_mode=true);

#scope_file

#import "Socket";
#import "POSIX";
#import "Linux";
#import "Hash_Table";

is_server_offline: bool;
server_s: Socket;
epoll_fd: FD;
events: [EPOLL_MAX_EVENTS]epoll_event;
clients: Table(FD, Apollo_Time);
my_context: #Context;

BACKLOG             :: 128;
HTTP_VERSION        :: "1.0";
EPOLL_MAX_EVENTS    :: 512;
EPOLL_TIMEOUT_MS    :: 5000;
EPOLL_TIMEOUT_BLOCK :: -1;
CLIENT_TIMEOUT      :: 2.0;
CLIENT_SWEEP_TIMER  :: 10.0;
BUFFER_MAX_WITH_NULL_TERMINATOR :: 1024+1;
BUFFER_MAX_NO_NULL_TERMINATOR   :: 1024;

/**
    Currently nothing bad happens if we don't filter those characters,
    but on the other hand it's a defensive strategy in case I f'ed up
    at some other place.
*/
CHAR_BLACKLIST :: string.[
    "\\",
    "/",
    "%",
];

FD :: Socket;

Http_Status_Code :: enum #specified {
    OK                :: 200;
    // Client
    BAD_REQUEST       :: 400;
    PAYMENT_REQUIRED  :: 402;
    NOT_FOUND         :: 404;
    REQUEST_TIMEOUT   :: 408;
    LENGTH_REQUIRED   :: 411;
    CONTENT_TOO_LARGE :: 413;
    TEAPOT            :: 418;
    // Server
    ERROR_INTERNAL    :: 500;
}

Content_Type :: enum {
    HTML;
    PLAIN;
    JSON;
}

/**
    Procs `write` from https://github.com/smari/jai-simplehttp/

    We ignore broken pipes here and we also don't log it on purpose.
    If someone sends us many requests with broken pipes, it'll flood our logs
    and maybe degrade server performance.
*/
write :: inline (fd: Socket, msg: string) -> s64 {
    return send(fd, cast(*void) msg.data, xx msg.count, .NOSIGNAL);
}

/** Same as above, but formats `buf` with `args` via `tprint` first. */
write :: inline (fd: Socket, buf: string, args: ..Any) -> s64 {
    tmp := tprint(buf, ..args);
    return send(fd, tmp.data, xx tmp.count, .NOSIGNAL);
}

/** Stores the deadline `ts` for client `fd` in the `clients` table. */
client_append :: (fd: FD, ts: Apollo_Time) {
    // Note: Overwrites FDs if they already exist
    table_set(*clients, fd, ts);
}

/** Drops `fd` from the `clients` table without touching the socket. */
client_remove_from_table :: (fd: FD) {
    table_remove(*clients, fd);
}

/**
    Fully disconnects a client: unregisters it from epoll, drops it from the
    `clients` table and closes the socket via `close_and_reset`.
*/
client_remove :: (fd: *FD) {
    epoll_close(fd.*);
    client_remove_from_table(fd.*);
    close_and_reset(fd);
}

/** Removes every client whose stored deadline is older than `ts`. */
clients_sweep :: (ts: Apollo_Time) {
    if clients.count == 0 return;

    for clients {
        fd := it_index;
        if fd > 0 {
            if it < ts {
                log_dbg("Disconnected zombie: %", fd);
                client_remove(*fd);
            }
        }
    }
}

/** Sets `future_sweep` to `now + CLIENT_SWEEP_TIMER`. */
clients_sweep_refresh_timer :: (now: Apollo_Time, future_sweep: *Apollo_Time) {
    delta_sweep := to_apollo(CLIENT_SWEEP_TIMER);
    future_sweep.* = now + delta_sweep;
}

/**
    Reads one request from `socket` into `buffer`.

    Returns:
        - first value: true when the client is gone or misbehaved and the
          request must be dropped. Careful: the return value is *named* `ok`,
          but it carries `is_client_gone` (that's how the caller reads it).
        - request: a copy of the received bytes, or "" when the first value
          is true.

    A request larger than `BUFFER_MAX_NO_NULL_TERMINATOR` bytes is answered
    with 413, a request starting with a zero byte with 418.
*/
client_loop :: (
    socket: Socket,
    buffer: *[BUFFER_MAX_WITH_NULL_TERMINATOR]u8
) -> (ok: bool, request: string) {
    // Logs the problem and flags the client as gone in the caller's scope.
    client_reset :: (message: string = "", loc := #caller_location) #expand {
        log_error("-- Client Error -------");
        if message log_error(message);
        log_error("Socket: %", `socket);
        socket_log_error(loc);
        `is_client_gone = true;
    }

    is_client_gone: bool;
    total_bytes: int;
    n: int;

    qtrace_live("Loop only");
    while true {
        n = recv(socket, xx buffer, BUFFER_MAX_NO_NULL_TERMINATOR, 0);
        if n == 0 {
            client_reset("Client disconnected");
            break;
        }

        if n < 0 {
            // NOTE(review): the socket is non-blocking, so this also fires
            // when a request is exactly BUFFER_MAX_NO_NULL_TERMINATOR bytes
            // long and the follow-up read has nothing to deliver. Confirm
            // whether such a request should be served instead of dropped.
            client_reset();
            break;
        }

        // Every read lands at the start of `buffer`, so anything beyond one
        // full buffer cannot be kept and is rejected.
        total_bytes += n;
        if total_bytes > BUFFER_MAX_NO_NULL_TERMINATOR {
            chat_to_client(
                socket,
                "Content too large :O\n",
                .CONTENT_TOO_LARGE,
                .PLAIN
            );
            client_reset("Content too big");
            log_vars(n, total_bytes);
            break;
        }

        if buffer.*[0] == 0 {
            chat_to_client(socket, "You're a teapot\n", .TEAPOT, .PLAIN);
            client_reset(
                tprint("Client did weird shit. Buffer:\n%\n---------",
                    string.{ buffer.count, buffer.data }
                )
            );
            break;
        }

        // A completely filled buffer may mean there is more to read.
        if n != BUFFER_MAX_NO_NULL_TERMINATOR break;
    }

    // After a failed recv `n` is negative. Never build a string from it.
    if is_client_gone return true, "";

    assert(buffer.data != null, "Buffer, no data");
    request := string.{ n, buffer.data };
    return false, copy_string(request);
}

/**
    Sends a complete HTTP response to `client`: status line, Content-Type,
    Content-Length, an empty line and then `message` as the body.
*/
chat_to_client :: (
    client: Socket,
    message: string,
    http_code: Http_Status_Code,
    content_type: Content_Type
) {
    write(client, get_status_code(http_code));
    write(client, get_content_type(content_type));
    write(client, "Content-Length: %\r\n", message.count);
    write(client, "\r\n");
    write(client, message);
}

/** Builds the status line, e.g. "HTTP/1.0 200 OK\r\n". */
get_status_code :: (status: Http_Status_Code) -> string {
    message: string;
    if #complete status == {
        case .OK;                message = "OK";
        case .BAD_REQUEST;       message = "Bad request";
        case .PAYMENT_REQUIRED;  message = "Payment required";
        case .NOT_FOUND;         message = "Not found";
        case .REQUEST_TIMEOUT;   message = "Request timeout";
        case .LENGTH_REQUIRED;   message = "Length required";
        case .CONTENT_TOO_LARGE; message = "Content too large";
        case .TEAPOT;            message = "I'm a teapot";
        case .ERROR_INTERNAL;    message = "Internal Error";
        case; assert(false, "Status code not set");
    }

    buf: String_Builder;
    append(*buf, "HTTP/");
    append(*buf, HTTP_VERSION);
    append(*buf, " ");
    append(*buf, tprint("% ", cast(int, status)));
    append(*buf, message);
    append(*buf, "\r\n");
    return builder_to_string(*buf);
}

/** Builds the header line "Content-Type: <mime type>\r\n". */
get_content_type :: (content: Content_Type) -> string {
    buf: String_Builder;
    append(*buf, "Content-Type: ");
    if #complete content == {
        case .HTML;  append(*buf, "text/html");
        case .PLAIN; append(*buf, "text/plain");
        case .JSON;  append(*buf, "application/json");
        case; assert(false, "Content type not set");
    }
    append(*buf, "\r\n");
    return builder_to_string(*buf);
}

/**
    Percent-decodes `query` with `curl_easy_unescape`.
    Returns (false, "") when curl hands back null, otherwise (true, copy of
    the decoded string). The curl buffer is always released via `curl_free`.
*/
url_decode :: (query: string) -> (ok: bool, decoded: string) {
    out_length: s32;
    /**
        Note(adam):
        Passing null is not officially supported, so it might break in the future.
        https://github.com/curl/curl/blob/d129ff355dde5698d7cfd090fdc4c1f05f376ad2/lib/escape.c#L167
    */
    unescaped := curl_easy_unescape(null, query.data, xx query.count, *out_length);
    defer curl_free(unescaped);

    decoded: string = ---;
    decoded.data  = unescaped;
    decoded.count = out_length;

    assert(decoded.data != null, "Url Decode blew up");
    if decoded.data == null return false, "";
    return true, copy_string(decoded);
}

/**
    A search request has this request shape:

        GET /search?q=something HTTP/1.1\r\n
        Accept-Encoding: \r\n
        Host: 127.0.0.1:8081\r\n

    We only care about the first line, so we can omit the rest.
    Plus we are only interested in GET requests.

    And we have to handle this stuff too:

        GET /search?q=test+123d%C3%B6ner HTTP/1.1
*/
/**
    NOTE: Currently this is the last proc inside the client loop, so it does
    not matter if we return a `should_disconnect`.

    Validates `request`, runs the search and sends the response to `socket`.
    With `test_mode` nothing is written to the socket.
    Returns false for a rejected request and true once the search has run
    (also when building the response page failed).
*/
handle_request :: (
    socket: Socket,
    request: string,
    $test_mode := false
) -> (ok: bool = false) {
    // Answers with 400 (unless in test mode) and returns from handle_request.
    request_is_invalid_we_bail :: () #expand {
        #if `test_mode `return;
        chat_to_client(
            `socket,
            "Invalid request, dude.",
            .BAD_REQUEST,
            .PLAIN
        );
        `return;
    }

    // Extracts whatever follows "/search?q=" in the request line.
    get_query :: (request: string) -> (found: bool = false, path: string = "") {
        PATH_THING :: "/search?q=";

        found, header := split_from_left(request, "\r\n\r\n");
        if !found return;

        // A request without any header fields consists of the request line
        // only, then the whole head is the request line.
        has_headers, request_line := split_from_left(header, "\r\n");
        if !has_headers request_line = header;

        /** 2. */
        fields := split(request_line, " ");
        if fields.count != 3 return;

        /** 3. */
        path := fields[1];
        if !starts_with(path, PATH_THING) return;

        query := slice(path, PATH_THING.count, path.count);
        return true, query;
    }

    contains_invalid_char :: (query: string) -> ok: bool {
        for CHAR_BLACKLIST if contains(query, it) return true;
        return false;
    }

    /**
        Test Checklist
        0. Max request length (already tested in the recv loop)
        1. Begins with GET (which is with v1.0 the case ¹)
        2. Request line has 3 items after splitting
        3. Second item begins with /search?q=
        4. Doesn't contain invalid chars from blacklist
        5. Max query length (gets tested inside the search)
        ---------------------------------------------------
        ¹https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.1
    */
    qtrace_live();

    /** 1. */
    if !starts_with(request, "GET") then request_is_invalid_we_bail();

    /** 2. & 3. */
    found, query := get_query(request);
    if !found then request_is_invalid_we_bail();

    // '+' means space only in the raw query. It has to be translated before
    // percent-decoding, otherwise an encoded plus (%2B) turns into a space.
    spaced_query := replace(query, "+", " ");
    ok, final_query := url_decode(spaced_query);
    if !ok then request_is_invalid_we_bail();

    /** 4. */
    if contains_invalid_char(final_query) then request_is_invalid_we_bail();

    log_dbg("Query: %", final_query);

    lock(*mutex_data);
    defer unlock(*mutex_data);

    response, results_idx := search_run(search_index, final_query);
    ok=, html := forge_response(response, results_idx, documents_html);
    if !ok {
        log_error("Shit response, sorry");
        // TODO: maybe nicer error page?
        #if !test_mode then chat_to_client(socket, "Internal Error\n", .ERROR_INTERNAL, .PLAIN);
        return true;
    }

    #if !test_mode then chat_to_client(socket, html, .OK, .HTML);
    return true;
}

/**
    Adds O_NONBLOCK to the file status flags of `fd`.
    A failing fcntl is logged and leaves the flags untouched.
*/
set_nonblocking :: (fd: FD) {
    flags := fcntl(fd, F_GETFL, 0);
    if flags < 0 {
        // Don't OR the error value (-1) into the flags.
        socket_log_error();
        return;
    }

    if fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0 socket_log_error();
}

/** Unregisters `fd` from the epoll instance. The socket stays open. */
epoll_close :: (fd: FD) {
    epoll_ctl(epoll_fd, .DEL, fd, null);
}

/**
    Logs only in `IS_DEVELOPER` builds.
    Goes through `log_error` when `to_stderr` is set, otherwise through `log`.
*/
log_dbg :: (s: string, args: ..Any, $to_stderr := false) {
    #if IS_DEVELOPER {
        #if to_stderr then log_error(s, ..args);
        else log(s, ..args);
    }
}

/**
    Note: Hot Path: bench/graph_2026_04_22_01.png

    Renders the result page for a search.
    `.OK` renders one "search_item" per index in `entry_idx`, every other
    status renders a "search_response" message. The rendered part replaces
    `PATTERN_EXTERN_ENTRY` inside the page `FP_WWW_SEARCH_RESULTS`.
    Returns (false, SEARCH_INTERNAL_ERROR) when `generate` fails.
*/
forge_response :: (
    status: Search_Response,
    entry_idx: []int,
    documents_ptr: *[]Entry
) -> (ok: bool, html: string) {
    // Commits one row of fields per found document, returns the item template.
    response_results :: () -> string #expand {
        template :: #run template_read_or_exit("search_item");
        variable :: "search_items";

        data := NewArray(`entry_idx.count, []string);
        for entry_idx {
            doc := `documents[it];

            published_cal := to_calendar(doc.published);
            updated_cal   := to_calendar(doc.updated);
            published := make_date(published_cal);
            updated   := make_date(updated_cal);

            search_item: [ENTRIES_MAX_FIELDS]string;
            search_item[0] = doc.uri;
            search_item[1] = doc.title;
            search_item[2] = published;
            search_item[3] = updated;
            search_item[4] = doc.post;
            data[it_index] = array_copy(search_item);
        }

        commit(*`actions, variable, data);
        return template;
    }

    // Commits a single message (flagged "error" or not), returns the
    // message template.
    response_report_to_user :: (message: string, is_error: bool = true) -> string #expand {
        template :: #run template_read_or_exit("search_response");
        variable :: "response";

        data := NewArray(2, string);
        data[0] = ifx is_error then "error" else "";
        data[1] = message;

        commit(*`actions, variable, data);
        return template;
    }

    actions: [..]Action;
    template: string;
    documents := documents_ptr.*;
    website :: #run file_open_or_exit(FP_WWW_SEARCH_RESULTS);

    if #complete status == {
        case .OK;
            template = response_results();
        case .NO_RESULT;
            template = response_report_to_user(SEARCH_RESPONSE_NO_RESULT, false);
        case .ERROR;
            template = response_report_to_user(SEARCH_RESPONSE_ERROR);
        case .VALIDATION_TOO_LONG;
            template = response_report_to_user(SEARCH_RESPONSE_VALIDATION_TOO_LONG);
        case .VALIDATION_EMPTY;
            template = response_report_to_user(SEARCH_RESPONSE_VALIDATION_EMPTY);
    }

    /**
        Note: Hot Path.
        If we really want to solve this, we could just cache the request +
        response under some criteria and give the cached item some TTL.
        But for this project it's not worth it.
    */
    ok, html, _, error_message := generate(actions, template, .STRING);
    if !ok {
        log_error(error_message);
        return false, SEARCH_INTERNAL_ERROR;
    }

    new_html := replace(website, PATTERN_EXTERN_ENTRY, html);
    return true, new_html;
}

my_logger :: #bake_arguments base_logger(prefix="HTTP");