<<
path:
root/public/blog.git/html/src/search/http.jai
blob: f9418864480c77c619ec746efeb35b57e4ebc7a0
[raw]
[clear marker]
/** NOTE: After a high load, the server opens multiple FDs and closes them
    without serving them. See `bench/output/plot_418257_27374.png`.

    Maybe it's because those clients are outside `CLIENT_TIMEOUT`.

    For this small project, it's fine to keep it as is.
    For a bigger project, investigate!
*/
13http_init :: (listen_port: u16) {
15 my_context.logger = my_logger;
17 push_context,defer_pop my_context;
21 table_ensure_space(*clients, EPOLL_MAX_EVENTS);
26 server_s = socket(AF_INET, .STREAM, 0);
30 close_and_reset(*server_s);
35 if setsockopt(server_s, SOL_SOCKET, SO_REUSEADDR, xx *opt, size_of(type_of(opt))) < 0 {
37 close_and_reset(*server_s);
41 if bind(server_s, LOCALHOST, listen_port) < 0 {
43 close_and_reset(*server_s);
47 if listen(server_s, BACKLOG) < 0 {
49 close_and_reset(*server_s);
53 log("Listening to %:%", LOCALHOST, listen_port);
57 epoll_fd = epoll_create1(EPOLL_CLOEXEC);
61 close_and_reset(*server_s);
67 ev.data.fd = server_s;
68 epoll_ctl(epoll_fd, .ADD, server_s, *ev);
72 push_context,defer_pop my_context;
77 close_and_reset(*server_s);
78 log("Server shutdown gracefully");
81 buffer: [BUFFER_MAX_WITH_NULL_TERMINATOR]u8;
83 future_sweep: Apollo_Time;
84 clients_sweep_refresh_timer(current_time_monotonic(), *future_sweep);
    /** Using the TS here only works because we're running with HTTP/1.0, where every
        connection is closed as soon as the request is served.

        If we used HTTP/1.1 or higher, TS would corrupt data.
        So keep that in mind if this ever gets upgraded in the future.
    */
95 while !is_server_offline {
96 reset_temporary_storage();
98 n := epoll_wait(epoll_fd, events.data, EPOLL_MAX_EVENTS, EPOLL_TIMEOUT_MS);
99 if n < 0 && !is_server_offline {
101 assert(false, "Epoll error");
104 now := current_time_monotonic();
106 /** Remove dead clients when the server has nothing to do.
107 Also refresh the sweep timer, so it does not sweep again after this.
110 log_dbg("Sweep @ downtime");
112 clients_sweep_refresh_timer(now, *future_sweep);
116 delta_client := to_apollo(CLIENT_TIMEOUT);
117 future_client := now + delta_client;
120 fd := events[i].data.fd;
121 event := events[i].events;
123 if event & (EPOLLHUP | EPOLLERR) {
129 qtrace_live("Add new client to queue");
130 client_s, client_addr := accept(server_s);
134 assert(false, "Client Socket");
138 ok := fcntl(client_s, F_SETFD, FD_CLOEXEC);
142 close_and_reset(*fd);
143 assert(false, "FCNTL");
147 set_nonblocking(client_s);
149 client_ev: epoll_event;
150 client_ev.events = EPOLLIN;
151 client_ev.data.fd = client_s;
152 ok = epoll_ctl(epoll_fd, .ADD, client_s, *client_ev);
156 close_and_reset(*fd);
157 assert(false, "epoll ctl");
161 client_append(client_s, future_client);
164 ip_buf: [INET_ADDRSTRLEN]u8;
166 AF_INET, *client_addr.sin_addr, ip_buf.data, size_of(type_of(ip_buf))
168 assert(ip_ok != null, "IP Address conversion");
170 client_ip := string.{ INET_ADDRSTRLEN, ip_buf.data };
172 log("-- Client Connected ---");
173 log("FD: %", client_s);
174 log("Port: %", client_addr.sin_port);
175 log("IP: %", client_ip);
178 qtrace_live("Send response to client");
181 log_dbg("-- Client disconnected ---");
184 is_client_gone, request := client_loop(fd, *buffer);
186 if is_client_gone continue;
187 log_dbg("-- Request --\n%", request);
188 handle_request(fd, request);
191 // Remove dead clients every n seconds
192 if now > future_sweep {
193 log_dbg("Sweep @ Busy");
195 clients_sweep_refresh_timer(now, *future_sweep);
/** The neat part is that we used `defer` to close FDs.
    So we just need to end the server loop and everything is cleaned up!

    And since we're using epoll & timeouts, no client can stall our shutdown. Neat!
*/
/** Requests a shutdown by raising the global `is_server_offline` flag.
    The server loop runs `while !is_server_offline`, so it stops at its next check.
*/
http_server_shutdown_clean :: () {
    is_server_offline = true;
}
209/** For testing / fuzzing */
210test__http_handle_request :: #bake_arguments handle_request(test_mode=true);
222is_server_offline: bool;
226events: [EPOLL_MAX_EVENTS]epoll_event;
227clients: Table(FD, Apollo_Time);
234HTTP_VERSION :: "1.0";
235EPOLL_MAX_EVENTS :: 512;
236EPOLL_TIMEOUT_MS :: 5000;
237EPOLL_TIMEOUT_BLOCK :: -1;
238CLIENT_TIMEOUT :: 2.0;
239CLIENT_SWEEP_TIMER :: 10.0;
241BUFFER_MAX_WITH_NULL_TERMINATOR :: 1024+1;
242BUFFER_MAX_NO_NULL_TERMINATOR :: 1024;
/** Currently nothing bad happens if we do not filter those characters,
    but on the other hand it's a defensive strategy if I f'ed up at
248CHAR_BLACKLIST :: string.[
258Http_Status_Code :: enum #specified {
263 PAYMENT_REQUIRED :: 402;
265 REQUEST_TIMEOUT :: 408;
266 LENGTH_REQUIRED :: 411;
267 CONTENT_TOO_LARGE :: 413;
271 ERROR_INTERNAL :: 500;
274Content_Type :: enum {
/** Procs `write` from https://github.com/smari/jai-simplehttp/

    We ignore broken pipes here and deliberately do not log them.
    If we logged them, someone sending us many requests with broken pipes
    could flood our logs and maybe degrade server performance.
*/
/** Sends the bytes of `msg` to `fd` with a single `send` call using `.NOSIGNAL`.
    Returns whatever `send` returns.
    NOTE(review): a short write is not retried here; callers only get the count back.
*/
write :: inline (fd: Socket, msg: string) -> s64 {
    payload := cast(*void) msg.data;
    sent    := send(fd, payload, xx msg.count, .NOSIGNAL);
    return sent;
}
/** Formats `buf` with `args` via `tprint`, then sends the result to `fd` with a
    single `send` call using `.NOSIGNAL`. Returns whatever `send` returns.
*/
write :: inline (fd: Socket, buf: string, args: ..Any) -> s64 {
    formatted := tprint(buf, ..args);
    sent      := send(fd, formatted.data, xx formatted.count, .NOSIGNAL);
    return sent;
}
/** Stores `ts` under the key `fd` in the global `clients` table. */
client_append :: (fd: FD, ts: Apollo_Time) {
    // An `fd` that is already present gets its stored value overwritten.
    table_set(*clients, fd, ts);
}
/** Drops the entry for `fd` from the global `clients` table.
    Only the table is touched here; nothing in this proc closes the descriptor.
*/
client_remove_from_table :: (fd: FD) {
    table_remove(*clients, fd);
}
305client_remove :: (fd: *FD) {
307 client_remove_from_table(fd.*);
311clients_sweep :: (ts: Apollo_Time) {
312 if clients.count == 0 return;
317 log_dbg("Disconnected zombie: %", fd);
/** Re-arms the sweep deadline: writes `now + CLIENT_SWEEP_TIMER` (converted with
    `to_apollo`) through the `future_sweep` pointer.
*/
clients_sweep_refresh_timer :: (now: Apollo_Time, future_sweep: *Apollo_Time) {
    future_sweep.* = now + to_apollo(CLIENT_SWEEP_TIMER);
}
331 buffer: *[BUFFER_MAX_WITH_NULL_TERMINATOR]u8
333 -> (ok: bool, request: string)
335 client_reset :: (message: string = "", loc := #caller_location) #expand {
336 log_error("-- Client Error -------");
337 if message log_error(message);
338 log_error("Socket: %", `socket);
339 socket_log_error(loc);
340 `is_client_gone = true;
344 is_client_gone: bool;
348 qtrace_live("Loop only");
350 n = recv(socket, xx buffer, BUFFER_MAX_NO_NULL_TERMINATOR, 0);
352 if n == 0 { client_reset("Client disconnected"); break; }
353 if n < 0 { client_reset(); break; }
357 if total_bytes > BUFFER_MAX_NO_NULL_TERMINATOR {
359 socket, "Content too large :O\n", .CONTENT_TOO_LARGE, .PLAIN
361 client_reset("Content too big");
362 log_vars(n, total_bytes);
366 if n > BUFFER_MAX_NO_NULL_TERMINATOR {
368 tprint("Client did weird shit. Buffer:\n%\n---------",
369 string.{ buffer.count, buffer.data }
372 log_vars(n, total_bytes);
376 if buffer.*[0] == 0 {
377 chat_to_client(socket, "You're a teapot\n", .TEAPOT, .PLAIN);
379 tprint("Client did weird shit. Buffer:\n%\n---------",
380 string.{ buffer.count, buffer.data }
386 if n != BUFFER_MAX_NO_NULL_TERMINATOR break;
389 assert(buffer.data != null, "Buffer, no data");
390 request := string.{ n, buffer.data };
392 return is_client_gone, copy_string(request);
398 http_code: Http_Status_Code,
399 content_type: Content_Type
401 write(client, get_status_code(http_code));
402 write(client, get_content_type(content_type));
403 write(client, "Content-Length: %\r\n", message.count);
404 write(client, "\r\n");
405 write(client, message);
408get_status_code :: (status: Http_Status_Code) -> string {
411 if #complete status == {
412 case .OK; message = "OK";
413 case .BAD_REQUEST; message = "Bad request";
414 case .PAYMENT_REQUIRED; message = "Payment required";
415 case .NOT_FOUND; message = "Not found";
416 case .REQUEST_TIMEOUT; message = "Request timeout";
417 case .LENGTH_REQUIRED; message = "Length required";
418 case .CONTENT_TOO_LARGE; message = "Content too large";
419 case .TEAPOT; message = "I'm a teapot";
420 case .ERROR_INTERNAL; message = "Internal Error";
422 assert(false, "Status code not set");
426 append(*buf, "HTTP/");
427 append(*buf, HTTP_VERSION);
429 append(*buf, tprint("% ", cast(int, status)));
430 append(*buf, message);
431 append(*buf, "\r\n");
432 return builder_to_string(*buf);
435get_content_type :: (content: Content_Type) -> string {
438 append(*buf, "Content-Type: ");
440 if #complete content == {
441 case .HTML; append(*buf, "text/html");
442 case .PLAIN; append(*buf, "text/plain");
443 case .JSON; append(*buf, "application/json");
445 assert(false, "Content type not set");
448 append(*buf, "\r\n");
450 return builder_to_string(*buf);
/** Percent-decodes `query` through libcurl's `curl_easy_unescape`.

    Returns:
      ok      - false when curl hands back null, true otherwise.
      decoded - a `copy_string` copy of curl's output ("" on failure or for an
                empty `query`). curl's own buffer is released with `curl_free`
                before this proc returns.
*/
url_decode :: (query: string) -> (ok: bool, decoded: string) {
    /** curl_easy_unescape treats a length of 0 as "run strlen() on the input".
        `query` is handed over as data + count, so nothing here guarantees a NUL
        terminator; an empty query must therefore never reach curl.
    */
    if query.count == 0 return true, "";

    out_length: s32;

    /** Note(adam): Passing null is not officially supported,
        so it might break in the future.

        https://github.com/curl/curl/blob/d129ff355dde5698d7cfd090fdc4c1f05f376ad2/lib/escape.c#L167
    */
    unescaped := curl_easy_unescape(null, query.data, xx query.count, *out_length);
    defer curl_free(unescaped);

    // Loud in builds with asserts enabled, graceful failure otherwise.
    assert(unescaped != null, "Url Decode blew up");
    if unescaped == null return false, "";

    decoded: string = ---;
    decoded.data  = unescaped;
    decoded.count = out_length;

    // Copy before the deferred curl_free releases `unescaped`.
    return true, copy_string(decoded);
}
474/** A search request has this request shape:
476 GET /search?q=something HTTP/1.1\r\n
477 Accept-Encoding: <whatever>\r\n
478 Host: 127.0.0.1:8081\r\n
480 We only care about the first line, so we can omit the rest.
481 Plus we are only interested in GET requests.
483 And we have to handle this stuff too:
485 GET /search?q=test+123d%C3%B6ner HTTP/1.1
489/** NOTE: Currently this is the last proc inside the client loop, so
490 it does not matter if we return a `should_disconnect`. */
493 socket: Socket, request: string, $test_mode := false
495 -> (ok: bool = false)
497 request_is_invalid_we_bail :: () #expand {
498 #if `test_mode `return;
500 `socket, "Invalid request, dude.", .BAD_REQUEST, .PLAIN
505 get_query :: (request: string) -> (found: bool = false, path: string = "") {
506 PATH_THING :: "/search?q=";
508 found, header := split_from_left(request, "\r\n\r\n");
511 found=, request_line := split_from_left(header, "\r\n");
515 fields := split(request_line, " ");
516 if fields.count != 3 return;
520 if !starts_with(path, PATH_THING) return;
522 query := slice(path, PATH_THING.count, path.count);
526 contains_invalid_char :: (query: string) -> ok: bool {
527 for CHAR_BLACKLIST if contains(query, it) return true;
533 0. Max request length (already tested in the recv loop)
534 1. Begins with GET (which is with v1.0 the case ¹)
535 2. Request line has 3 items after splitting
536 3. Second item begins with /search?q=
537 4. Doesn't contain invalid chars from blacklist
538 5. Max query length (gets tested inside the search)
540 ---------------------------------------------------
541 ¹https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.1
547 if !starts_with(request, "GET") then request_is_invalid_we_bail();
550 found, query := get_query(request);
551 if !found then request_is_invalid_we_bail();
553 ok, decoded_path := url_decode(query);
554 if !ok then request_is_invalid_we_bail();
557 if contains_invalid_char(decoded_path) then request_is_invalid_we_bail();
559 final_query := replace(decoded_path, "+", " ");
560 log_dbg("Query: %", final_query);
563 defer unlock(*mutex_data);
565 response, results_idx := search_run(search_index, final_query);
566 ok=, html := forge_response(response, results_idx, documents_html);
568 log_error("Shit response, sorry");
570 // TODO: maybe nicer error page?
571 then chat_to_client(socket, "Internal Error\n", .ERROR_INTERNAL, .PLAIN);
575 #if !test_mode then chat_to_client(socket, html, .OK, .HTML);
/** Adds `O_NONBLOCK` to the file status flags of `fd`.

    Failures of either `fcntl` call are logged with `log_error`; nothing is
    returned, so the caller is not interrupted.
*/
set_nonblocking :: (fd: FD) {
    flags := fcntl(fd, F_GETFL, 0);
    if flags < 0 {
        // Without this check, `-1 | O_NONBLOCK` would be written back as the flag set.
        log_error("fcntl(F_GETFL) failed for FD %", fd);
        return;
    }

    if fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0 {
        log_error("fcntl(F_SETFL) failed for FD %", fd);
    }
}
/** Removes `fd` from the interest list of the global `epoll_fd` (`.DEL`).
    Despite the name, this proc itself does not close `fd`.
*/
epoll_close :: (fd: FD) {
    // The result of epoll_ctl is not inspected.
    epoll_ctl(epoll_fd, .DEL, fd, null);
}
588log_dbg :: (s: string, args: ..Any, $to_stderr := false) {
590 #if to_stderr then log_error(s, ..args); else log(s, ..args);
594/** Note: Hot Path: bench/graph_2026_04_22_01.png */
596 status: Search_Response, entry_idx: []int, documents_ptr: *[]Entry
598 -> (ok: bool, html: string)
600 response_results :: () -> string #expand {
601 template :: #run template_read_or_exit("search_item");
602 variable :: "search_items";
604 data := NewArray(`entry_idx.count, []string);
606 doc := `documents[it];
608 published_cal := to_calendar(doc.published);
609 updated_cal := to_calendar(doc.updated);
610 published := make_date(published_cal);
611 updated := make_date(updated_cal);
613 search_item: [ENTRIES_MAX_FIELDS]string;
614 search_item[0] = doc.uri;
615 search_item[1] = doc.title;
616 search_item[2] = published;
617 search_item[3] = updated;
618 search_item[4] = doc.post;
620 data[it_index] = array_copy(search_item);
622 commit(*`actions, variable, data);
626 response_report_to_user :: (message: string, is_error: bool = true) -> string #expand {
627 template :: #run template_read_or_exit("search_response");
628 variable :: "response";
630 data := NewArray(2, string);
631 data[0] = ifx is_error then "error" else "";
634 commit(*`actions, variable, data);
641 documents := documents_ptr.*;
642 website :: #run file_open_or_exit(FP_WWW_SEARCH_RESULTS);
644 if #complete status == {
646 template = response_results();
648 template = response_report_to_user(SEARCH_RESPONSE_NO_RESULT, false);
650 template = response_report_to_user(SEARCH_RESPONSE_ERROR);
651 case .VALIDATION_TOO_LONG;
652 template = response_report_to_user(SEARCH_RESPONSE_VALIDATION_TOO_LONG);
653 case .VALIDATION_EMPTY;
654 template = response_report_to_user(SEARCH_RESPONSE_VALIDATION_EMPTY);
    /** Note: Hot Path. If we really want to solve this, we could just
        cache the request + response under some criteria and
        give the cached item some TTL.

        But for this project it's not worth it.
    */
663 ok, html, _, error_message := generate(actions, template, .STRING);
665 log_error(error_message);
666 return false, SEARCH_INTERNAL_ERROR;
669 new_html := replace(website, PATTERN_EXTERN_ENTRY, html);
670 return true, new_html;
673my_logger :: #bake_arguments base_logger(prefix="HTTP");