Logo

index : blog

---

  • summary
  • about
  • tree
  • log
  • branches
<< path: root/public/blog.git/html/src/search/http.jai blob: f9418864480c77c619ec746efeb35b57e4ebc7a0 [raw] [clear marker]

        
0
1
/** NOTE: After a period of high load, the server opens multiple FDs and
    closes them again without ever serving them.
    See `bench/output/plot_418257_27374.png`.

    Maybe this happens because those clients exceeded `CLIENT_TIMEOUT`.

    Anyway...

    For this small project, it's fine to keep it as is.
    For a bigger project, investigate!
*/
12
/** Prepares the server for `http_server`.

    Installs the module logger into `my_context`, reserves space in the
    `clients` table, creates + binds + listens on `server_s` and creates the
    epoll instance `epoll_fd` with the listening socket registered for EPOLLIN.

    Every failing step logs the socket error, closes `server_s` and exits the
    process with status 1.

    listen_port: TCP port that is bound on `LOCALHOST`.
*/
http_init :: (listen_port: u16) {
    // Shared bail-out path. `loc` is forwarded so the logged location is the
    // failing call site and not this helper.
    fail_init :: (loc := #caller_location) {
        socket_log_error(loc);
        close_and_reset(*server_s);
        exit(1);
    }

    my_context = context;
    my_context.logger = my_logger;

    push_context,defer_pop my_context;

    log("Server init");

    table_ensure_space(*clients, EPOLL_MAX_EVENTS);

    /** Setup socket */
    socket_init();

    server_s = socket(AF_INET, .STREAM, 0);
    if server_s < 0 fail_init();

    opt: u32 = 1;
    if setsockopt(server_s, SOL_SOCKET, SO_REUSEADDR, xx *opt, size_of(type_of(opt))) < 0 {
        fail_init();
    }

    if bind(server_s, LOCALHOST, listen_port) < 0 fail_init();

    if listen(server_s, BACKLOG) < 0 fail_init();

    log("Listening to %:%", LOCALHOST, listen_port);


    /** Setup epoll */
    epoll_fd = epoll_create1(EPOLL_CLOEXEC);
    if epoll_fd < 0 fail_init();

    ev: epoll_event;
    ev.events  = EPOLLIN;
    ev.data.fd = server_s;

    // Without this registration the event loop never sees incoming
    // connections, so a failure here is as fatal as a failed bind/listen.
    if epoll_ctl(epoll_fd, .ADD, server_s, *ev) < 0 fail_init();
}
70
/** The server's event loop. Runs until `is_server_offline` becomes true.

    Each iteration waits on `epoll_fd` for at most `EPOLL_TIMEOUT_MS`:
      - no events:        sweep timed-out clients and restart the sweep timer;
      - listening socket: accept the connection, make it CLOEXEC + non-blocking,
                          register it with epoll and record its deadline;
      - client socket:    read the request, answer it, then drop the client.

    On exit the listening socket is closed (see the `defer` below).
*/
http_server :: () {
    push_context,defer_pop my_context;

    log("Server ready");

    defer {
        close_and_reset(*server_s);
        log("Server shutdown gracefully");
    }

    buffer: [BUFFER_MAX_WITH_NULL_TERMINATOR]u8;

    future_sweep: Apollo_Time;
    clients_sweep_refresh_timer(current_time_monotonic(), *future_sweep);

    /** Important!

        Using the TS here only works because we're running with HTTP/1.0, where
        every connection is closed as soon as the request is served.

        If we would use HTTP/1.1 or higher, TS would corrupt data.
        So keep that in mind if upgrading this ever in the future.
    */
    push_allocator(temp);
    while !is_server_offline {
        reset_temporary_storage();

        n := epoll_wait(epoll_fd, events.data, EPOLL_MAX_EVENTS, EPOLL_TIMEOUT_MS);
        if n < 0 && !is_server_offline {
            socket_log_error();
            assert(false, "Epoll error");
        }

        now := current_time_monotonic();

        /** Remove dead clients when the server has nothing to do.
            Also refresh the sweep timer, so it does not sweep again after this.
        */
        if n == 0 {
            log_dbg("Sweep @ downtime");
            clients_sweep(now);
            clients_sweep_refresh_timer(now, *future_sweep);
            continue;
        }

        delta_client  := to_apollo(CLIENT_TIMEOUT);
        future_client := now + delta_client;

        for i: 0..n-1 {
            fd    := events[i].data.fd;
            event := events[i].events;

            if event & (EPOLLHUP | EPOLLERR) {
                client_remove(*fd);
                continue;
            }

            if fd == server_s {
                qtrace_live("Add new client to queue");
                client_s, client_addr := accept(server_s);

                if client_s < 0 {
                    socket_log_error();
                    assert(false, "Client Socket");
                    continue;
                }

                ok := fcntl(client_s, F_SETFD, FD_CLOEXEC);

                if ok < 0 {
                    socket_log_error();
                    // In this branch `fd` is the listening socket. The socket
                    // to discard is the one we just accepted; closing `fd`
                    // would shut the listener down and leak `client_s`.
                    close_and_reset(*client_s);
                    assert(false, "FCNTL");
                    continue;
                }

                set_nonblocking(client_s);

                client_ev: epoll_event;
                client_ev.events  = EPOLLIN;
                client_ev.data.fd = client_s;
                ok = epoll_ctl(epoll_fd, .ADD, client_s, *client_ev);

                if ok < 0 {
                    socket_log_error();
                    // Same as above: drop the accepted client, keep the listener.
                    close_and_reset(*client_s);
                    assert(false, "epoll ctl");
                    continue;
                }

                client_append(client_s, future_client);

                #if IS_DEVELOPER {
                    ip_buf: [INET_ADDRSTRLEN]u8;
                    ip_ok := inet_ntop(
                        AF_INET, *client_addr.sin_addr, ip_buf.data, size_of(type_of(ip_buf))
                    );
                    assert(ip_ok != null, "IP Address conversion");

                    client_ip := string.{ INET_ADDRSTRLEN, ip_buf.data };

                    log("-- Client Connected ---");
                    log("FD: %", client_s);
                    log("Port: %", client_addr.sin_port);
                    log("IP: %", client_ip);
                }
            } else {
                qtrace_live("Send response to client");

                // Runs on every way out of this branch, including `continue`:
                // a client is always dropped after one request.
                defer {
                    client_remove(*fd);
                    log_dbg("-- Client disconnected ---");
                }

                is_client_gone, request := client_loop(fd, *buffer);

                if is_client_gone continue;
                log_dbg("-- Request --\n%", request);
                handle_request(fd, request);
            }
        }

        // Remove dead clients every n seconds
        if now > future_sweep {
            log_dbg("Sweep @ Busy");
            clients_sweep(now);
            clients_sweep_refresh_timer(now, *future_sweep);
        }
    }
}
199
/** Requests a clean shutdown by raising `is_server_offline`.

    Nothing is closed here: `http_server` leaves its loop once it sees the flag
    and its `defer` block does the cleanup.

    Because the loop waits on epoll with a timeout, no client can stall the
    shutdown. Neat!
*/
http_server_shutdown_clean :: () {
    is_server_offline = true;
}
208
/** Testing / fuzzing entry point: `handle_request` baked with `test_mode`
    switched on, which compiles out the `chat_to_client` calls. */
test__http_handle_request :: #bake_arguments handle_request(test_mode=true);
211
212
213#scope_file
214
215
216#import "Socket";
217#import "POSIX";
218#import "Linux";
219#import "Hash_Table";
220
221
/** File descriptors and sockets share one type in this file. */
FD :: Socket;


/** --- Server state ------------------------------------------------------ */

// Raised by `http_server_shutdown_clean`, polled by the `http_server` loop.
is_server_offline: bool;

server_s: Socket;                       // Listening socket.
epoll_fd: FD;                           // Epoll instance created in `http_init`.
events:   [EPOLL_MAX_EVENTS]epoll_event; // Output array for `epoll_wait`.
clients:  Table(FD, Apollo_Time);       // Client FD -> deadline checked by `clients_sweep`.

// Copy of the context taken in `http_init`, with `my_logger` installed.
my_context: #Context;


/** --- Tunables ---------------------------------------------------------- */

BACKLOG :: 128;

HTTP_VERSION        :: "1.0";
EPOLL_MAX_EVENTS    :: 512;
EPOLL_TIMEOUT_MS    :: 5000;
EPOLL_TIMEOUT_BLOCK :: -1;
CLIENT_TIMEOUT      :: 2.0;   // Fed to `to_apollo` (presumably seconds, confirm).
CLIENT_SWEEP_TIMER  :: 10.0;  // Fed to `to_apollo` (presumably seconds, confirm).

BUFFER_MAX_WITH_NULL_TERMINATOR :: 1024+1;
BUFFER_MAX_NO_NULL_TERMINATOR   :: 1024;

/** Currently nothing bad happens if we do not filter these characters,
    but on the other hand it is a defensive strategy in case I f'ed up
    somewhere else.
*/
CHAR_BLACKLIST :: string.[
    "\\",
    "/",
    "%",
];
256
257
/** HTTP status codes this server can answer with. The enum value is the
    numeric code that `get_status_code` puts on the status line. */
Http_Status_Code :: enum #specified {
    // Success
    OK                :: 200;

    // Client errors (4xx)
    BAD_REQUEST       :: 400;
    PAYMENT_REQUIRED  :: 402;
    NOT_FOUND         :: 404;
    REQUEST_TIMEOUT   :: 408;
    LENGTH_REQUIRED   :: 411;
    CONTENT_TOO_LARGE :: 413;
    TEAPOT            :: 418;

    // Server errors (5xx)
    ERROR_INTERNAL    :: 500;
}
273
/** Response body kinds; `get_content_type` maps each one to its MIME type. */
Content_Type :: enum {
    HTML;   // text/html
    PLAIN;  // text/plain
    JSON;   // application/json
}
279
280
/** `write` procs taken from https://github.com/smari/jai-simplehttp/

    Both overloads hand the bytes to `send` with `.NOSIGNAL` and return its
    result unchanged.

    Broken pipes are ignored here and deliberately not logged. If someone
    sends us many requests with broken pipes, logging them would flood our
    logs and maybe degrade server performance.
*/

/** Sends `msg` as-is. */
write :: inline (fd: Socket, msg: string) -> s64 {
    payload := cast(*void) msg.data;
    return send(fd, payload, xx msg.count, .NOSIGNAL);
}

/** Formats `buf` with `args` via `tprint` and sends the result. */
write :: inline (fd: Socket, buf: string, args: ..Any) -> s64 {
    formatted := tprint(buf, ..args);
    return send(fd, formatted.data, xx formatted.count, .NOSIGNAL);
}
295
/** Records `ts` as the deadline of client `fd` in the `clients` table.
    An already tracked FD gets its entry overwritten. */
client_append :: (fd: FD, ts: Apollo_Time) {
    table_set(*clients, fd, ts);
}
300
/** Forgets `fd` in the `clients` table. The socket itself is not touched. */
client_remove_from_table :: (fd: FD) {
    table_remove(*clients, fd);
}
304
/** Drops a client completely: unregisters it from epoll, removes it from
    the `clients` table and finally closes + resets the socket behind `fd`. */
client_remove :: (fd: *FD) {
    sock := fd.*;

    epoll_close(sock);
    client_remove_from_table(sock);

    // Must come last; takes the pointer so the caller's handle gets reset.
    close_and_reset(fd);
}
310
/** Removes every tracked client whose stored deadline lies before `ts`.
    Entries with a key that is not > 0 are skipped. */
clients_sweep :: (ts: Apollo_Time) {
    if clients.count == 0 return;

    for deadline, key: clients {
        if key <= 0         continue;
        if !(deadline < ts) continue;

        // Local copy, because `client_remove` wants a pointer it can reset.
        fd := key;
        log_dbg("Disconnected zombie: %", fd);
        client_remove(*fd);
    }
}
323
/** Schedules the next sweep: writes `now + CLIENT_SWEEP_TIMER` into
    `future_sweep`. */
clients_sweep_refresh_timer :: (now: Apollo_Time, future_sweep: *Apollo_Time) {
    future_sweep.* = now + to_apollo(CLIENT_SWEEP_TIMER);
}
328
/** Reads one request from `socket` into `buffer`.

    Every `recv` writes to the start of `buffer`. The loop only goes around
    again when a read filled the buffer completely, and the next successful
    read then trips the "too large" limit.

    Returns:
      ok       NOTE: despite its name this is `is_client_gone`. It is true when
               the client disconnected, `recv` failed, or the request was
               rejected (too large / leading NUL byte).
      request  A copy of the received bytes, or "" whenever the first value
               is true.
*/
client_loop :: (
    socket: Socket,
    buffer: *[BUFFER_MAX_WITH_NULL_TERMINATOR]u8
)
    -> (ok: bool, request: string)
{
    // Logs the failure and marks the client as gone in the enclosing scope.
    client_reset :: (message: string = "", loc := #caller_location) #expand {
        log_error("-- Client Error -------");
        if message log_error(message);
        log_error("Socket: %", `socket);
        socket_log_error(loc);
        `is_client_gone = true;
    }


    is_client_gone: bool;
    total_bytes: int;
    n: int;

    qtrace_live("Loop only");
    while true {
        n = recv(socket, xx buffer, BUFFER_MAX_NO_NULL_TERMINATOR, 0);

        if n == 0 { client_reset("Client disconnected"); break; }
        if n <  0 { client_reset();                      break; }

        total_bytes += n;

        // `recv` never returns more than the length we asked for, so this is
        // the only size check needed (`n <= total_bytes` always holds).
        if total_bytes > BUFFER_MAX_NO_NULL_TERMINATOR {
            chat_to_client(
                socket, "Content too large :O\n", .CONTENT_TOO_LARGE, .PLAIN
            );
            client_reset("Content too big");
            log_vars(n, total_bytes);
            break;
        }

        if buffer.*[0] == 0 {
            chat_to_client(socket, "You're a teapot\n", .TEAPOT, .PLAIN);
            client_reset(
                tprint("Client did weird shit. Buffer:\n%\n---------",
                    string.{ buffer.count, buffer.data }
                )
            );
            break;
        }

        if n != BUFFER_MAX_NO_NULL_TERMINATOR break;
    }

    // After a failed `recv`, `n` is 0 or -1. Building a string from it and
    // copying it would mean a zero/negative-length copy, so bail out first.
    // Callers ignore the request of a gone client anyway.
    if is_client_gone return true, "";

    assert(buffer.data != null, "Buffer, no data");
    request := string.{ n, buffer.data };

    return false, copy_string(request);
}
394
/** Sends a complete HTTP response to `client`, piece by piece:
    status line, Content-Type header, Content-Length header, the empty
    separator line and finally `message` as the body.

    The results of the individual `write` calls are not inspected.
*/
chat_to_client :: (
    client: Socket,
    message: string,
    http_code: Http_Status_Code,
    content_type: Content_Type
) {
    // Head
    status_line := get_status_code(http_code);
    write(client, status_line);

    type_header := get_content_type(content_type);
    write(client, type_header);

    write(client, "Content-Length: %\r\n", message.count);
    write(client, "\r\n");

    // Body
    write(client, message);
}
407
/** Builds the status line for `status`:

        HTTP/<HTTP_VERSION> <numeric code> <reason phrase>\r\n
*/
get_status_code :: (status: Http_Status_Code) -> string {
    reason: string;

    if #complete status == {
        case .OK;                reason = "OK";
        case .BAD_REQUEST;       reason = "Bad request";
        case .PAYMENT_REQUIRED;  reason = "Payment required";
        case .NOT_FOUND;         reason = "Not found";
        case .REQUEST_TIMEOUT;   reason = "Request timeout";
        case .LENGTH_REQUIRED;   reason = "Length required";
        case .CONTENT_TOO_LARGE; reason = "Content too large";
        case .TEAPOT;            reason = "I'm a teapot";
        case .ERROR_INTERNAL;    reason = "Internal Error";
        case;
            assert(false, "Status code not set");
    }

    line: String_Builder;
    print_to_builder(*line, "HTTP/% % %\r\n", HTTP_VERSION, cast(int, status), reason);
    return builder_to_string(*line);
}
434
/** Builds the `Content-Type: <mime>\r\n` header line for `content`. */
get_content_type :: (content: Content_Type) -> string {
    mime: string;

    if #complete content == {
        case .HTML;  mime = "text/html";
        case .PLAIN; mime = "text/plain";
        case .JSON;  mime = "application/json";
        case;
            assert(false, "Content type not set");
    }

    header: String_Builder;
    append(*header, "Content-Type: ");
    append(*header, mime);
    append(*header, "\r\n");

    return builder_to_string(*header);
}
452
/** Percent-decodes `query` with libcurl.

    Returns:
      ok       false if curl could not decode the input.
      decoded  A copy of the decoded bytes; the curl-owned buffer is freed
               before returning.
*/
url_decode :: (query: string) -> (ok: bool, decoded: string) {
    // curl treats an input length of 0 as "run strlen() on the input".
    // `query` is a slice without a NUL terminator, so that would decode
    // whatever bytes happen to follow it. There is nothing to decode anyway.
    if query.count == 0 return true, "";

    out_length: s32;

    /** Note(adam): Passing null is not officially supported,
        so it might break in the future.

        https://github.com/curl/curl/blob/d129ff355dde5698d7cfd090fdc4c1f05f376ad2/lib/escape.c#L167
    */
    unescaped := curl_easy_unescape(null, query.data, xx query.count, *out_length);
    defer curl_free(unescaped);

    decoded: string = ---;
    decoded.data  = unescaped;
    decoded.count = out_length;

    // Loud when asserts are enabled, a plain failure result otherwise.
    assert(decoded.data != null, "Url Decode blew up");
    if decoded.data == null return false, "";

    return true, copy_string(decoded);
}
472
473
474/** A search request has this request shape:
475
476 GET /search?q=something HTTP/1.1\r\n
477 Accept-Encoding: <whatever>\r\n
478 Host: 127.0.0.1:8081\r\n
479
480 We only care about the first line, so we can omit the rest.
481 Plus we are only interested in GET requests.
482
483 And we have to handle this stuff too:
484
485 GET /search?q=test+123d%C3%B6ner HTTP/1.1
486
487*/
488
489/** NOTE: Currently this is the last proc inside the client loop, so
490 it does not matter if we return a `should_disconnect`. */
491
/** Validates a raw HTTP request, runs the search and sends the answer.

    socket:     Client socket the response is written to.
    request:    Raw request text as received.
    $test_mode: When true, all `chat_to_client` calls are compiled out, so the
                proc can be driven by tests / fuzzers.

    Returns `ok`: false when the request was rejected by validation, true once
    the search ran (even if forging the response failed).
*/
handle_request :: (
    socket: Socket, request: string, $test_mode := false
)
    -> (ok: bool = false)
{
    // Answers with 400 (outside test mode) and returns from `handle_request`.
    request_is_invalid_we_bail :: () #expand {
        #if `test_mode `return;
        chat_to_client(
            `socket, "Invalid request, dude.", .BAD_REQUEST, .PLAIN
        );
        `return;
    }

    // Extracts the raw (still encoded) text after "/search?q=" from the
    // request line.
    get_query :: (request: string) -> (found: bool = false, path: string = "") {
        PATH_THING :: "/search?q=";

        found, header := split_from_left(request, "\r\n\r\n");
        if !found return;

        found=, request_line := split_from_left(header, "\r\n");
        if !found return;

        /** 2. */
        fields := split(request_line, " ");
        if fields.count != 3 return;

        /** 3. */
        path := fields[1];
        if !starts_with(path, PATH_THING) return;

        query := slice(path, PATH_THING.count, path.count);
        return true, query;
    }

    contains_invalid_char :: (query: string) -> ok: bool {
        for CHAR_BLACKLIST if contains(query, it) return true;
        return false;
    }

    /** Test Checklist

        0. Max request length (already tested in the recv loop)
        1. Begins with GET (which is with v1.0 the case ¹)
        2. Request line has 3 items after splitting
        3. Second item begins with /search?q=
        4. Doesn't contain invalid chars from blacklist
        5. Max query length (gets tested inside the search)

        ---------------------------------------------------
        ¹https://datatracker.ietf.org/doc/html/rfc7230#section-3.1.1
    */

    qtrace_live();

    /** 1. */
    if !starts_with(request, "GET") then request_is_invalid_we_bail();

    /** 2. & 3. */
    found, query := get_query(request);
    if !found then request_is_invalid_we_bail();

    // Form decoding order matters: '+' means space only in the *encoded*
    // text. Doing this after percent-decoding would also turn a literal plus
    // (sent as %2B) into a space.
    spaced_query := replace(query, "+", " ");

    ok, decoded_path := url_decode(spaced_query);
    if !ok then request_is_invalid_we_bail();

    /** 4. */
    if contains_invalid_char(decoded_path) then request_is_invalid_we_bail();

    final_query := decoded_path;
    log_dbg("Query: %", final_query);

    lock(*mutex_data);
    defer unlock(*mutex_data);

    response, results_idx := search_run(search_index, final_query);
    ok=, html := forge_response(response, results_idx, documents_html);
    if !ok {
        log_error("Shit response, sorry");
        // TODO: maybe nicer error page?
        #if !test_mode then chat_to_client(socket, "Internal Error\n", .ERROR_INTERNAL, .PLAIN);
        return true;
    }

    #if !test_mode then chat_to_client(socket, html, .OK, .HTML);
    return true;
}
578
/** Adds O_NONBLOCK to the file status flags of `fd`.

    Failures are logged via `socket_log_error`; the proc has no return value,
    so the descriptor is then simply left as it was.
*/
set_nonblocking :: (fd: FD) {
    flags := fcntl(fd, F_GETFL, 0);

    // On error F_GETFL yields -1. OR-ing that into the new flags would ask
    // F_SETFL to switch on every flag bit at once.
    if flags < 0 {
        socket_log_error();
        return;
    }

    if fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0 {
        socket_log_error();
    }
}
583
/** Unregisters `fd` from `epoll_fd`. The socket stays open and the result
    of `epoll_ctl` is not inspected. */
epoll_close :: (fd: FD) {
    epoll_ctl(epoll_fd, .DEL, fd, null);
}
587
/** Developer-only logging: compiles to nothing unless `IS_DEVELOPER` is set.
    `$to_stderr` picks `log_error` over `log`. */
log_dbg :: (s: string, args: ..Any, $to_stderr := false) {
    #if IS_DEVELOPER {
        #if to_stderr {
            log_error(s, ..args);
        } else {
            log(s, ..args);
        }
    }
}
593
/** Renders the HTML page for one search outcome.

    status:        Outcome of the search; selects the template.
    entry_idx:     Indices into the documents, one per hit (used for `.OK`).
    documents_ptr: Pointer to the document entries that `entry_idx` refers to.

    Returns `ok` + the finished page. When template generation fails the
    error is logged and `false, SEARCH_INTERNAL_ERROR` is returned.

    Note: Hot Path: bench/graph_2026_04_22_01.png
*/
forge_response :: (
    status: Search_Response, entry_idx: []int, documents_ptr: *[]Entry
)
    -> (ok: bool, html: string)
{
    // Commits one row of strings per hit under "search_items" and hands back
    // the item template.
    response_results :: () -> string #expand {
        ITEM_TEMPLATE :: #run template_read_or_exit("search_item");
        ITEM_VARIABLE :: "search_items";

        rows := NewArray(`entry_idx.count, []string);
        for entry_idx {
            entry := `documents[it];

            published := make_date(to_calendar(entry.published));
            updated   := make_date(to_calendar(entry.updated));

            fields: [ENTRIES_MAX_FIELDS]string;
            fields[0] = entry.uri;
            fields[1] = entry.title;
            fields[2] = published;
            fields[3] = updated;
            fields[4] = entry.post;

            rows[it_index] = array_copy(fields);
        }

        commit(*`actions, ITEM_VARIABLE, rows);
        return ITEM_TEMPLATE;
    }

    // Commits a two-field message ("error" marker or "", then the text) under
    // "response" and hands back the message template.
    response_report_to_user :: (message: string, is_error: bool = true) -> string #expand {
        REPORT_TEMPLATE :: #run template_read_or_exit("search_response");
        REPORT_VARIABLE :: "response";

        parts := NewArray(2, string);
        parts[0] = ifx is_error then "error" else "";
        parts[1] = message;

        commit(*`actions, REPORT_VARIABLE, parts);
        return REPORT_TEMPLATE;
    }


    // NOTE: `actions` and `documents` are captured by name (backtick) in the
    // macros above; renaming them here breaks those macros.
    actions: [..]Action;
    documents := documents_ptr.*;
    website :: #run file_open_or_exit(FP_WWW_SEARCH_RESULTS);

    chosen_template: string;
    if #complete status == {
        case .OK;
            chosen_template = response_results();
        case .NO_RESULT;
            chosen_template = response_report_to_user(SEARCH_RESPONSE_NO_RESULT, false);
        case .ERROR;
            chosen_template = response_report_to_user(SEARCH_RESPONSE_ERROR);
        case .VALIDATION_TOO_LONG;
            chosen_template = response_report_to_user(SEARCH_RESPONSE_VALIDATION_TOO_LONG);
        case .VALIDATION_EMPTY;
            chosen_template = response_report_to_user(SEARCH_RESPONSE_VALIDATION_EMPTY);
    }

    /** Note: Hot Path. If we really want to solve this, we could just
        cache the request + response under some criteria and give the
        cached item some TTL.

        But for this project it's not worth it.
    */
    generated, fragment, _, error_message := generate(actions, chosen_template, .STRING);
    if !generated {
        log_error(error_message);
        return false, SEARCH_INTERNAL_ERROR;
    }

    page := replace(website, PATTERN_EXTERN_ENTRY, fragment);
    return true, page;
}
672
/** Logger installed into `my_context` by `http_init`: `base_logger` baked
    with the "HTTP" prefix. */
my_logger :: #bake_arguments base_logger(prefix="HTTP");
674
675
Copyright 2026  E766CB298A6D1E64 | Git-Thing heavily inspired by cgit