Logo

index : blog

---

  • summary
  • about
  • tree
  • log
  • branches
<< path: root/public/blog.git/html/fuzzer/generate_corpus.py blob: c670ae0cc3e217a922665c8670055b55f796b8b0 [raw] [clear marker]

        
0#!/bin/python
1
2from pathlib import Path
3from sys import exit, argv
4
5
6
def write_to_disk(fn, data):
    """Write *data* to the file *fn* as UTF-8 text.

    The file is opened in ``"w+"`` mode, so existing content is replaced.
    If anything goes wrong the exception is printed and the process
    terminates with exit status 1.
    """
    try:
        out_file = open(fn, mode="w+", encoding="utf8")
        with out_file:
            out_file.write(data)
    except Exception as err:
        print(err)
        exit(1)
14
15
def corpus_generate(corpus_data, path):
    """Store every corpus entry in its own numbered file below *path*.

    Entries are numbered from 1 and the file name is that number
    zero-padded to at least two digits ("01", "02", ...). A
    "Stored: <file>" line is printed for each entry before it is handed
    to ``write_to_disk``.
    """
    for number, entry in enumerate(corpus_data, start=1):
        target = path / Path(f"{number:02d}")
        print(f"Stored: {target}")
        write_to_disk(target, entry)
22
23
def config_read(path):
    """Validate a corpus config file and generate the corpus it describes.

    Expected file layout::

        CFG <output directory>
        <empty line>
        <corpus entry 1>
        <corpus entry 2>
        ...

    The output directory is created if needed (parents included) and
    every line from the third onwards is passed to ``corpus_generate``
    unchanged, trailing newline included.

    Args:
        path: ``pathlib.Path`` pointing at the config file.

    Any validation or I/O failure prints a message and terminates the
    process with exit status 1.
    """
    if not path.exists():
        print("Cannot find config file in:", path)
        exit(1)

    # exists() is also true for directories and unreadable files, and the
    # content may not be valid UTF-8; report those cleanly instead of
    # crashing with a traceback.
    try:
        with open(path, "r", encoding="utf8") as f:
            config = f.readlines()
    except (OSError, UnicodeError) as e:
        print(e)
        exit(1)

    if not config:
        print("Could not read config")
        exit(1)

    if len(config) <= 2:
        print("Config is missing corpus data")
        exit(1)

    if not config[0].startswith("CFG"):
        print("Invalid protocol byte. Missing 'CFG'")
        exit(1)

    if config[1] != "\n":
        print("Invalid protocol. Missing newline on second line")
        exit(1)

    header = config[0].split(" ")

    if len(header) != 2:
        print("Invalid header")
        exit(1)

    # A blank path would resolve to the current directory and silently
    # scatter corpus files there, so treat it as a malformed header.
    out_name = header[1].strip()
    if not out_name:
        print("Invalid header")
        exit(1)

    out_dir = Path(out_name)
    try:
        out_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        # e.g. the target exists but is a regular file, or is not writable
        print(e)
        exit(1)

    corpus_generate(config[2:], out_dir)
59
60
def is_arg(args, *matches):
    """Return True if at least one of *matches* is contained in *args*.

    Containment uses the ``in`` operator, so *args* is normally a list of
    command-line arguments; returns False when no candidates are given.
    """
    return any(candidate in args for candidate in matches)
66
def print_usage():
    """Print the one-line command-line usage summary to stdout."""
    # The script name was previously misspelled as "generate_copus.py".
    print("Usage: generate_corpus.py [PATH TO CORPUS CONFIG]")
69
70
def main():
    """Command-line entry point.

    Prints the usage text when no arguments are given or when any
    argument is a help flag ("help", "-h", "-help", "--help"). Otherwise
    every argument is treated as the path of a corpus config file and
    processed in order with ``config_read``.
    """
    # Drop the script name so it can never be mistaken for a flag or path.
    args = argv[1:]

    # is_arg() takes the argument list as its first parameter. The previous
    # call omitted it, so "help" was searched as a string and the help
    # flags were never recognised.
    if not args or is_arg(args, "help", "-h", "-help", "--help"):
        print_usage()
        return

    for path in args:
        config_read(Path(path.strip()))
83
84
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
87
88
Copyright 2026  E766CB298A6D1E64 | Git-Thing heavily inspired by cgit