Ticket #35440: bench_mark.py

File bench_mark.py, 3.2 KB (added by Pravin, 9 days ago)

This is a small adjustment I made.

Line 
1import timeit
2from urllib.parse import unquote
3
# Default upper bound on the length of a header value accepted by the parsers
# in this file; longer values make them raise ValueError.
MAX_HEADER_LENGTH = 10_000
5
6def _parseparam(s):
7 while s[:1] == ";":
8 s = s[1:]
9 end = s.find(";")
10 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
11 end = s.find(";", end + 1)
12 if end < 0:
13 end = len(s)
14 f = s[:end]
15 yield f.strip()
16 s = s[end:]
17
def original_parse_header_parameters(line, max_length=MAX_HEADER_LENGTH):
    """
    Parse a Content-type like header (baseline implementation).

    Return a ``(key, params)`` tuple: the lowercased main value and a dict
    mapping lowercased parameter names to their values. An empty `line`
    gives ``("", {})``.

    If `max_length` is not None and `line` is longer than `max_length`,
    `ValueError` is raised.
    """
    if not line:
        return "", {}

    too_long = max_length is not None and len(line) > max_length
    if too_long:
        raise ValueError("Unable to parse header parameters (value too long).")

    segments = _parseparam(";" + line)
    key = next(segments).lower()
    params = {}
    for segment in segments:
        eq_pos = segment.find("=")
        if eq_pos < 0:
            # Segments without "=" do not define a parameter.
            continue
        name = segment[:eq_pos].strip().lower()
        # A trailing "*" marks an extended parameter with embedded
        # lang/encoding, like "filename*=UTF-8''file.ext".
        # https://tools.ietf.org/html/rfc2231#section-4
        is_extended = name.endswith("*")
        if is_extended:
            name = name[:-1]
        has_encoding = is_extended and segment.count("'") == 2
        value = segment[eq_pos + 1:].strip()
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            # Drop the surrounding quotes, then undo backslash escaping.
            value = value[1:-1].replace("\\\\", "\\").replace('\\"', '"')
        if has_encoding:
            encoding, _lang, encoded = value.split("'")
            value = unquote(encoded, encoding=encoding)
        params[name] = value
    return key, params
54
def optimized_parse_header_parameters(line, max_length=MAX_HEADER_LENGTH):
    """
    Parse a Content-type like header, with fast paths for simple values.

    Return a ``(key, params)`` tuple: the lowercased, stripped main value and
    a dict mapping lowercased parameter names to their stripped values. An
    empty `line` gives ``("", {})``.

    Headers without ``;`` and headers free of quotes, ``*`` and backslashes
    are handled with plain string splitting; anything else is delegated to
    `original_parse_header_parameters`.

    If `max_length` is not None and `line` is longer than `max_length`,
    `ValueError` is raised.
    """
    if not line:
        return "", {}

    if max_length is not None and len(line) > max_length:
        raise ValueError("Unable to parse header parameters (value too long).")

    # No parameters at all: the whole line is the main value.
    if ";" not in line:
        return line.lower().strip(), {}

    # Without quotes, "*" (extended parameters) or backslashes there is
    # nothing to unquote, decode or unescape, so a plain split is enough.
    if '"' not in line and "*" not in line and "\\" not in line:
        key, *params = line.split(";")
        pdict = {}
        for p in params:
            name, sep, value = p.partition("=")
            if sep:
                # Empty names are stored too, exactly as the fallback parser
                # does, so the result does not depend on which path ran.
                pdict[name.strip().lower()] = value.strip()
        return key.lower().strip(), pdict

    return original_parse_header_parameters(line, max_length)
78
# Header values to benchmark, keyed by the label printed in the results table.
test_cases = {
    "Simple (No Params)": "text/plain",
    "Standard (Charset)": "text/plain; charset=utf-8",
    "Multi-Param": "text/plain; charset=utf-8; boundary=something",
    "Complex (Quotes/RFC2231)": 'attachment; filename="strange;name"; title*=UTF-8\'\'foo-%c3%a4.html',
}

# Number of calls timed per implementation for every test case.
ITERATIONS = 50_000

# Table header and separator.
print(f"{'Test Case':<30} | {'Original':<10} | {'Optimized':<10} | {'Improvement'}")
print("-" * 75)

for label, header in test_cases.items():
    # Time the baseline first, then the optimized parser, on the same header.
    baseline_secs = timeit.timeit(
        lambda: original_parse_header_parameters(header),
        number=ITERATIONS,
    )
    optimized_secs = timeit.timeit(
        lambda: optimized_parse_header_parameters(header),
        number=ITERATIONS,
    )

    # Relative time saved by the optimized parser, as a percentage of the
    # baseline time (negative when the optimized parser is slower).
    saved_pct = (baseline_secs - optimized_secs) / baseline_secs * 100
    print(
        f"{label:<30} | {baseline_secs:>8.4f}s | {optimized_secs:>9.4f}s | {saved_pct:>10.2f}%"
    )
Back to Top