| 1 | import timeit
|
|---|
| 2 | from urllib.parse import unquote
|
|---|
| 3 |
|
|---|
# Default cap on the length of a header value accepted by the parsers below;
# longer values are rejected with ValueError unless the caller overrides it.
MAX_HEADER_LENGTH = 10_000
|
|---|
| 5 |
|
|---|
| 6 | def _parseparam(s):
|
|---|
| 7 | while s[:1] == ";":
|
|---|
| 8 | s = s[1:]
|
|---|
| 9 | end = s.find(";")
|
|---|
| 10 | while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
|
|---|
| 11 | end = s.find(";", end + 1)
|
|---|
| 12 | if end < 0:
|
|---|
| 13 | end = len(s)
|
|---|
| 14 | f = s[:end]
|
|---|
| 15 | yield f.strip()
|
|---|
| 16 | s = s[end:]
|
|---|
| 17 |
|
|---|
def original_parse_header_parameters(line, max_length=MAX_HEADER_LENGTH):
    """
    Parse a Content-type like header.

    Return the main content-type (lowercased) and a dictionary of options.
    Option names are lowercased. A value wrapped in double quotes has the
    quotes removed and its backslash escapes undone. An option whose name
    ends in ``*`` has that suffix dropped, and if its value has the
    ``charset'lang'text`` form the text is percent-decoded with that charset.

    If `line` is longer than `max_length`, `ValueError` is raised. Pass
    ``max_length=None`` to skip the length check.
    """
    if not line:
        return "", {}

    if max_length is not None and len(line) > max_length:
        raise ValueError("Unable to parse header parameters (value too long).")

    parts = _parseparam(";" + line)
    key = next(parts).lower()
    pdict = {}
    for p in parts:
        name, sep, value = p.partition("=")
        if not sep:
            # A segment without "=" carries no name/value pair; ignore it.
            continue
        name = name.strip().lower()
        value = value.strip()
        is_extended = name.endswith("*")
        if is_extended:
            # Embedded lang/encoding, like "filename*=UTF-8''file.ext".
            # https://tools.ietf.org/html/rfc2231#section-4
            name = name[:-1]
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1]
            value = value.replace("\\\\", "\\").replace('\\"', '"')
        # Only apostrophes in the value delimit charset/lang/text. Counting
        # them on the whole segment would let an apostrophe in the name make
        # the three-way split below fail.
        if is_extended and value.count("'") == 2:
            encoding, _lang, value = value.split("'")
            try:
                value = unquote(value, encoding=encoding)
            except LookupError:
                # The charset field may be blank or name a codec Python does
                # not know; decode with unquote()'s default instead of
                # letting malformed input crash the parser.
                value = unquote(value)
        pdict[name] = value
    return key, pdict
|
|---|
| 54 |
|
|---|
def optimized_parse_header_parameters(line, max_length=MAX_HEADER_LENGTH):
    """
    Parse a Content-type like header, with a fast path for simple values.

    Return the main content-type (lowercased) and a dictionary of options,
    exactly as `original_parse_header_parameters` does. Lines without any
    double quote, ``*`` or backslash are split directly on ``;`` and ``=``;
    every other line is handed to `original_parse_header_parameters`.

    If `line` is longer than `max_length`, `ValueError` is raised. Pass
    ``max_length=None`` to skip the length check.
    """
    if not line:
        return "", {}

    if max_length is not None and len(line) > max_length:
        raise ValueError("Unable to parse header parameters (value too long).")

    if ";" not in line:
        # No parameters at all: the whole line is the main value.
        return line.lower().strip(), {}

    if '"' in line or "*" in line or "\\" in line:
        # Quoting, escapes or RFC 2231 extended parameters need the full
        # parser.
        return original_parse_header_parameters(line, max_length)

    key, *params = line.split(";")
    pdict = {}
    for p in params:
        name, sep, value = p.partition("=")
        if sep:
            # Empty names are kept, as the full parser keeps them; otherwise
            # the result for "x; =v" would depend on whether an unrelated
            # quote elsewhere in the line forced the slow path.
            pdict[name.strip().lower()] = value.strip()
    return key.lower().strip(), pdict
|
|---|
| 78 |
|
|---|
# Header values to benchmark, keyed by the label printed in the report.
test_cases = {
    "Simple (No Params)": "text/plain",
    "Standard (Charset)": "text/plain; charset=utf-8",
    "Multi-Param": "text/plain; charset=utf-8; boundary=something",
    "Complex (Quotes/RFC2231)": 'attachment; filename="strange;name"; title*=UTF-8\'\'foo-%c3%a4.html'
}

# Number of calls timed per implementation for each header.
ITERATIONS = 50_000

# Report header and separator rule.
print(f"{'Test Case':<30} | {'Original':<10} | {'Optimized':<10} | {'Improvement'}")
print("-" * 75)

for name, header in test_cases.items():
    # Time the reference parser first, then the fast-path variant, on the
    # same header value.
    t_orig = timeit.Timer(
        lambda: original_parse_header_parameters(header)
    ).timeit(ITERATIONS)
    t_opt = timeit.Timer(
        lambda: optimized_parse_header_parameters(header)
    ).timeit(ITERATIONS)

    # Time saved, as a percentage of the reference parser's running time.
    diff = (t_orig - t_opt) / t_orig * 100
    print(f"{name:<30} | {t_orig:>8.4f}s | {t_opt:>9.4f}s | {diff:>10.2f}%")
|
|---|