diff --git a/django/utils/html.py b/django/utils/html.py
index 650e848..21992dc 100644
|
a
|
b
|
from django.utils.functional import allow_lazy
|
| 16 | 16 | from django.utils import six |
| 17 | 17 | from django.utils.text import normalize_newlines |
| 18 | 18 | |
| | 19 | from .html_parser import HTMLParser |
| | 20 | |
| | 21 | |
| 19 | 22 | # Configuration for urlize() function. |
| 20 | 23 | TRAILING_PUNCTUATION = ['.', ',', ':', ';', '.)'] |
| 21 | 24 | WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('<', '>')] |
| … |
… |
link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
|
| 33 | 36 | html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE) |
| 34 | 37 | hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL) |
| 35 | 38 | trailing_empty_content_re = re.compile(r'(?:<p>(?: |\s|<br \/>)*?</p>\s*)+\Z') |
| 36 | | strip_tags_re = re.compile(r'</?\S([^=>]*=(\s*"[^"]*"|\s*\'[^\']*\'|\S*)|[^>])*?>', re.IGNORECASE) |
| 37 | 39 | |
| 38 | 40 | |
| 39 | 41 | def escape(text): |
| … |
… |
def linebreaks(value, autoescape=False):
|
| 116 | 118 | return '\n\n'.join(paras) |
| 117 | 119 | linebreaks = allow_lazy(linebreaks, six.text_type) |
| 118 | 120 | |
| | 121 | |
class MLStripper(HTMLParser):
    """HTML parser that throws away markup and collects the remaining text.

    Every piece of text handed to ``handle_data()`` is kept unchanged.
    Entity and character references reported by the parser are written
    back in their source form (``&name;`` / ``&#num;``) rather than being
    decoded.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.reset()
        # Ordered fragments of retained text; concatenated by get_data().
        self.fed = []

    def handle_data(self, d):
        """Keep a run of plain text."""
        self.fed.append(d)

    def handle_entityref(self, name):
        """Keep a named entity reference, re-serialised as ``&name;``."""
        entity = '&%s;' % name
        self.fed.append(entity)

    def handle_charref(self, name):
        """Keep a character reference, re-serialised as ``&#name;``."""
        charref = '&#%s;' % name
        self.fed.append(charref)

    def get_data(self):
        """Return all text collected so far as a single string."""
        pieces = self.fed
        return ''.join(pieces)
| | 135 | |
def strip_tags(value):
    """Returns the given HTML with all tags stripped.

    ``value`` is coerced to text with ``force_text()`` and run through
    ``MLStripper``; the text retained by the parser is returned.
    """
    s = MLStripper()
    s.feed(force_text(value))
    # feed() leaves an incomplete trailing construct (e.g. '<f x' or
    # '</fe') buffered while it waits for more input. close() tells the
    # parser the input is finished, so the remainder is processed rather
    # than silently dropped from the result.
    # NOTE(review): any parse error raised by close() propagates to the
    # caller, exactly as one raised by feed() already does.
    s.close()
    return s.get_data()
strip_tags = allow_lazy(strip_tags)
| 123 | 142 | |
| 124 | 143 | def remove_tags(html, tags): |
diff --git a/tests/utils_tests/html.py b/tests/utils_tests/html.py
index 090cc32..04c1569 100644
|
a
|
b
|
import os
|
| 5 | 5 | |
| 6 | 6 | from django.utils import html |
| 7 | 7 | from django.utils._os import upath |
| | 8 | from django.utils.encoding import force_text |
| 8 | 9 | from django.utils.unittest import TestCase |
| 9 | 10 | |
| 10 | 11 | |
| … |
… |
class TestUtilsHtml(TestCase):
|
| 63 | 64 | def test_strip_tags(self): |
| 64 | 65 | f = html.strip_tags |
| 65 | 66 | items = ( |
| | 67 | ('<p>See: 'é is an apostrophe followed by e acute</p>', |
| | 68 | 'See: 'é is an apostrophe followed by e acute'), |
| 66 | 69 | ('<adf>a', 'a'), |
| 67 | 70 | ('</adf>a', 'a'), |
| 68 | 71 | ('<asdf><asdf>e', 'e'), |
| 69 | | ('<f', '<f'), |
| | 72 | ('<f x', '<f x'), |
| 70 | 73 | ('</fe', '</fe'), |
| 71 | 74 | ('<x>b<y>', 'b'), |
| 72 | 75 | ('a<p onclick="alert(\'<test>\')">b</p>c', 'abc'), |
| … |
… |
class TestUtilsHtml(TestCase):
|
| 81 | 84 | for filename in ('strip_tags1.html', 'strip_tags2.txt'): |
| 82 | 85 | path = os.path.join(os.path.dirname(upath(__file__)), 'files', filename) |
| 83 | 86 | with open(path, 'r') as fp: |
| | 87 | content = force_text(fp.read()) |
| 84 | 88 | start = datetime.now() |
| 85 | | stripped = html.strip_tags(fp.read()) |
| | 89 | stripped = html.strip_tags(content) |
| 86 | 90 | elapsed = datetime.now() - start |
| 87 | 91 | self.assertEqual(elapsed.seconds, 0) |
| 88 | 92 | self.assertIn("Please try again.", stripped) |