Context Navigation

Ticket #19237: strip_tags_parser.diff

File strip_tags_parser.diff, 1.2 KB (added by Simon Litchfield, 12 years ago)
Parser based version of strip_tags filter

-              old
+              new
 html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
 hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
 trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
-strip_tags_re = re.compile(r'</?\S([^=]*=(\s*"[^"]*"|\s*\'[^\']*\'|\S*)|[^>])*?>', re.IGNORECASE)
 def escape(text):
 …
     return '\n\n'.join(paras)
 linebreaks = allow_lazy(linebreaks, six.text_type)
+from HTMLParser import HTMLParser
+class MLStripper(HTMLParser):
+    def __init__(self):
+        self.reset()
+        self.fed = []
+    def handle_data(self, d):
+        self.fed.append(d)
+    def get_data(self):
+        return ''.join(self.fed)
 def strip_tags(value):
     """Returns the given HTML with all tags stripped."""
-    return strip_tags_re.sub('', force_text(value))
+    s = MLStripper()
+    s.feed(value)
+    return s.get_data()
 strip_tags = allow_lazy(strip_tags)
 def remove_tags(html, tags):