Ticket #7267: 7267.2.patch
File 7267.2.patch, 2.3 KB (added 13 years ago)
-
tests/regressiontests/utils/html.py
121 121           )
122 122           for value, output in items:
123 123               self.check_output(f, value, output)
    124 +
    125 +     def test_clean_html(self):
    126 +         f = html.clean_html
    127 +         items = (
    128 +             (u'<p>I <i>believe</i> in <b>semantic markup</b>!</p>', u'<p>I <em>believe</em> in <strong>semantic markup</strong>!</p>'),
    129 +             (u'I escape & I don\'t <a href="#" target="_blank">target</a>', u'I escape &amp; I don\'t <a href="#" >target</a>'),
    130 +             (u'<p>I kill whitespace</p><br clear="all"><p>&nbsp;</p>', u'<p>I kill whitespace</p>'),
    131 +             # also a regression test for #7267: this used to raise an UnicodeDecodeError
    132 +             (u'<p>* foo</p><p>* bar</p>', u'<ul>\n<li> foo</li><li> bar</li>\n</ul>'),
    133 +         )
    134 +         for value, output in items:
    135 +             self.check_output(f, value, output)
-
django/utils/html.py
 13  13   TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '&gt;']
 14  14
 15  15   # List of possible strings used for bullets in bulleted lists.
 16     - DOTS = ['&middot;', '*', '\xe2\x80\xa2', '&#149;', '&bull;', '&#8226;']
     16 + DOTS = [u'&middot;', u'*', u'\xe2\x80\xa2', u'&#149;', u'&bull;', u'&#8226;']
 17  17
 18  18   unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
 19  19   word_split_re = re.compile(r'(\s+)')
  …   …
180 180       text = html_gunk_re.sub('', text)
181 181       # Convert hard-coded bullets into HTML unordered lists.
182 182       def replace_p_tags(match):
183     -         s = match.group().replace('</p>', '</li>')
    183 +         s = match.group().replace(u'</p>', u'</li>')
184 184           for d in DOTS:
185     -             s = s.replace('<p>%s' % d, '<li>')
    185 +             s = s.replace(u'<p>%s' % d, u'<li>')
186 186           return u'<ul>\n%s\n</ul>' % s
187 187       text = hard_coded_bullets_re.sub(replace_p_tags, text)
188 188       # Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the bottom
189 189       # of the text.
190     -     text = trailing_empty_content_re.sub('', text)
    190 +     text = trailing_empty_content_re.sub(u'', text)
191 191       return text
192 192   clean_html = allow_lazy(clean_html, unicode)