Ticket #17730: ticket17730.diff
File ticket17730.diff, 8.4 KB (added by , 13 years ago) |
---|
-
django/test/html.py
diff --git a/django/test/html.py b/django/test/html.py index ff073ba..f1e4897 100644
@@ -4,7 +4,7 @@ Comparing two html documents.
 import re
 from HTMLParser import HTMLParseError
 from django.utils.encoding import force_unicode
-from django.utils.htmlparser import HTMLParser
+from django.utils.html_parser import HTMLParser
 
 
 WHITESPACE = re.compile('\s+')
new file django/utils/html_parser.py
diff --git a/django/utils/html_parser.py b/django/utils/html_parser.py new file mode 100644 index 0000000..b280057
- + 1 import HTMLParser as _HTMLParser 2 import re 3 4 5 class HTMLParser(_HTMLParser.HTMLParser): 6 """ 7 Patched version of stdlib's HTMLParser with patch from: 8 http://bugs.python.org/issue670664 9 """ 10 def __init__(self): 11 _HTMLParser.HTMLParser.__init__(self) 12 self.cdata_tag = None 13 14 def set_cdata_mode(self, tag): 15 try: 16 self.interesting = _HTMLParser.interesting_cdata 17 except AttributeError: 18 self.interesting = re.compile(r'</\s*%s\s*>' % tag.lower(), re.I) 19 self.cdata_tag = tag.lower() 20 21 def clear_cdata_mode(self): 22 self.interesting = _HTMLParser.interesting_normal 23 self.cdata_tag = None 24 25 # Internal -- handle starttag, return end or -1 if not terminated 26 def parse_starttag(self, i): 27 self.__starttag_text = None 28 endpos = self.check_for_whole_start_tag(i) 29 if endpos < 0: 30 return endpos 31 rawdata = self.rawdata 32 self.__starttag_text = rawdata[i:endpos] 33 34 # Now parse the data between i+1 and j into a tag and attrs 35 attrs = [] 36 match = _HTMLParser.tagfind.match(rawdata, i + 1) 37 assert match, 'unexpected call to parse_starttag()' 38 k = match.end() 39 self.lasttag = tag = rawdata[i + 1:k].lower() 40 41 while k < endpos: 42 m = _HTMLParser.attrfind.match(rawdata, k) 43 if not m: 44 break 45 attrname, rest, attrvalue = m.group(1, 2, 3) 46 if not rest: 47 attrvalue = None 48 elif attrvalue[:1] == '\'' == attrvalue[-1:] or \ 49 attrvalue[:1] == '"' == attrvalue[-1:]: 50 attrvalue = attrvalue[1:-1] 51 attrvalue = self.unescape(attrvalue) 52 attrs.append((attrname.lower(), attrvalue)) 53 k = m.end() 54 55 end = rawdata[k:endpos].strip() 56 if end not in (">", "/>"): 57 lineno, offset = self.getpos() 58 if "\n" in self.__starttag_text: 59 lineno = lineno + self.__starttag_text.count("\n") 60 offset = len(self.__starttag_text) \ 61 - self.__starttag_text.rfind("\n") 62 else: 63 offset = offset + len(self.__starttag_text) 64 self.error("junk characters in start tag: %r" 65 % (rawdata[k:endpos][:20],)) 66 if 
end.endswith('/>'): 67 # XHTML-style empty tag: <span attr="value" /> 68 self.handle_startendtag(tag, attrs) 69 else: 70 self.handle_starttag(tag, attrs) 71 if tag in self.CDATA_CONTENT_ELEMENTS: 72 self.set_cdata_mode(tag) # <--------------------------- Changed 73 return endpos 74 75 # Internal -- parse endtag, return end or -1 if incomplete 76 def parse_endtag(self, i): 77 rawdata = self.rawdata 78 assert rawdata[i:i + 2] == "</", "unexpected call to parse_endtag" 79 match = _HTMLParser.endendtag.search(rawdata, i + 1) # > 80 if not match: 81 return -1 82 j = match.end() 83 match = _HTMLParser.endtagfind.match(rawdata, i) # </ + tag + > 84 if not match: 85 if self.cdata_tag is not None: # *** add *** 86 self.handle_data(rawdata[i:j]) # *** add *** 87 return j # *** add *** 88 self.error("bad end tag: %r" % (rawdata[i:j],)) 89 # --- changed start --------------------------------------------------- 90 tag = match.group(1).strip() 91 if self.cdata_tag is not None: 92 if tag.lower() != self.cdata_tag: 93 self.handle_data(rawdata[i:j]) 94 return j 95 # --- changed end ----------------------------------------------------- 96 self.handle_endtag(tag.lower()) 97 self.clear_cdata_mode() 98 return j -
deleted file django/utils/htmlparser.py
diff --git a/django/utils/htmlparser.py b/django/utils/htmlparser.py deleted file mode 100644 index b280057..0000000
+ - 1 import HTMLParser as _HTMLParser2 import re3 4 5 class HTMLParser(_HTMLParser.HTMLParser):6 """7 Patched version of stdlib's HTMLParser with patch from:8 http://bugs.python.org/issue6706649 """10 def __init__(self):11 _HTMLParser.HTMLParser.__init__(self)12 self.cdata_tag = None13 14 def set_cdata_mode(self, tag):15 try:16 self.interesting = _HTMLParser.interesting_cdata17 except AttributeError:18 self.interesting = re.compile(r'</\s*%s\s*>' % tag.lower(), re.I)19 self.cdata_tag = tag.lower()20 21 def clear_cdata_mode(self):22 self.interesting = _HTMLParser.interesting_normal23 self.cdata_tag = None24 25 # Internal -- handle starttag, return end or -1 if not terminated26 def parse_starttag(self, i):27 self.__starttag_text = None28 endpos = self.check_for_whole_start_tag(i)29 if endpos < 0:30 return endpos31 rawdata = self.rawdata32 self.__starttag_text = rawdata[i:endpos]33 34 # Now parse the data between i+1 and j into a tag and attrs35 attrs = []36 match = _HTMLParser.tagfind.match(rawdata, i + 1)37 assert match, 'unexpected call to parse_starttag()'38 k = match.end()39 self.lasttag = tag = rawdata[i + 1:k].lower()40 41 while k < endpos:42 m = _HTMLParser.attrfind.match(rawdata, k)43 if not m:44 break45 attrname, rest, attrvalue = m.group(1, 2, 3)46 if not rest:47 attrvalue = None48 elif attrvalue[:1] == '\'' == attrvalue[-1:] or \49 attrvalue[:1] == '"' == attrvalue[-1:]:50 attrvalue = attrvalue[1:-1]51 attrvalue = self.unescape(attrvalue)52 attrs.append((attrname.lower(), attrvalue))53 k = m.end()54 55 end = rawdata[k:endpos].strip()56 if end not in (">", "/>"):57 lineno, offset = self.getpos()58 if "\n" in self.__starttag_text:59 lineno = lineno + self.__starttag_text.count("\n")60 offset = len(self.__starttag_text) \61 - self.__starttag_text.rfind("\n")62 else:63 offset = offset + len(self.__starttag_text)64 self.error("junk characters in start tag: %r"65 % (rawdata[k:endpos][:20],))66 if end.endswith('/>'):67 # XHTML-style empty tag: <span attr="value" 
/>68 self.handle_startendtag(tag, attrs)69 else:70 self.handle_starttag(tag, attrs)71 if tag in self.CDATA_CONTENT_ELEMENTS:72 self.set_cdata_mode(tag) # <--------------------------- Changed73 return endpos74 75 # Internal -- parse endtag, return end or -1 if incomplete76 def parse_endtag(self, i):77 rawdata = self.rawdata78 assert rawdata[i:i + 2] == "</", "unexpected call to parse_endtag"79 match = _HTMLParser.endendtag.search(rawdata, i + 1) # >80 if not match:81 return -182 j = match.end()83 match = _HTMLParser.endtagfind.match(rawdata, i) # </ + tag + >84 if not match:85 if self.cdata_tag is not None: # *** add ***86 self.handle_data(rawdata[i:j]) # *** add ***87 return j # *** add ***88 self.error("bad end tag: %r" % (rawdata[i:j],))89 # --- changed start ---------------------------------------------------90 tag = match.group(1).strip()91 if self.cdata_tag is not None:92 if tag.lower() != self.cdata_tag:93 self.handle_data(rawdata[i:j])94 return j95 # --- changed end -----------------------------------------------------96 self.handle_endtag(tag.lower())97 self.clear_cdata_mode()98 return j