Ticket #2027: better_truncatewords_html.patch
File better_truncatewords_html.patch, 4.6 KB (added 18 years ago)
---|
-
django/template/defaultfilters.py
def truncatewords_html(value, arg):
    """
    Truncates HTML after a certain number of words.

    The actual work is delegated to django.utils.text.truncate_html_words.

    Argument: Number of words to truncate after
    """
    from django.utils.text import truncate_html_words
    try:
        word_limit = int(arg)
    except ValueError:
        # arg is not a valid integer literal: fail silently and hand the
        # input back untouched.
        return value
    text = value
    if not isinstance(text, basestring):
        # Non-string input is stringified before truncation.
        text = str(text)
    return truncate_html_words(text, word_limit)


def upper(value):
    """Converts a string into all uppercase"""
    return value.upper()


# ... unchanged lines elided by the patch view ...

# Filter registrations; truncatewords_html is the entry added by this patch.
register.filter(timesince)
register.filter(title)
register.filter(truncatewords)
register.filter(truncatewords_html)
register.filter(unordered_list)
register.filter(upper)
register.filter(urlencode)
django/utils/text.py
# Elements that never take an end tag (the EMPTY elements of HTML 4.01).
_HTML4_SINGLETS = (
    'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input',
    'isindex', 'link', 'meta', 'param',
)

# Tokenizer: comments, tags/declarations and entities are matched but not
# captured; only a real word lands in group 1.
_RE_HTML_TOKEN = re.compile(
    r'<!--.*?-->'                   # comment (may contain '>' and newlines)
    r'|</?[A-Za-z][^>]*>'           # start or end tag, possibly multi-line
    r'|<[!?][^>]*>'                 # doctype / processing instruction
    r'|&(?:#[0-9]+|#[xX][0-9A-Fa-f]+|[A-Za-z][A-Za-z0-9]*);'  # entity
    r'|(\w[\w-]*)',                 # word
    re.S | re.U)

# Splits a tag token into (closing slash, element name, remainder).
_RE_HTML_TAG = re.compile(r'<(/)?([A-Za-z][^\s/>]*)([^>]*)>')


def truncate_html_words(s, num):
    """
    Truncates html to a certain number of words (not counting tags, comments
    and character entities). Closes opened tags if they were correctly closed
    in the given html.

    Arguments:
        s   -- the HTML string to truncate.
        num -- maximum number of words to keep; anything accepted by int().

    Returns '' when num is zero or negative, s itself (unchanged) when it
    holds no more than num words, and otherwise the text up to the end of
    word number num followed by ' ...' and an end tag for every element that
    was still open at that point, innermost first.

    Raises whatever int(num) raises for a non-numeric num.
    """
    length = int(num)
    if length <= 0:
        return ''

    words = 0
    ellipsis_pos = 0
    open_tags = []  # Stack of open element names, innermost last.

    for token in _RE_HTML_TOKEN.finditer(s):
        if token.group(1):
            # It's an actual non-HTML word
            words += 1
            if words == length:
                ellipsis_pos = token.end()
            elif words > length:
                # One word past the limit proves truncation is needed.
                break
            continue

        if ellipsis_pos:
            # Tags after the truncation point do not affect the output.
            continue

        tag = _RE_HTML_TAG.match(token.group(0))
        if not tag:
            # Comment, declaration or entity: nothing to open or close.
            continue

        closing_tag, tagname, rest = tag.groups()
        tagname = tagname.lower()  # Element names are case-insensitive

        # A trailing '/' marks a self-closing tag only when it stands alone
        # or follows whitespace / a closing quote; otherwise it is the tail
        # of an unquoted attribute value such as href=http://example.com/
        rest = rest.rstrip()
        self_closing = False
        if rest.endswith('/'):
            before = rest[:-1]
            self_closing = (not before or before[-1].isspace()
                            or before[-1] in '"\'')

        if self_closing or tagname in _HTML4_SINGLETS:
            continue

        if closing_tag:
            # SGML: an end tag closes, back to the matching start tag, all
            # unclosed intervening start tags with omitted end tags.
            for i in range(len(open_tags) - 1, -1, -1):
                if open_tags[i] == tagname:
                    del open_tags[i:]
                    break
        else:
            open_tags.append(tagname)

    if words <= length:
        # Don't try to close tags if we don't need to truncate
        return s

    # Close any tags still open, innermost first
    closers = ''.join(['</%s>' % name for name in open_tags[::-1]])
    return s[:ellipsis_pos] + ' ...' + closers
tests/othertests/defaultfilters.py
64 64 >>> truncatewords('A sentence with a few words in it', 'not a number') 65 65 'A sentence with a few words in it' 66 66 67 >>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 0) 68 '' 67 69 70 >>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 2) 71 '<p>one <a href="#">two ...</a></p>' 72 73 >>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 4) 74 '<p>one <a href="#">two - three <br>four ...</a></p>' 75 76 >>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 5) 77 '<p>one <a href="#">two - three <br>four</a> five</p>' 78 79 >>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 100) 80 '<p>one <a href="#">two - three <br>four</a> five</p>' 81 68 82 >>> upper('Mixed case input') 69 83 'MIXED CASE INPUT' 70 84