Changeset 4468
- Timestamp:
- 02/09/07 20:51:27 (2 years ago)
- Files:
-
- django/trunk/django/template/defaultfilters.py (modified) (2 diffs)
- django/trunk/django/utils/text.py (modified) (1 diff)
- django/trunk/tests/regressiontests/defaultfilters/tests.py (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
django/trunk/django/template/defaultfilters.py
def truncatewords_html(value, arg):
    """
    Truncates HTML after a certain number of words

    Argument: Number of words to truncate after

    The value is returned unchanged when the argument cannot be converted to
    an integer. Values that are not strings are converted with str() before
    being truncated.
    """
    try:
        length = int(arg)
    except (ValueError, TypeError):
        # ValueError: invalid literal for int(); TypeError: an argument such
        # as None that int() cannot convert at all.
        return value  # Fail silently.
    # Function-scope import, as in the original; done after the argument
    # check so the fail-silent path does not depend on it.
    from django.utils.text import truncate_html_words
    if not isinstance(value, basestring):
        value = str(value)
    return truncate_html_words(value, length)

# This changeset also registers the filter further down in the same module:
#     register.filter(truncatewords_html)
#
# Next file in this changeset: django/trunk/django/utils/text.py
def truncate_html_words(s, num):
    """
    Truncates html to a certain number of words (not counting tags and
    comments). Closes opened tags if they were correctly closed in the given
    html.

    Arguments:
        s: the HTML string to truncate.
        num: the maximum number of words to keep; converted with int().

    Returns '' when num is zero or negative, and s unchanged when it holds
    no more than num words. Otherwise returns the text up to the end of the
    num-th word, followed by ' ...' and a closing tag for every element that
    is still open at that point (innermost first).

    Raises whatever int() raises when num cannot be converted.
    """
    length = int(num)
    if length <= 0:
        return ''
    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
    # Set up regular expressions.
    # '<[^>]*>' (rather than '<.*?>') lets a tag span several lines; otherwise
    # the tag name and attributes of a wrapped tag were counted as words.
    re_words = re.compile(r'&.*?;|<[^>]*>|([A-Za-z0-9][\w-]*)')
    # Groups: (1) '/' of a closing tag, (2) element name, (3) '/' of a
    # self-closing tag written directly after the name ('<br/>' or '<br />').
    # The name stops at whitespace, '/' or '>' so '<br/>' yields 'br', not 'br/'.
    re_tag = re.compile(r'<(/)?([^\s/>]+)(?:\s*(/)|\s[^>]*?)?>')
    # Count non-HTML words and keep note of open tags
    pos = 0
    ellipsis_pos = 0
    words = 0
    open_tags = []
    # Scan one word past the limit: seeing word number length + 1 is what
    # proves that truncation is actually required.
    while words <= length:
        m = re_words.search(s, pos)
        if not m:
            # Checked through whole string
            break
        pos = m.end(0)
        if m.group(1):
            # It's an actual non-HTML word
            words += 1
            if words == length:
                ellipsis_pos = pos
            continue
        # Check for tag
        tag = re_tag.match(m.group(0))
        if not tag or ellipsis_pos:
            # Don't worry about non tags or tags after our truncate point
            continue
        closing_tag, tagname, self_closing = tag.groups()
        tagname = tagname.lower()  # Element names are always case-insensitive
        if tagname[0] in '!?':
            # Comments, doctype declarations and processing instructions are
            # not elements: they never need (and must not get) a closing tag.
            continue
        if self_closing or tagname in html4_singlets:
            continue
        if closing_tag:
            # Check for match in open tags list
            try:
                i = open_tags.index(tagname)
            except ValueError:
                pass
            else:
                # SGML: An end tag closes, back to the matching start tag,
                # all unclosed intervening start tags with omitted end tags
                open_tags = open_tags[i+1:]
        else:
            # Add it to the start of the open tags list
            open_tags.insert(0, tagname)
    if words <= length:
        # Don't try to close tags if we don't need to truncate
        return s
    # Cut after the last kept word, then close any tags still open.
    closers = ''.join(['</%s>' % name for name in open_tags])
    return s[:ellipsis_pos] + ' ...' + closers

# Next file in this changeset:
# django/trunk/tests/regressiontests/defaultfilters/tests.py
r4274 r4468 88 88 'A sentence with a few words in it' 89 89 90 >>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 0) 91 '' 92 93 >>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 2) 94 '<p>one <a href="#">two ...</a></p>' 95 96 >>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 4) 97 '<p>one <a href="#">two - three <br>four ...</a></p>' 98 99 >>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 5) 100 '<p>one <a href="#">two - three <br>four</a> five</p>' 101 102 >>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 100) 103 '<p>one <a href="#">two - three <br>four</a> five</p>' 90 104 91 105 >>> upper('Mixed case input')
