Ticket #2027: truncatewords_html.2.patch
File truncatewords_html.2.patch, 5.2 KB (added by , 18 years ago) |
---|
-
django/template/defaultfilters.py
def truncatewords_html(value, arg):
    """
    Template filter: cut HTML off after a given number of words.

    Argument: the word limit.  It is coerced with ``int()``; when that
    raises ``ValueError`` the input is handed back untouched.  Input that
    is not a ``basestring`` is converted with ``str()`` before the actual
    work is delegated to ``django.utils.text.truncate_html_words``.
    """
    from django.utils.text import truncate_html_words
    try:
        word_limit = int(arg)
    except ValueError:
        # The argument is not an integer literal: give the value back as-is
        # instead of raising.
        return value
    if isinstance(value, basestring):
        html = value
    else:
        html = str(value)
    return truncate_html_words(html, word_limit)


def upper(value):
    "Returns the result of calling ``upper()`` on the given value"
    uppercased = value.upper()
    return uppercased


# ... (other definitions elided in this excerpt) ...

# Filter registrations, in their original order.
register.filter(timesince)
register.filter(title)
register.filter(truncatewords)
register.filter(truncatewords_html)
register.filter(unordered_list)
register.filter(upper)
register.filter(urlencode)
django/utils/text.py
# Patterns used by truncate_html_words(), compiled once at import time.
_html_trunc_whitespace = re.compile(r'\s+')
_html_trunc_comment = re.compile(r'<!--.*?-->', re.DOTALL)
_html_trunc_singlet = re.compile(r'<[^>]+/>')             # e.g. <br />
_html_trunc_open_tag = re.compile(r'<([^>/\s]+)[^>]*>')   # group 1: tag name
_html_trunc_close_tag = re.compile(r'</([^>\s]+)[^>]*>')  # group 1: tag name
_html_trunc_non_word = re.compile(r'[^\w<]+')             # punctuation runs
_html_trunc_word = re.compile(r'[^<\s]+')


def truncate_html_words(s, num):
    """
    Truncates html to a certain number of words (not counting tags and comments).
    Closes opened tags if they were correctly closed in the given html.

    ``num`` is coerced with ``int()``; a value of zero or less yields ''.

    White space, comments, tags and runs of non-word characters are skipped
    without being counted.  If ``s`` holds more than ``num`` words, the
    result is ``s`` up to the end of word number ``num``, followed by ' ...'
    and then by the closing tags of the elements still open at that point.
    Each closing tag is copied from the remainder of ``s``; a tag for which
    no closing tag is found there is left open.  If ``s`` holds ``num``
    words or fewer it is returned unchanged.
    """
    length = int(num)
    if length <= 0:
        return ''
    open_tags = []
    words = 0
    pos = 0
    len_s = len(s)
    ellipsis_pos = 0  # End of word number `length`; 0 until it is reached.
    ellipsis_required = False
    while pos < len_s:
        # Skip white space, comment, or singlet
        m = (_html_trunc_whitespace.match(s, pos)
             or _html_trunc_comment.match(s, pos)
             or _html_trunc_singlet.match(s, pos))
        if m:
            pos = m.end(0)
            continue
        # Opening tag: remember it, unless we are already past the cut point
        m = _html_trunc_open_tag.match(s, pos)
        if m:
            pos = m.end(0)
            if not ellipsis_pos:
                open_tags.append(m.group(1).lower())
            continue
        # Closing tag: forget the matching open tag (same cut-point rule)
        m = _html_trunc_close_tag.match(s, pos)
        if m:
            pos = m.end(0)
            if not ellipsis_pos:
                try:
                    open_tags.remove(m.group(1).lower())
                except ValueError:
                    # Closing tag without a recorded opening tag; ignore it.
                    pass
            continue
        # Skip non-alphanumeric
        m = _html_trunc_non_word.match(s, pos)
        if m:
            pos = m.end(0)
            continue
        # Check for word
        m = _html_trunc_word.match(s, pos)
        if m:
            pos = m.end(0)
            words += 1
            if words == length:
                ellipsis_pos = pos
            elif words > length:
                ellipsis_required = True
                break
            continue
        # Only a '<' that starts no tag, comment or closing tag gets here
        # (e.g. "a < b").  Step over it as literal text; stopping the scan
        # here would silently drop the rest of the input.
        pos += 1
    if not ellipsis_required:
        # Word limit never exceeded: nothing to cut, nothing to close.
        return s
    pieces = [s[:ellipsis_pos], ' ...']
    # Look for closing tags for any tags still open, innermost first
    pos = ellipsis_pos
    open_tags.reverse()
    for tag in open_tags:
        search_from = pos
        while True:
            m = _html_trunc_close_tag.search(s, search_from)
            if not m:
                # Never closed in the input (e.g. <br>): leave it open.
                break
            search_from = m.end(0)
            # Recorded names are lower-cased, so compare case-insensitively.
            if m.group(1).lower() == tag:
                pieces.append(m.group(0))
                # Outer tags can only be closed after this one.
                pos = search_from
                break
    return ''.join(pieces)
tests/othertests/defaultfilters.py
64 64 >>> truncatewords('A sentence with a few words in it', 'not a number') 65 65 'A sentence with a few words in it' 66 66 67 >>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 0) 68 '' 67 69 70 >>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 2) 71 '<p>one <a href="#">two ...</a></p>' 72 73 >>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 4) 74 '<p>one <a href="#">two - three <br>four ...</a></p>' 75 76 >>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 5) 77 '<p>one <a href="#">two - three <br>four</a> five</p>' 78 79 >>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 100) 80 '<p>one <a href="#">two - three <br>four</a> five</p>' 81 68 82 >>> upper('Mixed case input') 69 83 'MIXED CASE INPUT' 70 84