Context Navigation

Back to Ticket #2027

Ticket #2027: truncatewords_html.2.patch

File truncatewords_html.2.patch, 5.2 KB (added by Chris Beaven, 19 years ago)
This time I'll upload the proper HTML-aware truncation patch

django/template/defaultfilters.py

         value = str(value)
     return truncate_words(value, length)
def truncatewords_html(value, arg):
    """
    Truncates HTML after a certain number of words, delegating the actual
    work to django.utils.text.truncate_html_words.

    Argument: Number of words to truncate after. When int() rejects it with
    a ValueError, the value is handed back untouched.
    """
    from django.utils.text import truncate_html_words
    try:
        word_limit = int(arg)
    except ValueError:
        # Not a usable number: fail silently and return the input as-is.
        return value
    if isinstance(value, basestring):
        html = value
    else:
        # Anything that is not already a string is coerced with str() first.
        html = str(value)
    return truncate_html_words(html, word_limit)
 def upper(value):
     """
     Converts a string into all uppercase
     """
     uppercased = value.upper()
     return uppercased
 …
 register.filter(timesince)
 register.filter(title)
 register.filter(truncatewords)
+register.filter(truncatewords_html)
 register.filter(unordered_list)
 register.filter(upper)
 register.filter(urlencode)

django/utils/text.py

             words.append('...')
     return ' '.join(words)
def truncate_html_words(s, num):
    """
    Truncates html to a certain number of words (not counting tags and
    comments). Closes opened tags if they were correctly closed in the given
    html.

    Arguments:
        s: the HTML text to truncate.
        num: the maximum number of words to keep; converted with int().

    Returns '' when num is zero or negative and s itself when it holds no
    more than num words. Otherwise returns the text up to the end of the
    num-th word, followed by ' ...' and then the closing tags (copied from
    later in s) of the elements that were still open at the cut.

    Raises whatever int(num) raises when num is not convertible.
    """
    length = int(num)
    if length <= 0:
        return ''
    # Set up regular expressions
    re_whitespace = re.compile(r'\s+')
    re_html_comment = re.compile(r'<!--.*?-->', re.DOTALL)
    re_tag_singlet = re.compile(r'<[^>]+/>')
    re_tag = re.compile(r'<([^>/\s]+)[^>]*>')
    re_tag_close = re.compile(r'</([^>\s]+)[^>]*>')
    re_non_alphanumeric = re.compile(r'[^\w<]+')
    re_word = re.compile(r'[^<\s]+')
    # Set up everything else
    open_tags = []      # lower-cased names of tags opened and not yet closed
    words = 0
    pos = 0
    len_s = len(s)
    ellipsis_pos = 0    # end of the last kept word; 0 until the limit is hit
    ellipsis_required = False
    while pos < len_s:
        # Skip white space, comment, or singlet
        m = (re_whitespace.match(s, pos)
             or re_html_comment.match(s, pos)
             or re_tag_singlet.match(s, pos))
        if m:
            pos = m.end(0)
            continue
        # Check for tag
        m = re_tag.match(s, pos)
        if m:
            pos = m.end(0)
            # Tags seen after the word limit are not part of the output, so
            # they must not affect which elements need closing.
            if not ellipsis_pos:
                open_tags.append(m.group(1).lower())
            continue
        # Check for close tag
        m = re_tag_close.match(s, pos)
        if m:
            pos = m.end(0)
            if not ellipsis_pos:
                tag = m.group(1).lower()
                # A closing tag ends the most recently opened element of
                # that name, so search from the end of the list.
                for i in range(len(open_tags) - 1, -1, -1):
                    if open_tags[i] == tag:
                        del open_tags[i]
                        break
            continue
        # Skip non-alphanumeric
        m = re_non_alphanumeric.match(s, pos)
        if m:
            pos = m.end(0)
            continue
        # Check for word
        m = re_word.match(s, pos)
        if m:
            pos = m.end(0)
            words += 1
            if words == length:
                ellipsis_pos = pos
            if words > length:
                ellipsis_required = True
                break
            continue
        # Only a '<' that does not start a tag or comment reaches this
        # point (e.g. "a < b"). Step over it as literal text rather than
        # stopping, which would silently drop the rest of the input.
        pos += 1
    if not ellipsis_required:
        # The whole input was scanned without exceeding the word limit.
        return s
    pos = ellipsis_pos
    # Look for closing tags for any tags still open, innermost first
    closers = []
    open_tags.reverse()
    for tag in open_tags:
        temppos = pos
        while 1:
            m = re_tag_close.search(s, temppos)
            if not m:
                # Never closed in the source (e.g. <br>): leave it alone.
                break
            temppos = m.end(0)
            # Recorded names are lower-cased, so compare case-insensitively.
            if m.group(1).lower() == tag:
                closers.append(m.group(0))
                pos = temppos
                break
    # Add ellipsis between the kept text and the closing tags
    return s[:ellipsis_pos] + ' ...' + ''.join(closers)
 def get_valid_filename(s):
     """
     Returns the given string converted to a string that can be used for a clean

tests/othertests/defaultfilters.py

 >>> truncatewords('A sentence with a few words in it', 'not a number')
 'A sentence with a few words in it'
+>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 0)
+''
+>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 2)
+'<p>one <a href="#">two ...</a></p>'
+>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 4)
+'<p>one <a href="#">two - three <br>four ...</a></p>'
+>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 5)
+'<p>one <a href="#">two - three <br>four</a> five</p>'
+>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 100)
+'<p>one <a href="#">two - three <br>four</a> five</p>'
 >>> upper('Mixed case input')
 'MIXED CASE INPUT'

Download in other formats:

Original Format

Issues

Context Navigation

Ticket #2027: truncatewords_html.2.patch

django/template/defaultfilters.py

django/utils/text.py

tests/othertests/defaultfilters.py

Download in other formats:

Django Links

Learn More

Get Involved

Get Help

Follow Us

Support Us