Ticket #2027: truncatewords_html.2.patch

File truncatewords_html.2.patch, 5.2 KB (added by Chris Beaven, 18 years ago)

This time I'll upload the proper HTML-aware truncation patch

  • django/template/defaultfilters.py

     
    101101        value = str(value)
    102102    return truncate_words(value, length)
    103103
     104def truncatewords_html(value, arg):
     105    """
     106    Truncates HTML after a certain number of words
     107
     108    Argument: Number of words to truncate after
     109    """
     110    from django.utils.text import truncate_html_words
     111    try:
     112        length = int(arg)
     113    except ValueError: # invalid literal for int()
     114        return value # Fail silently.
     115    if not isinstance(value, basestring):
     116        value = str(value)
     117    return truncate_html_words(value, length)
     118
    104119def upper(value):
    105120    "Converts a string into all uppercase"
    106121    return value.upper()
     
    481496register.filter(timesince)
    482497register.filter(title)
    483498register.filter(truncatewords)
     499register.filter(truncatewords_html)
    484500register.filter(unordered_list)
    485501register.filter(upper)
    486502register.filter(urlencode)
  • django/utils/text.py

     
    3030            words.append('...')
    3131    return ' '.join(words)
    3232
     33def truncate_html_words(s, num):
     34    """
     35    Truncates html to a certain number of words (not counting tags and comments).
     36    Closes opened tags if they were correctly closed in the given html.
     37    """
     38    length = int(num)
     39    if length <= 0:
     40        return ''
     41    # Set up regular expressions
     42    re_whitespace = re.compile(r'\s+')
     43    re_html_comment = re.compile(r'<!--.*?-->', re.DOTALL)
     44    re_tag_singlet = re.compile(r'<[^>]+/>')
     45    re_tag = re.compile(r'<([^>/\s]+)[^>]*>')
     46    re_tag_close = re.compile(r'</([^>\s]+)[^>]*>')
     47    re_non_alphanumeric = re.compile(r'[^\w<]+')
     48    re_word = re.compile(r'[^<\s]+')
     49    # Set up everything else
     50    tags = []
     51    words = 0
     52    pos = 0
     53    len_s = len(s)
     54    ellipsis_pos = 0
     55    ellipsis_required = 0
     56    while pos < len_s:
     57        # Skip white space, comment, or singlet
     58        m = re_whitespace.match(s, pos) or re_html_comment.match(s, pos) or re_tag_singlet.match(s, pos)
     59        if m:
     60            pos = m.end(0)
     61            continue
     62        # Check for tag
     63        m = re_tag.match(s, pos)
     64        if m:
     65            pos = m.end(0)
     66            if not ellipsis_pos:
     67                tag = m.group(1).lower()
     68                tags.append(tag)
     69            continue
     70        # Check for close tag
     71        m = re_tag_close.match(s, pos)
     72        if m:
     73            pos = m.end(0)
     74            if not ellipsis_pos:
     75                tag = m.group(1).lower()
     76                try:
     77                    tags.remove(tag)
     78                except ValueError:
     79                    pass
     80            continue
     81        # Skip non-alphanumeric
     82        m = re_non_alphanumeric.match(s, pos)
     83        if m:
     84            pos = m.end(0)
     85            continue
     86        # Check for word
     87        m = re_word.match(s, pos)
     88        if m:
     89            pos = m.end(0)
     90            words += 1
     91            if words == length:
     92                ellipsis_pos = pos
     93            if words > length:
     94                ellipsis_required = 1
     95                break
     96            continue
     97        # Shouldn't ever actually get here
     98        break
     99    if ellipsis_required:
     100        pos = ellipsis_pos
     101    out = s[:pos]
     102    # Look for closing tags for any tags still open
     103    print tags
     104    tags.reverse()
     105    for tag in tags:
     106        temppos = pos
     107        while 1:
     108            m = re_tag_close.search(s, temppos)
     109            if m:
     110                print m.group(1), tag
     111                temppos = m.end(0)
     112                if m.group(1) == tag:
     113                    out += m.group(0)
     114                    pos = temppos
     115                    break
     116            else:
     117                break
     118    # Add ellipsis
     119    if ellipsis_required:
     120        out = out[:ellipsis_pos] + ' ...' + out[ellipsis_pos:]
     121    return out
     122
    33123def get_valid_filename(s):
    34124    """
    35125    Returns the given string converted to a string that can be used for a clean
  • tests/othertests/defaultfilters.py

     
    6464>>> truncatewords('A sentence with a few words in it', 'not a number')
    6565'A sentence with a few words in it'
    6666
     67>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 0)
     68''
    6769
     70>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 2)
     71'<p>one <a href="#">two ...</a></p>'
     72
     73>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 4)
     74'<p>one <a href="#">two - three <br>four ...</a></p>'
     75
     76>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 5)
     77'<p>one <a href="#">two - three <br>four</a> five</p>'
     78
     79>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 100)
     80'<p>one <a href="#">two - three <br>four</a> five</p>'
     81
    6882>>> upper('Mixed case input')
    6983'MIXED CASE INPUT'
    7084
Back to Top