Ticket #2027: truncatewords_html.patch

File truncatewords_html.patch, 5.1 KB (added by Chris Beaven, 18 years ago)

HTML-aware word truncation

  • django/template/defaultfilters.py

     
    101101        value = str(value)
    102102    return truncate_words(value, length)
    103103
     104def truncatewords_html(value, arg):
     105    """
     106    Truncates HTML after a certain number of words
     107
     108    Argument: Number of words to truncate after
     109    """
     110    from django.utils.text import truncate_html_words
     111    try:
     112        length = int(arg)
     113    except ValueError: # invalid literal for int()
     114        return value # Fail silently.
     115    if not isinstance(value, basestring):
     116        value = str(value)
     117    return truncate_html_words(value, length)
     118
    104119def upper(value):
    105120    "Converts a string into all uppercase"
    106121    return value.upper()
     
    481496register.filter(timesince)
    482497register.filter(title)
    483498register.filter(truncatewords)
     499register.filter(truncatewords_html)
    484500register.filter(unordered_list)
    485501register.filter(upper)
    486502register.filter(urlencode)
  • django/utils/text.py

     
    3030            words.append('...')
    3131    return ' '.join(words)
    3232
     33def truncate_html_words(s, num):
     34    """
     35    Truncates html to a certain number of words (not counting tags and comments).
     36    Closes opened tags if they were correctly closed in the given html.
     37    """
     38    length = int(num)
     39    if length <= 0:
     40        return ''
     41    # Set up regular expressions
     42    re_whitespace = re.compile(r'\s+')
     43    re_html_comment = re.compile(r'<!--.*?-->', re.DOTALL)
     44    re_tag_singlet = re.compile(r'<[^>]+/>')
     45    re_tag = re.compile(r'<([^>/\s]+)[^>]*>')
     46    re_tag_close = re.compile(r'</([^>\s]+)[^>]*>')
     47    re_non_alphanumeric = re.compile(r'[^\w<]+')
     48    re_word = re.compile(r'[^<\s]+')
     49    # Set up everything else
     50    tags = []
     51    words = 0
     52    pos = 0
     53    len_s = len(s)
     54    elipsis_pos = 0
     55    elipsis_required = 0
     56    while pos < len_s:
     57        # Skip white space, comment, or singlet
     58        m = re_whitespace.match(s, pos) or re_html_comment.match(s, pos) or re_tag_singlet.match(s, pos)
     59        if m:
     60            pos = m.end(0)
     61            continue
     62        # Check for tag
     63        m = re_tag.match(s, pos)
     64        if m:
     65            pos = m.end(0)
     66            if not elipsis_pos:
     67                tag = m.group(1).lower()
     68                tags.append(tag)
     69            continue
     70        # Check for close tag
     71        m = re_tag_close.match(s, pos)
     72        if m:
     73            pos = m.end(0)
     74            if not elipsis_pos:
     75                tag = m.group(1).lower()
     76                try:
     77                    tags.remove(tag)
     78                except ValueError:
     79                    pass
     80            continue
     81        # Skip non-alphanumeric
     82        m = re_non_alphanumeric.match(s, pos)
     83        if m:
     84            pos = m.end(0)
     85            continue
     86        # Check for word
     87        m = re_word.match(s, pos)
     88        if m:
     89            pos = m.end(0)
     90            words += 1
     91            if words == length:
     92                elipsis_pos = pos
     93            if words > length:
     94                elipsis_required = 1
     95                break
     96            continue
     97        # Shouldn't ever actually get here
     98        break
     99    if elipsis_required:
     100        out = s[:elipsis_pos]
     101        if not out.endswith('...'):
     102            out += ' ...'
     103    else:
     104        out = s[:pos]
     105    # Look for closing tags for any tags still open
     106    tags.reverse()
     107    temppos = pos
     108    for tag in tags:
     109        while 1:
     110            m = re_tag_close.search(s, temppos)
     111            if m:
     112                temppos = m.end(0)
     113                if m.group(1) == tag:
     114                    out += m.group(0)
     115                    pos = temppos
     116                    break
     117            else:
     118                break
     119    return out
     120
    33121def get_valid_filename(s):
    34122    """
    35123    Returns the given string converted to a string that can be used for a clean
  • tests/othertests/defaultfilters.py

     
    6464>>> truncatewords('A sentence with a few words in it', 'not a number')
    6565'A sentence with a few words in it'
    6666
     67>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 0)
     68''
    6769
     70>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 2)
     71'<p>one <a href="#">two ...</a></p>'
     72
     73>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 4)
     74'<p>one <a href="#">two - three <br>four ...'
     75
     76>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 5)
     77'<p>one <a href="#">two - three <br>four</a> five</p>'
     78
     79>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 100)
     80'<p>one <a href="#">two - three <br>four</a> five</p>'
     81
    6882>>> upper('Mixed case input')
    6983'MIXED CASE INPUT'
    7084
Back to Top