Django

Code

Changeset 4468

Show
Ignore:
Timestamp:
02/09/07 20:51:27 (2 years ago)
Author:
mtredinnick
Message:

Fixed #2027 -- added truncatewords_html filter that respects HTML tags whilst
truncating. Patch from SmileyChris.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • django/trunk/django/template/defaultfilters.py

    r4274 r4468  
    119119        value = str(value) 
    120120    return truncate_words(value, length) 
     121 
     122def truncatewords_html(value, arg): 
     123    """ 
     124    Truncates HTML after a certain number of words 
     125 
     126    Argument: Number of words to truncate after 
     127    """ 
     128    from django.utils.text import truncate_html_words 
     129    try: 
     130        length = int(arg) 
     131    except ValueError: # invalid literal for int() 
     132        return value # Fail silently. 
     133    if not isinstance(value, basestring): 
     134        value = str(value) 
     135    return truncate_html_words(value, length) 
    121136 
    122137def upper(value): 
     
    535550register.filter(title) 
    536551register.filter(truncatewords) 
     552register.filter(truncatewords_html) 
    537553register.filter(unordered_list) 
    538554register.filter(upper) 
  • django/trunk/django/utils/text.py

    r4213 r4468  
    4242    return ' '.join(words) 
    4343 
     44def truncate_html_words(s, num): 
     45    """ 
     46    Truncates html to a certain number of words (not counting tags and comments). 
     47    Closes opened tags if they were correctly closed in the given html. 
     48    """ 
     49    length = int(num) 
     50    if length <= 0: 
     51        return '' 
     52    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input') 
     53    # Set up regular expressions 
     54    re_words = re.compile(r'&.*?;|<.*?>|([A-Za-z0-9][\w-]*)') 
     55    re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>') 
     56    # Count non-HTML words and keep note of open tags 
     57    pos = 0 
     58    ellipsis_pos = 0 
     59    words = 0 
     60    open_tags = [] 
     61    while words <= length: 
     62        m = re_words.search(s, pos) 
     63        if not m: 
     64            # Checked through whole string 
     65            break 
     66        pos = m.end(0) 
     67        if m.group(1): 
     68            # It's an actual non-HTML word 
     69            words += 1 
     70            if words == length: 
     71                ellipsis_pos = pos 
     72            continue 
     73        # Check for tag 
     74        tag = re_tag.match(m.group(0)) 
     75        if not tag or ellipsis_pos: 
     76            # Don't worry about non tags or tags after our truncate point 
     77            continue 
     78        closing_tag, tagname, self_closing = tag.groups() 
     79        tagname = tagname.lower()  # Element names are always case-insensitive 
     80        if self_closing or tagname in html4_singlets: 
     81            pass 
     82        elif closing_tag: 
     83            # Check for match in open tags list 
     84            try: 
     85                i = open_tags.index(tagname) 
     86            except ValueError: 
     87                pass 
     88            else: 
     89                # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags 
     90                open_tags = open_tags[i+1:] 
     91        else: 
     92            # Add it to the start of the open tags list 
     93            open_tags.insert(0, tagname) 
     94    if words <= length: 
     95        # Don't try to close tags if we don't need to truncate 
     96        return s 
     97    out = s[:ellipsis_pos] + ' ...' 
     98    # Close any tags still open 
     99    for tag in open_tags: 
     100        out += '</%s>' % tag 
     101    # Return string 
     102    return out 
     103 
    44104def get_valid_filename(s): 
    45105    """ 
  • django/trunk/tests/regressiontests/defaultfilters/tests.py

    r4274 r4468  
    8888'A sentence with a few words in it' 
    8989 
     90>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 0)  
     91'' 
     92  
     93>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 2)  
     94'<p>one <a href="#">two ...</a></p>' 
     95  
     96>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 4)  
     97'<p>one <a href="#">two - three <br>four ...</a></p>' 
     98 
     99>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 5)  
     100'<p>one <a href="#">two - three <br>four</a> five</p>' 
     101 
     102>>> truncatewords_html('<p>one <a href="#">two - three <br>four</a> five</p>', 100)  
     103'<p>one <a href="#">two - three <br>four</a> five</p>' 
    90104 
    91105>>> upper('Mixed case input')