Ticket #5025: 5025.2.diff

File 5025.2.diff, 17.9 KB (added by Chris Beaven, 13 years ago)
  • django/template/defaultfilters.py

    diff --git a/django/template/defaultfilters.py b/django/template/defaultfilters.py
    index 60fa59e..3d7129d 100644
    a b from functools import wraps  
    88from django.template.base import Variable, Library
    99from django.conf import settings
    1010from django.utils import formats
     11from django.utils import text as text_utils
    1112from django.utils.encoding import force_unicode, iri_to_uri
    1213from django.utils.html import conditional_escape
    1314from django.utils.safestring import mark_safe, SafeData
    def title(value):  
    239240title.is_safe = True
    240241title = stringfilter(title)
    241242
     243def truncatechars(value, arg):
     244    """
     245    Truncates a string to at most the given number of characters.
     246   
     247    Argument: Maximum length of the result, including the ellipsis.
     248    """
     249    try:
     250        length = int(arg)
     251    except ValueError: # Invalid literal for int().
     252        return value # Fail silently.
     253    return text_utils.Truncator(value).chars(length)
     254truncatechars.is_safe = True
     255truncatechars = stringfilter(truncatechars)
     256
    242257def truncatewords(value, arg):
    243258    """
    244259    Truncates a string after a certain number of words.
    def truncatewords(value, arg):  
    247262
    248263    Newlines within the string are removed.
    249264    """
    250     from django.utils.text import truncate_words
    251265    try:
    252266        length = int(arg)
    253267    except ValueError: # Invalid literal for int().
    254268        return value # Fail silently.
    255     return truncate_words(value, length)
     269    return text_utils.Truncator(value).words(length, truncate=' ...')
    256270truncatewords.is_safe = True
    257271truncatewords = stringfilter(truncatewords)
    258272
    def truncatewords_html(value, arg):  
    264278
    265279    Newlines in the HTML are preserved.
    266280    """
    267     from django.utils.text import truncate_html_words
    268281    try:
    269282        length = int(arg)
    270283    except ValueError: # invalid literal for int()
    271284        return value # Fail silently.
    272     return truncate_html_words(value, length)
     285    return text_utils.Truncator(value).words(length, html=True,
     286                                             truncate=' ...')
    273287truncatewords_html.is_safe = True
    274288truncatewords_html = stringfilter(truncatewords_html)
    275289
  • django/utils/text.py

    diff --git a/django/utils/text.py b/django/utils/text.py
    index 00c999c..80198f7 100644
    a b  
    11import re
     2import unicodedata
    23from django.utils.encoding import force_unicode
    3 from django.utils.functional import allow_lazy
    4 from django.utils.translation import ugettext_lazy, ugettext as _
     4from django.utils.functional import allow_lazy, LazyObject
     5from django.utils.translation import ugettext_lazy, ugettext as _, pgettext
    56from htmlentitydefs import name2codepoint
    67
    78# Capitalizes the first letter of a string.
    def wrap(text, width):  
    3637    return u''.join(_generator())
    3738wrap = allow_lazy(wrap, unicode)
    3839
    39 def truncate_words(s, num, end_text='...'):
    40     """Truncates a string after a certain number of words. Takes an optional
    41     argument of what should be used to notify that the string has been
    42     truncated, defaulting to ellipsis (...)
    43 
    44     Newlines in the string will be stripped.
     40class Truncator(LazyObject):
     41    """
     42    An object used to truncate text, either by characters or words.
    4543    """
    46     s = force_unicode(s)
    47     length = int(num)
    48     words = s.split()
    49     if len(words) > length:
    50         words = words[:length]
    51         if not words[-1].endswith(end_text):
    52             words.append(end_text)
    53     return u' '.join(words)
    54 truncate_words = allow_lazy(truncate_words, unicode)
    5544
    56 def truncate_html_words(s, num, end_text='...'):
    57     """Truncates HTML to a certain number of words (not counting tags and
    58     comments). Closes opened tags if they were correctly closed in the given
    59     html. Takes an optional argument of what should be used to notify that the
    60     string has been truncated, defaulting to ellipsis (...).
     45    def __init__(self, text):
     46        self.__dict__['text'] = text
     47        super(Truncator, self).__init__()
    6148
    62     Newlines in the HTML are preserved.
    63     """
    64     s = force_unicode(s)
    65     length = int(num)
    66     if length <= 0:
    67         return u''
    68     html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
    69     # Set up regular expressions
    70     re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
    71     re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
    72     # Count non-HTML words and keep note of open tags
    73     pos = 0
    74     end_text_pos = 0
    75     words = 0
    76     open_tags = []
    77     while words <= length:
    78         m = re_words.search(s, pos)
    79         if not m:
    80             # Checked through whole string
    81             break
    82         pos = m.end(0)
    83         if m.group(1):
    84             # It's an actual non-HTML word
    85             words += 1
    86             if words == length:
    87                 end_text_pos = pos
    88             continue
    89         # Check for tag
    90         tag = re_tag.match(m.group(0))
    91         if not tag or end_text_pos:
    92             # Don't worry about non tags or tags after our truncate point
    93             continue
    94         closing_tag, tagname, self_closing = tag.groups()
    95         tagname = tagname.lower()  # Element names are always case-insensitive
    96         if self_closing or tagname in html4_singlets:
    97             pass
    98         elif closing_tag:
    99             # Check for match in open tags list
    100             try:
    101                 i = open_tags.index(tagname)
    102             except ValueError:
     49    def _setup(self):
     50        self.text = force_unicode(self.text)
     51
     52    def add_truncation_text(self, text, truncate=None):
     53        if truncate is None:
     54            truncate = pgettext('String to return when truncating text',
     55                u'%(truncated_text)s...', )
     56        truncate = force_unicode(truncate)
     57        if '%(truncated_text)s' in truncate:
     58            return truncate % {'truncated_text': text}
     59        # The truncation text didn't contain the %(truncated_text)s string
     60        # replacement argument so just append it to the text.
     61        if text.endswith(truncate):
     62            # But don't append the truncation text if the current text already
     63            # ends in this.
     64            return text
     65        return '%s%s' % (text, truncate)
     66
     67    def chars(self, num, truncate=None):
     68        """
     69        Returns the text truncated to be no longer than the specified number of
     70        characters.
     71
     72        Takes an optional argument of what should be used to notify that the
     73        string has been truncated, defaulting to a translatable string of an
     74        ellipsis (...).
     75        """
     76        length = int(num)
     77        text = unicodedata.normalize('NFC', self.text)
     78
     79        # Calculate the length to truncate to (max length - end_text length)
     80        truncate_len = length
     81        for char in self.add_truncation_text('', truncate):
     82            if not unicodedata.combining(char):
     83                truncate_len -= 1
     84                if truncate_len == 0:
     85                    break
     86
     87        s_len = 0
     88        end_index = None
     89        for i, char in enumerate(text):
     90            if unicodedata.combining(char):
     91                # Don't consider combining characters as adding to the string
     92                # length
     93                continue
     94            s_len += 1
     95            if end_index is None and s_len > truncate_len:
     96                end_index = i
     97            if s_len > length:
     98                # Return the truncated string
     99                return self.add_truncation_text(text[:end_index or 0],
     100                                                truncate)
     101
     102        # Return the original string since no truncation was necessary
     103        return text
     104    chars = allow_lazy(chars)
     105
     106    def words(self, num, truncate=None, html=False):
     107        """
     108        Truncates a string after a certain number of words. Takes an optional
     109        argument of what should be used to notify that the string has been
     110        truncated, defaulting to ellipsis (...).
     111        """
     112        length = int(num)
     113        if html:
     114            return self._html_words(length, truncate)
     115        return self._text_words(length, truncate)
     116    words = allow_lazy(words)
     117
     118    def _text_words(self, length, truncate):
     119        """
     120        Truncates a string after a certain number of words.
     121
     122        Newlines in the string will be stripped.
     123        """
     124        words = self.text.split()
     125        if len(words) > length:
     126            words = words[:length]
     127            return self.add_truncation_text(u' '.join(words), truncate)
     128        return u' '.join(words)
     129
     130    def _html_words(self, length, truncate):
     131        """
     132        Truncates HTML to a certain number of words (not counting tags and
     133        comments). Closes opened tags if they were correctly closed in the
     134        given HTML.
     135
     136        Newlines in the HTML are preserved.
     137        """
     138        if length <= 0:
     139            return u''
     140        html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area',
     141            'hr', 'input')
     142        # Set up regular expressions
     143        re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
     144        re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
     145        # Count non-HTML words and keep note of open tags
     146        pos = 0
     147        end_text_pos = 0
     148        words = 0
     149        open_tags = []
     150        while words <= length:
     151            m = re_words.search(self.text, pos)
     152            if not m:
     153                # Checked through whole string
     154                break
     155            pos = m.end(0)
     156            if m.group(1):
     157                # It's an actual non-HTML word
     158                words += 1
     159                if words == length:
     160                    end_text_pos = pos
     161                continue
     162            # Check for tag
     163            tag = re_tag.match(m.group(0))
     164            if not tag or end_text_pos:
     165                # Don't worry about non tags or tags after our truncate point
     166                continue
     167            closing_tag, tagname, self_closing = tag.groups()
     168            # Element names are always case-insensitive
     169            tagname = tagname.lower()
     170            if self_closing or tagname in html4_singlets:
    103171                pass
     172            elif closing_tag:
     173                # Check for match in open tags list
     174                try:
     175                    i = open_tags.index(tagname)
     176                except ValueError:
     177                    pass
     178                else:
     179                    # SGML: An end tag closes, back to the matching start tag,
     180                    # all unclosed intervening start tags with omitted end tags
     181                    open_tags = open_tags[i + 1:]
    104182            else:
    105                 # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags
    106                 open_tags = open_tags[i+1:]
    107         else:
    108             # Add it to the start of the open tags list
    109             open_tags.insert(0, tagname)
    110     if words <= length:
    111         # Don't try to close tags if we don't need to truncate
    112         return s
    113     out = s[:end_text_pos]
    114     if end_text:
    115         out += ' ' + end_text
    116     # Close any tags still open
    117     for tag in open_tags:
    118         out += '</%s>' % tag
    119     # Return string
    120     return out
     183                # Add it to the start of the open tags list
     184                open_tags.insert(0, tagname)
     185        if words <= length:
     186            # Don't try to close tags if we don't need to truncate
     187            return self.text
     188        out = self.text[:end_text_pos]
     189        truncate_text = self.add_truncation_text('', truncate)
     190        if truncate_text:
     191            out += truncate_text
     192        # Close any tags still open
     193        for tag in open_tags:
     194            out += '</%s>' % tag
     195        # Return string
     196        return out
     197
     198def truncate_words(s, num, end_text='...'):
     199    import warnings
     200    warnings.warn('This function has been deprecated. Use the Truncator class '
     201        'in django.utils.text instead.', category=PendingDeprecationWarning)
     202    truncate = end_text and ' %s' % end_text or ''
     203    return Truncator(s).words(num, truncate=truncate)
     204truncate_words = allow_lazy(truncate_words, unicode)
     205
     206def truncate_html_words(s, num, end_text='...'):
     207    import warnings
     208    warnings.warn('This function has been deprecated. Use the Truncator class '
     209        'in django.utils.text instead.', category=PendingDeprecationWarning)
     210    truncate = end_text and ' %s' % end_text or ''
     211    return Truncator(s).words(num, truncate=truncate, html=True)
    121212truncate_html_words = allow_lazy(truncate_html_words, unicode)
    122213
    123214def get_valid_filename(s):
  • docs/ref/templates/builtins.txt

    diff --git a/docs/ref/templates/builtins.txt b/docs/ref/templates/builtins.txt
    index 29bed25..efddf02 100644
    a b For example::  
    20242024
    20252025If ``value`` is ``"my first post"``, the output will be ``"My First Post"``.
    20262026
     2027.. templatefilter:: truncatechars
     2028
     2029truncatechars
     2030~~~~~~~~~~~~~
     2031
     2032Truncates a string if it is longer than the specified number of characters.
     2033Truncated strings will end with a translatable ellipsis sequence ("...").
     2034
     2035**Argument:** Number of characters to truncate to
     2036
     2037For example::
     2038
     2039    {{ value|truncatechars:9 }}
     2040
     2041If ``value`` is ``"Joel is a slug"``, the output will be ``"Joel i..."``.
     2042
    20272043.. templatefilter:: truncatewords
    20282044
    20292045truncatewords
  • docs/releases/1.4.txt

    diff --git a/docs/releases/1.4.txt b/docs/releases/1.4.txt
    index 496a4c9..d57cc49 100644
    a b A new helper function,  
    6060``template.Library`` to ease the creation of template tags that store some
    6161data in a specified context variable.
    6262
     63``truncatechars`` template filter
     64~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     65
     66Added a filter which truncates a string to be no longer than the specified
     67number of characters. Truncated strings end with a translatable ellipsis
     68sequence ("...").
     69
    6370.. _backwards-incompatible-changes-1.4:
    6471
    6572Backwards incompatible changes in 1.4
  • tests/regressiontests/utils/text.py

    diff --git a/tests/regressiontests/utils/text.py b/tests/regressiontests/utils/text.py
    index f565d87..921fde1 100644
    a b  
     1# -*- coding: utf-8 -*-
    12import unittest
    23
    34from django.utils import text
    45
    56class TestUtilsText(unittest.TestCase):
    67
     8    def test_truncate_chars(self):
     9        truncator = text.Truncator(
     10            u'The quick brown fox jumped over the lazy dog.'
     11        )
     12        self.assertEqual(u'The quick brown fox jumped over the lazy dog.',
     13            truncator.chars(100)),
     14        self.assertEqual(u'The quick brown fox ...',
     15            truncator.chars(23)),
     16        self.assertEqual(u'The quick brown fo.....',
     17            truncator.chars(23, '.....')),
     18       
     19        # Ensure that we normalize our unicode data first
     20        nfc = text.Truncator(u'o\xfco\xfco\xfco\xfc')
     21        nfd = text.Truncator(u'ou\u0308ou\u0308ou\u0308ou\u0308')
     22        self.assertEqual(u'oüoüoüoü', nfc.chars(8))
     23        self.assertEqual(u'oüoüoüoü', nfd.chars(8))
     24        self.assertEqual(u'oü...', nfc.chars(5))
     25        self.assertEqual(u'oü...', nfd.chars(5))
     26       
     27        # Ensure the final length is calculated correctly when there are
     28        # combining characters with no precomposed form, and that combining
     29        # characters are not split up.
     30        truncator = text.Truncator(u'-B\u030AB\u030A----8')
     31        self.assertEqual(u'-B\u030A...', truncator.chars(5))
     32        self.assertEqual(u'-B\u030AB\u030A-...', truncator.chars(7))
     33        self.assertEqual(u'-B\u030AB\u030A----8', truncator.chars(8))
     34
     35        # Ensure the length of the end text is correctly calculated when it
     36        # contains combining characters with no precomposed form.
     37        truncator = text.Truncator(u'-----')
     38        self.assertEqual(u'---B\u030A', truncator.chars(4, u'B\u030A'))
     39        self.assertEqual(u'-----', truncator.chars(5, u'B\u030A'))
     40
     41        # Make a best effort to shorten to the desired length, but requesting
     42        # a length shorter than the ellipsis shouldn't break
     43        self.assertEqual(u'...', text.Truncator(u'asdf').chars(1))
     44
    745    def test_truncate_words(self):
     46        truncator = text.Truncator(u'The quick brown fox jumped over the lazy '
     47            'dog.')
     48        self.assertEqual(u'The quick brown fox jumped over the lazy dog.',
     49            truncator.words(10))
     50        self.assertEqual(u'The quick brown fox...', truncator.words(4))
     51        self.assertEqual(u'The quick brown fox[snip]',
     52            truncator.words(4, '[snip]'))
     53
     54    def test_truncate_html_words(self):
     55        truncator = text.Truncator('<p><strong><em>The quick brown fox jumped '
     56            'over the lazy dog.</em></strong></p>')
     57        self.assertEqual(u'<p><strong><em>The quick brown fox jumped over the '
     58            'lazy dog.</em></strong></p>', truncator.words(10, html=True))
     59        self.assertEqual(u'<p><strong><em>The quick brown fox...</em>'
     60            '</strong></p>', truncator.words(4, html=True))
     61        self.assertEqual(u'<p><strong><em>The quick brown fox....</em>'
     62            '</strong></p>', truncator.words(4, '....', html=True))
     63        self.assertEqual(u'<p><strong><em>The quick brown fox</em></strong>'
     64            '</p>', truncator.words(4, '', html=True))
     65
     66    def test_old_truncate_words(self):
    867        self.assertEqual(u'The quick brown fox jumped over the lazy dog.',
    968            text.truncate_words(u'The quick brown fox jumped over the lazy dog.', 10))
    1069        self.assertEqual(u'The quick brown fox ...',
    class TestUtilsText(unittest.TestCase):  
    1271        self.assertEqual(u'The quick brown fox ....',
    1372            text.truncate_words('The quick brown fox jumped over the lazy dog.', 4, '....'))
    1473
    15     def test_truncate_html_words(self):
     74    def test_old_truncate_html_words(self):
    1675        self.assertEqual(u'<p><strong><em>The quick brown fox jumped over the lazy dog.</em></strong></p>',
    1776            text.truncate_html_words('<p><strong><em>The quick brown fox jumped over the lazy dog.</em></strong></p>', 10))
    1877        self.assertEqual(u'<p><strong><em>The quick brown fox ...</em></strong></p>',
Back to Top