Django

Code

Changeset 8088

Show
Ignore:
Timestamp:
07/26/08 00:07:16 (4 months ago)
Author:
mtredinnick
Message:

Fixed #7793 -- Handle sitemaps with more than 50,000 URLs in them (by using
pagination). Patch from Julian Bez.

The docs patch here could probably do with some rewording.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • django/trunk/AUTHORS

    r8075 r8088  
    7272    Chris Bennett <chrisrbennett@yahoo.com> 
    7373    James Bennett 
    74     Ben Godfrey <http://aftnn.org> 
     74    Julian Bez 
    7575    Arvis Bickovskis <viestards.lists@gmail.com> 
    7676    Paul Bissex <http://e-scribe.com/> 
     
    167167    martin.glueck@gmail.com 
    168168    Artyom Gnilov <boobsd@gmail.com> 
     169    Ben Godfrey <http://aftnn.org> 
    169170    GomoX <gomo@datafull.com> 
    170171    Guilherme Mesquita Gondim <semente@taurinus.org> 
  • django/trunk/django/contrib/sitemaps/__init__.py

    r5300 r8088  
    1 from django.core import urlresolvers 
     1from django.core import urlresolvers, paginator 
    22import urllib 
    33 
     
    3535 
    3636class Sitemap: 
     37    # This limit is defined by Google. See the index documentation at 
     38    # http://sitemaps.org/protocol.php#index. 
     39    limit = 50000 
     40 
    3741    def __get(self, name, obj, default=None): 
    3842        try: 
     
    5054        return obj.get_absolute_url() 
    5155 
    52     def get_urls(self): 
     56    def _get_paginator(self): 
     57        if not hasattr(self, "paginator"): 
     58            self.paginator = paginator.Paginator(self.items(), self.limit) 
     59        return self.paginator 
     60    paginator = property(_get_paginator) 
     61 
     62    def get_urls(self, page=1): 
    5363        from django.contrib.sites.models import Site 
    5464        current_site = Site.objects.get_current() 
    5565        urls = [] 
    56         for item in self.items(): 
     66        for item in self.paginator.page(page).object_list: 
    5767            loc = "http://%s%s" % (current_site.domain, self.__get('location', item)) 
    5868            url_info = { 
  • django/trunk/django/contrib/sitemaps/views.py

    r5609 r8088  
    44from django.core import urlresolvers 
    55from django.utils.encoding import smart_str 
     6from django.core.paginator import EmptyPage, PageNotAnInteger 
    67 
    78def index(request, sitemaps): 
     
    910    sites = [] 
    1011    protocol = request.is_secure() and 'https' or 'http' 
    11     for section in sitemaps.keys(): 
     12    for section, site in sitemaps.items(): 
     13        if callable(site): 
     14            pages = site().paginator.num_pages 
     15        else: 
     16            pages = site.paginator.num_pages 
    1217        sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap', kwargs={'section': section}) 
    1318        sites.append('%s://%s%s' % (protocol, current_site.domain, sitemap_url)) 
     19        if pages > 1: 
     20            for page in range(2, pages+1): 
     21                sites.append('%s://%s%s?p=%s' % (protocol, current_site.domain, sitemap_url, page)) 
    1422    xml = loader.render_to_string('sitemap_index.xml', {'sitemaps': sites}) 
    1523    return HttpResponse(xml, mimetype='application/xml') 
     
    2331    else: 
    2432        maps = sitemaps.values() 
     33    page = request.GET.get("p", 1) 
    2534    for site in maps: 
    26         if callable(site): 
    27             urls.extend(site().get_urls()) 
    28         else: 
    29             urls.extend(site.get_urls()) 
     35        try: 
     36            if callable(site): 
     37                urls.extend(site().get_urls(page)) 
     38            else: 
     39                urls.extend(site.get_urls(page)) 
     40        except EmptyPage: 
     41            raise Http404("Page %s empty" % page) 
     42        except PageNotAnInteger: 
     43            raise Http404("No page '%s'" % page) 
    3044    xml = smart_str(loader.render_to_string('sitemap.xml', {'urlset': urls})) 
    3145    return HttpResponse(xml, mimetype='application/xml') 
  • django/trunk/docs/sitemaps.txt

    r7813 r8088  
    283283classes and the ``sitemaps`` dict don't change at all. 
    284284 
     285If any one of your sitemaps will contain more than 50,000 URLs, you should 
     286create an index file. That sitemap will then be paginated, and the index 
     287will list each of its pages. 
     288 
    285289Pinging Google 
    286290==============