Ticket #8995: sitemaps.diff

File sitemaps.diff, 7.3 KB (added by john, 5 years ago)

Patch against r13357 for sitemap enhancements.

  • django/contrib/sitemaps/__init__.py

     
    1 from django.core import urlresolvers, paginator
     1import logging
    22import urllib
    33
    4 PING_URL = "http://www.google.com/webmasters/tools/ping"
     4from django.conf import settings
     5from django.contrib.sites.models import Site
     6from django.core import urlresolvers, paginator
    57
    68class SitemapNotFound(Exception):
    79    pass
    810
    9 def ping_google(sitemap_url=None, ping_url=PING_URL):
     11class Pinger(object):
    1012    """
    11     Alerts Google that the sitemap for the current site has been updated.
    12     If sitemap_url is provided, it should be an absolute path to the sitemap
    13     for this site -- e.g., '/sitemap.xml'. If sitemap_url is not provided, this
    14     function will attempt to deduce it by using urlresolvers.reverse().
     13
     14    Superclass for alerting search engines that the sitemap for the current
     15    site has been updated.  If sitemap_url is provided to the constructor, it
     16    should be an absolute path to the sitemap for this site -- e.g.,
     17    '/sitemap.xml'. If sitemap_url is not provided, the constructor will
     18    attempt to deduce it by using urlresolvers.reverse().
     19
     20    The ping_url is specific to each search engine, so each subclass defines
     21    its own; it can be overridden via the ping_url constructor argument.
     22
    1523    """
    16     if sitemap_url is None:
    17         try:
    18             # First, try to get the "index" sitemap URL.
    19             sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.index')
    20         except urlresolvers.NoReverseMatch:
     24    logger = None
     25    name = None
     26    ping_url = None
     27    sitemap_url = None
     28   
     29    def __init__(self, sitemap_url=None, ping_url=None):
     30        if sitemap_url is None:
    2131            try:
    22                 # Next, try for the "global" sitemap URL.
    23                 sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap')
     32                # First, try to get the "index" sitemap URL.
     33                sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.index')
    2434            except urlresolvers.NoReverseMatch:
    25                 pass
     35                try:
     36                    # Next, try for the "global" sitemap URL.
     37                    sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap')
     38                except urlresolvers.NoReverseMatch:
     39                    raise SitemapNotFound("You didn't provide a sitemap_url, and the sitemap URL couldn't be auto-detected.")
    2640
    27     if sitemap_url is None:
    28         raise SitemapNotFound("You didn't provide a sitemap_url, and the sitemap URL couldn't be auto-detected.")
     41        self.sitemap_url = "http://%s%s" % (Site.objects.get_current().domain, sitemap_url or get_sitemap_url())
     42        if ping_url:
     43            self.ping_url = ping_url
     44        self.logger = logging.getLogger("django.contrib.sitemaps.%s" % str(self.__class__.__name__))
     45   
     46    def ping(self):
     47        try:
     48            if settings.DEBUG:
     49                self.logger.debug("Pinging %s with sitemap %s..." % (self.name, self.sitemap_url))
     50            params = urllib.urlencode({'sitemap' : self.sitemap_url})
     51            u = urllib.urlopen("%s?%s" % (self.ping_url, params))
     52            if settings.DEBUG:
     53                self.logger.debug(u.read())
     54            self.logger.info("Pinged %s with sitemap %s." % (self.name, self.sitemap_url))
     55        except Exception, e:
     56            self.logger.error("%s ping failed: %s" % (self.name, e))
    2957
    30     from django.contrib.sites.models import Site
    31     current_site = Site.objects.get_current()
    32     url = "http://%s%s" % (current_site.domain, sitemap_url)
    33     params = urllib.urlencode({'sitemap':url})
    34     urllib.urlopen("%s?%s" % (ping_url, params))
     58class AskPinger(Pinger):
     59    name = 'Ask'
     60    ping_url = 'http://submissions.ask.com/ping'
    3561
     62class GooglePinger(Pinger):
     63    name = 'Google'
     64    ping_url = 'http://www.google.com/webmasters/tools/ping'
     65
     66class LiveSearchPinger(Pinger):
     67    name = 'Live Search'
     68    ping_url = 'http://webmaster.live.com/ping.aspx'
     69
     70def ping_google(sitemap_url=None, ping_url=None):
     71    GooglePinger(sitemap_url=sitemap_url).ping()
     72
     73def ping_search_engines(sitemap_url=None):
     74    pingers = getattr(settings, 'SITEMAP_PINGERS', [AskPinger, GooglePinger, LiveSearchPinger])
     75    for pinger in pingers:
     76        pinger = pinger(sitemap_url=sitemap_url)
     77        pinger.ping()
     78
    3679class Sitemap(object):
    3780    # This limit is defined by Google. See the index documentation at
    3881    # http://sitemaps.org/protocol.php#index.
    3982    limit = 50000
     83    protocol = 'http'
    4084
    4185    def __get(self, name, obj, default=None):
    4286        try:
     
    64108        current_site = Site.objects.get_current()
    65109        urls = []
    66110        for item in self.paginator.page(page).object_list:
    67             loc = "http://%s%s" % (current_site.domain, self.__get('location', item))
     111            loc = "%s://%s%s" % (self.protocol, current_site.domain, self.__get('location', item))
    68112            url_info = {
    69113                'location':   loc,
    70114                'lastmod':    self.__get('lastmod', item, None),
  • django/contrib/sitemaps/management/commands/ping_search_engines.py

     
     1import logging
     2import sys
     3
     4from django.conf import settings
     5from django.core.management.base import BaseCommand
     6from django.contrib.sitemaps import ping_search_engines
     7
     8
     9class Command(BaseCommand):
     10    help = "Ping all supported search engines with an updated sitemap and optionally, the url of the sitemap."
     11
     12    def execute(self, *args, **options):
     13        logging.root.addHandler(logging.StreamHandler(sys.stderr))
     14        logging.root.setLevel(settings.DEBUG and logging.DEBUG or logging.INFO)
     15        if len(args) == 1:
     16            sitemap_url = args[0]
     17        else:
     18            sitemap_url = None
     19        ping_search_engines(sitemap_url=sitemap_url)
     20
  • docs/ref/contrib/sitemaps.txt

     
    209209
    210210        .. _sitemaps.org documentation: http://www.sitemaps.org/protocol.html#prioritydef
    211211
     212    .. attribute:: Sitemap.protocol
     213
     214        **Optional.** A string containing the protocol to be used in the URL of each
     215        item in the sitemap. The default is 'http'; if your site is all HTTPS, set it to 'https'.
     216
    212217Shortcuts
    213218=========
    214219
     
    348353ping the Google servers through the command line manage.py interface::
    349354
    350355    python manage.py ping_google [/sitemap.xml]
     356
     357New in Django development version
     358---------------------------------
     359
     360Support for pinging other search engines is now available. There is a
     361new function, :func:`ping_search_engines`, that can be used just like
     362:func:`ping_google`. It notifies Ask and Live Search in addition to
     363Google. There is a corresponding manage.py command, which again
     364works just like ping_google::
     365
     366    python manage.py ping_search_engines [/sitemap.xml]
Back to Top