Code

Ticket #8995: sitemaps.diff

File sitemaps.diff, 7.3 KB (added by john, 4 years ago)

Patch against r13357 for sitemap enhancements.

Line 
1Index: django/contrib/sitemaps/__init__.py
2===================================================================
3--- django/contrib/sitemaps/__init__.py (revision 13357)
4+++ django/contrib/sitemaps/__init__.py (working copy)
5@@ -1,42 +1,86 @@
6-from django.core import urlresolvers, paginator
7+import logging
8 import urllib
9 
10-PING_URL = "http://www.google.com/webmasters/tools/ping"
11+from django.conf import settings
12+from django.contrib.sites.models import Site
13+from django.core import urlresolvers, paginator
14 
15 class SitemapNotFound(Exception):
16     pass
17 
18-def ping_google(sitemap_url=None, ping_url=PING_URL):
19+class Pinger(object):
20     """
21-    Alerts Google that the sitemap for the current site has been updated.
22-    If sitemap_url is provided, it should be an absolute path to the sitemap
23-    for this site -- e.g., '/sitemap.xml'. If sitemap_url is not provided, this
24-    function will attempt to deduce it by using urlresolvers.reverse().
25+
26+    Superclass for alerting search engines that the sitemap for the current
27+    site has been updated.  If sitemap_url is provided to the constructor, it
28+    should be an absolute path to the sitemap for this site -- e.g.,
29+    '/sitemap.xml'. If sitemap_url is not provided, the constructor will
30+    attempt to deduce it by using urlresolvers.reverse().
31+
32+    The ping_url is specific to each search engine, thus each subclass, but
33+    can be overridden in the constructor arguments.
34+
35     """
36-    if sitemap_url is None:
37-        try:
38-            # First, try to get the "index" sitemap URL.
39-            sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.index')
40-        except urlresolvers.NoReverseMatch:
41+    logger = None
42+    name = None
43+    ping_url = None
44+    sitemap_url = None
45+   
46+    def __init__(self, sitemap_url=None, ping_url=None):
47+        if sitemap_url is None:
48             try:
49-                # Next, try for the "global" sitemap URL.
50-                sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap')
51+                # First, try to get the "index" sitemap URL.
52+                sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.index')
53             except urlresolvers.NoReverseMatch:
54-                pass
55+                try:
56+                    # Next, try for the "global" sitemap URL.
57+                    sitemap_url = urlresolvers.reverse('django.contrib.sitemaps.views.sitemap')
58+                except urlresolvers.NoReverseMatch:
59+                    raise SitemapNotFound("You didn't provide a sitemap_url, and the sitemap URL couldn't be auto-detected.")
60 
61-    if sitemap_url is None:
62-        raise SitemapNotFound("You didn't provide a sitemap_url, and the sitemap URL couldn't be auto-detected.")
63+        self.sitemap_url = "http://%s%s" % (Site.objects.get_current().domain, sitemap_url or get_sitemap_url())
64+        if ping_url:
65+            self.ping_url = ping_url
66+        self.logger = logging.getLogger("django.contrib.sitemaps.%s" % str(self.__class__.__name__))
67+   
68+    def ping(self):
69+        try:
70+            if settings.DEBUG:
71+                self.logger.debug("Pinging %s with sitemap %s..." % (self.name, self.sitemap_url))
72+            params = urllib.urlencode({'sitemap' : self.sitemap_url})
73+            u = urllib.urlopen("%s?%s" % (self.ping_url, params))
74+            if settings.DEBUG:
75+                self.logger.debug(u.read())
76+            self.logger.info("Pinged %s with sitemap %s." % (self.name, self.sitemap_url))
77+        except Exception, e:
78+            self.logger.error("%s ping failed: %s" % (self.name, e))
79 
80-    from django.contrib.sites.models import Site
81-    current_site = Site.objects.get_current()
82-    url = "http://%s%s" % (current_site.domain, sitemap_url)
83-    params = urllib.urlencode({'sitemap':url})
84-    urllib.urlopen("%s?%s" % (ping_url, params))
85+class AskPinger(Pinger):
86+    name = 'Ask'
87+    ping_url = 'http://submissions.ask.com/ping'
88 
89+class GooglePinger(Pinger):
90+    name = 'Google'
91+    ping_url = 'http://www.google.com/webmasters/tools/ping'
92+
93+class LiveSearchPinger(Pinger):
94+    name = 'Live Search'
95+    ping_url = 'http://webmaster.live.com/ping.aspx'
96+
97+def ping_google(sitemap_url=None, ping_url=None):
98+    GooglePinger(sitemap_url=sitemap_url).ping()
99+
100+def ping_search_engines(sitemap_url=None):
101+    pingers = getattr(settings, 'SITEMAP_PINGERS', [AskPinger, GooglePinger, LiveSearchPinger])
102+    for pinger in pingers:
103+        pinger = pinger(sitemap_url=sitemap_url)
104+        pinger.ping()
105+
106 class Sitemap(object):
107     # This limit is defined by Google. See the index documentation at
108     # http://sitemaps.org/protocol.php#index.
109     limit = 50000
110+    protocol = 'http'
111 
112     def __get(self, name, obj, default=None):
113         try:
114@@ -64,7 +108,7 @@
115         current_site = Site.objects.get_current()
116         urls = []
117         for item in self.paginator.page(page).object_list:
118-            loc = "http://%s%s" % (current_site.domain, self.__get('location', item))
119+            loc = "%s://%s%s" % (self.protocol, current_site.domain, self.__get('location', item))
120             url_info = {
121                 'location':   loc,
122                 'lastmod':    self.__get('lastmod', item, None),
123Index: django/contrib/sitemaps/management/commands/ping_search_engines.py
124===================================================================
125--- django/contrib/sitemaps/management/commands/ping_search_engines.py  (revision 0)
126+++ django/contrib/sitemaps/management/commands/ping_search_engines.py  (revision 0)
127@@ -0,0 +1,20 @@
128+import logging
129+import sys
130+
131+from django.conf import settings
132+from django.core.management.base import BaseCommand
133+from django.contrib.sitemaps import ping_search_engines
134+
135+
136+class Command(BaseCommand):
137+    help = "Ping all supported search engines with an updated sitemap and optionally, the url of the sitemap."
138+
139+    def execute(self, *args, **options):
140+        logging.root.addHandler(logging.StreamHandler(sys.stderr))
141+        logging.root.setLevel(settings.DEBUG and logging.DEBUG or logging.INFO)
142+        if len(args) == 1:
143+            sitemap_url = args[0]
144+        else:
145+            sitemap_url = None
146+        ping_search_engines(sitemap_url=sitemap_url)
147+
148Index: docs/ref/contrib/sitemaps.txt
149===================================================================
150--- docs/ref/contrib/sitemaps.txt       (revision 13357)
151+++ docs/ref/contrib/sitemaps.txt       (working copy)
152@@ -209,6 +209,11 @@
153 
154         .. _sitemaps.org documentation: http://www.sitemaps.org/protocol.html#prioritydef
155 
156+    .. attribute:: Sitemap.protocol
157+
158+        **Optional.** A string containing the protocol to be used in the URL of each
159+        item in the sitemap. The default is ``'http'``; if your site is served entirely over HTTPS, set it to ``'https'``.
160+
161 Shortcuts
162 =========
163 
164@@ -348,3 +353,14 @@
165 ping the Google server's through the command line manage.py interface::
166 
167     python manage.py ping_google [/sitemap.xml]
168+
169+New in Django development version
170+---------------------------------
171+
172+Support for pinging other search engines is now available. There is a
173+new function, :func:`ping_search_engines`, that can be used just like
174+:func:`ping_google`. It notifies Ask and Live Search in addition to
175+Google. There is a corresponding manage.py command, which again
176+works just like ping_google::
177+
178+    python manage.py ping_search_engines [/sitemap.xml]