Django

Code

root/django/trunk/django/utils/feedgenerator.py

Revision 10801, 13.9 kB (checked in by kmtracey, 2 months ago)

Fixed #11066 -- Corrected 15 duplicate "the"s found in docs and code comments. Thanks kaikuehne.

  • Property svn:eol-style set to native
Line 
1 """
2 Syndication feed generation library -- used for generating RSS, etc.
3
4 Sample usage:
5
6 >>> from django.utils import feedgenerator
7 >>> feed = feedgenerator.Rss201rev2Feed(
8 ...     title=u"Poynter E-Media Tidbits",
9 ...     link=u"http://www.poynter.org/column.asp?id=31",
10 ...     description=u"A group weblog by the sharpest minds in online media/journalism/publishing.",
11 ...     language=u"en",
12 ... )
13 >>> feed.add_item(title="Hello", link=u"http://www.holovaty.com/test/", description="Testing.")
14 >>> fp = open('test.rss', 'w')
15 >>> feed.write(fp, 'utf-8')
16 >>> fp.close()
17
18 For definitions of the different versions of RSS, see:
19 http://diveintomark.org/archives/2004/02/04/incompatible-rss
20 """
21
22 import re
23 import datetime
24 from django.utils.xmlutils import SimplerXMLGenerator
25 from django.utils.encoding import force_unicode, iri_to_uri
26
def rfc2822_date(date):
    """
    Formats a datetime.datetime as an RFC 2822 date-time string, e.g.
    "Fri, 14 Nov 2008 13:37:00 +0100".

    Aware datetimes get their numeric UTC offset ("+HHMM" / "-HHMM").
    Datetimes without a usable UTC offset (no tzinfo, or a tzinfo whose
    utcoffset() returns None) are rendered with the "-0000" zone.

    Returns a native string.
    """
    # We do this ourselves to be timezone aware, email.Utils is not tz aware.
    # The day and month abbreviations are spelled out here instead of coming
    # from strftime('%a') / strftime('%b'): those follow the active locale,
    # while RFC 2822 only allows the English abbreviations.
    days = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    time_str = '%s, %02d %s %04d %02d:%02d:%02d ' % (
        days[date.weekday()], date.day, months[date.month - 1], date.year,
        date.hour, date.minute, date.second)

    offset = date.utcoffset()
    if offset is None:
        return time_str + '-0000'

    # Total offset in minutes; negative offsets are stored by timedelta as
    # negative days plus positive seconds, so both parts must be combined.
    timezone = (offset.days * 24 * 60) + (offset.seconds // 60)
    # Split the magnitude, not the signed value: divmod(-210, 60) is (-4, 30),
    # which would turn UTC-03:30 into "-0430".
    if timezone < 0:
        sign = '-'
    else:
        sign = '+'
    hour, minute = divmod(abs(timezone), 60)
    return time_str + "%s%02d%02d" % (sign, hour, minute)
37
def rfc3339_date(date):
    """
    Formats a datetime.datetime as an RFC 3339 timestamp, e.g.
    "2008-11-14T13:37:00+02:00".

    Aware datetimes get their numeric UTC offset ("+HH:MM" / "-HH:MM").
    Datetimes without a usable UTC offset (no tzinfo, or a tzinfo whose
    utcoffset() returns None) are rendered with a trailing "Z".

    Returns a native string.
    """
    time_str = date.strftime('%Y-%m-%dT%H:%M:%S')

    offset = date.utcoffset()
    if offset is None:
        return time_str + 'Z'

    # Total offset in minutes; negative offsets are stored by timedelta as
    # negative days plus positive seconds, so both parts must be combined.
    timezone = (offset.days * 24 * 60) + (offset.seconds // 60)
    # Split the magnitude, not the signed value: divmod(-210, 60) is (-4, 30),
    # which would turn UTC-03:30 into "-04:30".
    if timezone < 0:
        sign = '-'
    else:
        sign = '+'
    hour, minute = divmod(abs(timezone), 60)
    return time_str + "%s%02d:%02d" % (sign, hour, minute)
47
def get_tag_uri(url, date):
    """
    Creates a TagURI. See http://diveintomark.org/archives/2004/05/28/howto-atom-id

    url is the item's link; date is an object with strftime() (or None).
    The scheme is dropped, ",YYYY-MM-DD:" is inserted in front of the first
    "/" when a date is given, and every "#" becomes "/". The result is
    prefixed with "tag:".
    """
    # Strip https:// as well as http://. If the scheme were left in place the
    # first "/" would be the one inside "https://", and the date would be
    # spliced into the scheme instead of after the host name.
    tag = re.sub(r'^https?://', '', url)
    if date is not None:
        # Only the first slash (the end of the host part) receives the date.
        tag = tag.replace('/', ',%s:/' % date.strftime('%Y-%m-%d'), 1)
    tag = tag.replace('#', '/')
    return u'tag:' + tag
55
class SyndicationFeed(object):
    """
    Base class for all syndication feeds. Subclasses should provide write().

    Feed-level values are kept in the self.feed dictionary and every added
    item is a dictionary appended to the self.items list.
    """
    def __init__(self, title, link, description, language=None, author_email=None,
            author_name=None, author_link=None, subtitle=None, categories=None,
            feed_url=None, feed_copyright=None, feed_guid=None, ttl=None, **kwargs):
        """
        Stores the feed-level metadata in self.feed.

        Text values go through force_unicode(..., strings_only=True) and the
        link-like values (link, author_link, feed_url) through iri_to_uri().
        The feed 'id' is feed_guid when given, otherwise link. Any extra
        keyword arguments are merged into self.feed last, so they can add
        keys or override the ones set here.
        """
        to_unicode = lambda s: force_unicode(s, strings_only=True)
        if categories:
            categories = [force_unicode(c) for c in categories]
        if ttl is not None:
            # Force ints to unicode: ttl is naturally given as a number, but
            # it is later written out as XML character data, which must be
            # a string.
            ttl = force_unicode(ttl)
        self.feed = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'language': to_unicode(language),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'subtitle': to_unicode(subtitle),
            'categories': categories or (),
            'feed_url': iri_to_uri(feed_url),
            'feed_copyright': to_unicode(feed_copyright),
            'id': feed_guid or link,
            'ttl': ttl,
        }
        self.feed.update(kwargs)
        self.items = []

    def add_item(self, title, link, description, author_email=None,
        author_name=None, author_link=None, pubdate=None, comments=None,
        unique_id=None, enclosure=None, categories=(), item_copyright=None,
        ttl=None, **kwargs):
        """
        Adds an item to the feed. All args are expected to be Python Unicode
        objects except pubdate, which is a datetime.datetime object, and
        enclosure, which is an instance of the Enclosure class.

        Extra keyword arguments are merged into the item dictionary last.
        """
        to_unicode = lambda s: force_unicode(s, strings_only=True)
        if categories:
            categories = [to_unicode(c) for c in categories]
        if ttl is not None:
            # Force ints to unicode, for the same reason as the feed-level
            # ttl: the value ends up as XML character data.
            ttl = force_unicode(ttl)
        item = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'pubdate': pubdate,
            'comments': to_unicode(comments),
            'unique_id': to_unicode(unique_id),
            'enclosure': enclosure,
            'categories': categories or (),
            'item_copyright': to_unicode(item_copyright),
            'ttl': ttl,
        }
        item.update(kwargs)
        self.items.append(item)

    def num_items(self):
        "Returns the number of items added to the feed so far."
        return len(self.items)

    def root_attributes(self):
        """
        Return extra attributes to place on the root (i.e. feed/channel) element.
        Called from write().
        """
        return {}

    def add_root_elements(self, handler):
        """
        Add elements in the root (i.e. feed/channel) element. Called
        from write().
        """
        pass

    def item_attributes(self, item):
        """
        Return extra attributes to place on each item (i.e. item/entry) element.
        """
        return {}

    def add_item_elements(self, handler, item):
        """
        Add elements on each item (i.e. item/entry) element.
        """
        pass

    def write(self, outfile, encoding):
        """
        Outputs the feed in the given encoding to outfile, which is a file-like
        object. Subclasses should override this.
        """
        raise NotImplementedError

    def writeString(self, encoding):
        """
        Returns the feed in the given encoding as a string.

        The feed is rendered by write() into an in-memory StringIO buffer.
        """
        from StringIO import StringIO
        s = StringIO()
        self.write(s, encoding)
        return s.getvalue()

    def latest_post_date(self):
        """
        Returns the latest item's pubdate. If none of them have a pubdate,
        this returns the current date/time.
        """
        updates = [i['pubdate'] for i in self.items if i['pubdate'] is not None]
        if len(updates) > 0:
            updates.sort()
            return updates[-1]
        else:
            return datetime.datetime.now()
168
class Enclosure(object):
    """Represents an RSS enclosure."""

    def __init__(self, url, length, mime_type):
        """All args are expected to be Python Unicode objects."""
        self.length = length
        self.mime_type = mime_type
        # The URL is kept as returned by iri_to_uri(); length and mime_type
        # are stored exactly as given.
        self.url = iri_to_uri(url)
175
class RssFeed(SyndicationFeed):
    """
    Shared RSS output logic. Subclasses are expected to set _version (read by
    rss_attributes()) and to implement add_item_elements().
    """
    mime_type = 'application/rss+xml'

    def write(self, outfile, encoding):
        """
        Writes the document to outfile: an <rss> element wrapping one
        <channel>, which holds the feed-level elements followed by the items.
        """
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement(u"rss", self.rss_attributes())
        xml.startElement(u"channel", self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        self.endChannelElement(xml)
        xml.endElement(u"rss")

    def rss_attributes(self):
        "Returns the attributes for the <rss> element."
        attrs = {u"version": self._version}
        return attrs

    def write_items(self, handler):
        "Writes one <item> element per entry in self.items."
        for entry in self.items:
            attrs = self.item_attributes(entry)
            handler.startElement(u'item', attrs)
            self.add_item_elements(handler, entry)
            handler.endElement(u"item")

    def add_root_elements(self, handler):
        """
        Writes the channel-level elements. title, link, description and
        lastBuildDate are always written; language, copyright and ttl only
        when they are not None; one category element per feed category.
        """
        feed = self.feed
        add = handler.addQuickElement

        # Always-present elements, in output order.
        for tag, key in ((u"title", 'title'),
                         (u"link", 'link'),
                         (u"description", 'description')):
            add(tag, feed[key])

        language = feed['language']
        if language is not None:
            add(u"language", language)

        for category in feed['categories']:
            add(u"category", category)

        rights = feed['feed_copyright']
        if rights is not None:
            add(u"copyright", rights)

        build_date = rfc2822_date(self.latest_post_date())
        add(u"lastBuildDate", build_date.decode('utf-8'))

        ttl = feed['ttl']
        if ttl is not None:
            add(u"ttl", ttl)

    def endChannelElement(self, handler):
        "Closes the <channel> element opened by write()."
        handler.endElement(u"channel")
213
class RssUserland091Feed(RssFeed):
    """RSS feed written with version="0.91"."""
    _version = u"0.91"

    def add_item_elements(self, handler, item):
        """
        Writes the item's title and link, plus its description when that is
        not None.
        """
        add = handler.addQuickElement
        add(u"title", item['title'])
        add(u"link", item['link'])
        description = item['description']
        if description is None:
            return
        add(u"description", description)
221
class Rss201rev2Feed(RssFeed):
    """RSS feed written with version="2.0"."""
    # Spec: http://blogs.law.harvard.edu/tech/rss
    _version = u"2.0"

    def add_item_elements(self, handler, item):
        """
        Writes the child elements of one <item>, in this order: title, link,
        description, author (or dc:creator), pubDate, comments, guid, ttl,
        enclosure, then one category per item category. Apart from title and
        link, each element is written only when its value is present.
        """
        add = handler.addQuickElement

        add(u"title", item['title'])
        add(u"link", item['link'])
        description = item['description']
        if description is not None:
            add(u"description", description)

        # Author information. An e-mail address is needed for <author>; a
        # name on its own is written as <dc:creator> instead.
        name = item["author_name"]
        email = item["author_email"]
        if email:
            if name:
                add(u"author", "%s (%s)" % (email, name))
            else:
                add(u"author", email)
        elif name:
            add(u"dc:creator", name,
                {"xmlns:dc": u"http://purl.org/dc/elements/1.1/"})

        pubdate = item['pubdate']
        if pubdate is not None:
            add(u"pubDate", rfc2822_date(pubdate).decode('utf-8'))

        # Plain optional elements: (element name, item key), in output order.
        for tag, key in ((u"comments", 'comments'),
                         (u"guid", 'unique_id'),
                         (u"ttl", 'ttl')):
            value = item[key]
            if value is not None:
                add(tag, value)

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            add(u"enclosure", '',
                {u"url": enclosure.url, u"length": enclosure.length,
                    u"type": enclosure.mime_type})

        # Categories.
        for category in item['categories']:
            add(u"category", category)
258
class Atom1Feed(SyndicationFeed):
    """Feed written as an Atom <feed> document with <entry> items."""
    # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
    mime_type = 'application/atom+xml'
    ns = u"http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        """
        Writes the document to outfile: a <feed> element holding the
        feed-level elements followed by the entries.
        """
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement(u'feed', self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        xml.endElement(u"feed")

    def root_attributes(self):
        """
        Returns the attributes of the <feed> element: the namespace, plus
        xml:lang when the feed has a language.
        """
        attrs = {u"xmlns": self.ns}
        language = self.feed['language']
        if language is not None:
            attrs[u"xml:lang"] = language
        return attrs

    def add_root_elements(self, handler):
        """
        Writes the feed-level elements: title, alternate link, optional self
        link, id, updated, optional author block, optional subtitle, one
        category per feed category and optional rights.
        """
        feed = self.feed
        add = handler.addQuickElement

        add(u"title", feed['title'])
        add(u"link", "", {u"rel": u"alternate", u"href": feed['link']})
        feed_url = feed['feed_url']
        if feed_url is not None:
            add(u"link", "", {u"rel": u"self", u"href": feed_url})
        add(u"id", feed['id'])
        updated = rfc3339_date(self.latest_post_date())
        add(u"updated", updated.decode('utf-8'))

        # The author block is only written when there is a name; e-mail and
        # URI are optional children of it.
        author_name = feed['author_name']
        if author_name is not None:
            handler.startElement(u"author", {})
            add(u"name", author_name)
            for tag, key in ((u"email", 'author_email'),
                             (u"uri", 'author_link')):
                value = feed[key]
                if value is not None:
                    add(tag, value)
            handler.endElement(u"author")

        subtitle = feed['subtitle']
        if subtitle is not None:
            add(u"subtitle", subtitle)
        for category in feed['categories']:
            add(u"category", "", {u"term": category})
        rights = feed['feed_copyright']
        if rights is not None:
            add(u"rights", rights)

    def write_items(self, handler):
        "Writes one <entry> element per entry in self.items."
        for entry in self.items:
            attrs = self.item_attributes(entry)
            handler.startElement(u"entry", attrs)
            self.add_item_elements(handler, entry)
            handler.endElement(u"entry")

    def add_item_elements(self, handler, item):
        """
        Writes the child elements of one <entry>: title, alternate link,
        optional updated, optional author block, id, optional summary,
        optional enclosure link, one category per item category and optional
        rights.
        """
        add = handler.addQuickElement

        add(u"title", item['title'])
        add(u"link", u"", {u"href": item['link'], u"rel": u"alternate"})
        pubdate = item['pubdate']
        if pubdate is not None:
            add(u"updated", rfc3339_date(pubdate).decode('utf-8'))

        # Author information.
        author_name = item['author_name']
        if author_name is not None:
            handler.startElement(u"author", {})
            add(u"name", author_name)
            for tag, key in ((u"email", 'author_email'),
                             (u"uri", 'author_link')):
                value = item[key]
                if value is not None:
                    add(tag, value)
            handler.endElement(u"author")

        # Unique ID. Without an explicit one, a tag URI is derived from the
        # item's link and pubdate.
        entry_id = item['unique_id']
        if entry_id is None:
            entry_id = get_tag_uri(item['link'], item['pubdate'])
        add(u"id", entry_id)

        # Summary.
        summary = item['description']
        if summary is not None:
            add(u"summary", summary, {u"type": u"html"})

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            add(u"link", '',
                {u"rel": u"enclosure",
                 u"href": enclosure.url,
                 u"length": enclosure.length,
                 u"type": enclosure.mime_type})

        # Categories.
        for category in item['categories']:
            add(u"category", u"", {u"term": category})

        # Rights.
        rights = item['item_copyright']
        if rights is not None:
            add(u"rights", rights)
348
# DefaultFeed names the feed class to use when the caller has no preference.
# This isolates the decision of what the system default is, so calling code can
# do "feedgenerator.DefaultFeed" instead of "feedgenerator.Rss201rev2Feed".
DefaultFeed = Rss201rev2Feed
Note: See TracBrowser for help on using the browser.