| 1 |
""" |
|---|
| 2 |
Syndication feed generation library -- used for generating RSS, etc. |
|---|
| 3 |
|
|---|
| 4 |
Sample usage: |
|---|
| 5 |
|
|---|
| 6 |
>>> from django.utils import feedgenerator |
|---|
| 7 |
>>> feed = feedgenerator.Rss201rev2Feed( |
|---|
| 8 |
... title=u"Poynter E-Media Tidbits", |
|---|
| 9 |
... link=u"http://www.poynter.org/column.asp?id=31", |
|---|
| 10 |
... description=u"A group weblog by the sharpest minds in online media/journalism/publishing.", |
|---|
| 11 |
... language=u"en", |
|---|
| 12 |
... ) |
|---|
| 13 |
>>> feed.add_item(title="Hello", link=u"http://www.holovaty.com/test/", description="Testing.") |
|---|
| 14 |
>>> fp = open('test.rss', 'w') |
|---|
| 15 |
>>> feed.write(fp, 'utf-8') |
|---|
| 16 |
>>> fp.close() |
|---|
| 17 |
|
|---|
| 18 |
For definitions of the different versions of RSS, see: |
|---|
| 19 |
http://diveintomark.org/archives/2004/02/04/incompatible-rss |
|---|
| 20 |
""" |
|---|
| 21 |
|
|---|
| 22 |
from django.utils.xmlutils import SimplerXMLGenerator |
|---|
| 23 |
from django.utils.encoding import force_unicode, iri_to_uri |
|---|
| 24 |
import datetime, re, time |
|---|
| 25 |
import email.Utils |
|---|
| 26 |
|
|---|
| 27 |
def rfc2822_date(date):
    """
    Formats ``date`` as an RFC 2822 date string using
    ``email.Utils.formatdate``, which reports the time in UTC with a
    ``-0000`` zone.

    A naive datetime (or a plain date) is interpreted as local time via
    ``time.mktime``. A timezone-aware datetime is converted to UTC using its
    own offset, so the result does not depend on the server's timezone.
    """
    # Plain date objects have no utcoffset(); treat them like naive datetimes.
    utcoffset = getattr(date, 'utcoffset', None)
    if utcoffset is not None and utcoffset() is not None:
        # timetuple() drops the offset, so time.mktime() would wrongly read
        # the aware wall-clock time as local time. Go through UTC instead.
        import calendar
        timestamp = calendar.timegm(date.utctimetuple())
    else:
        timestamp = time.mktime(date.timetuple())
    return email.Utils.formatdate(timestamp)
|---|
| 29 |
|
|---|
| 30 |
def rfc3339_date(date):
    """
    Formats ``date`` as an RFC 3339 timestamp, e.g. ``2008-01-02T03:04:05Z``
    or ``2008-01-02T03:04:05+05:30``.

    Datetimes without a UTC offset get the ``Z`` suffix; datetimes with an
    offset get it appended as ``+HH:MM`` / ``-HH:MM``.
    """
    stamp = date.strftime('%Y-%m-%dT%H:%M:%S')
    offset = date.utcoffset()
    if offset is None:
        return stamp + 'Z'
    # RFC 3339 requires a colon in the numeric offset ("+05:30"). strftime's
    # %z yields "+0530", so the offset is formatted by hand.
    minutes = offset.days * 24 * 60 + offset.seconds // 60
    if minutes < 0:
        sign = '-'
    else:
        sign = '+'
    # Split the absolute value so that e.g. -03:30 is not rendered as -04:30.
    hours, minutes = divmod(abs(minutes), 60)
    return stamp + '%s%02d:%02d' % (sign, hours, minutes)
|---|
| 35 |
|
|---|
| 36 |
def get_tag_uri(url, date):
    """
    Creates a TagURI. See http://diveintomark.org/archives/2004/05/28/howto-atom-id

    The leading ``http://`` or ``https://`` is dropped, the date (when given)
    is inserted as ``,YYYY-MM-DD:`` in front of the first slash, and every
    ``#`` is turned into ``/``. Pass ``date=None`` to leave the date out.
    """
    # Strip the scheme first: otherwise the first "/" found below would be one
    # of the slashes in "https://" and the date would land inside the scheme.
    tag = re.sub('^https?://', '', url)
    if date is not None:
        tag = tag.replace('/', ',%s:/' % date.strftime('%Y-%m-%d'), 1)
    tag = tag.replace('#', '/')
    return u'tag:' + tag
|---|
| 43 |
|
|---|
| 44 |
class SyndicationFeed(object):
    "Base class for all syndication feeds. Subclasses should provide write()"

    def __init__(self, title, link, description, language=None, author_email=None,
                 author_name=None, author_link=None, subtitle=None, categories=None,
                 feed_url=None, feed_copyright=None, feed_guid=None, ttl=None):
        """
        Records the feed-level metadata in self.feed and starts with an empty
        list of items.

        Text values are passed through force_unicode(strings_only=True) and
        link, author_link and feed_url through iri_to_uri. The feed id is
        feed_guid, or the link as given when feed_guid is empty. ttl is
        stored as passed in.
        """
        def text(value):
            return force_unicode(value, strings_only=True)

        if categories:
            category_names = [force_unicode(name) for name in categories]
        else:
            category_names = ()

        feed = {}
        feed['title'] = text(title)
        feed['link'] = iri_to_uri(link)
        feed['description'] = text(description)
        feed['language'] = text(language)
        feed['author_email'] = text(author_email)
        feed['author_name'] = text(author_name)
        feed['author_link'] = iri_to_uri(author_link)
        feed['subtitle'] = text(subtitle)
        feed['categories'] = category_names
        feed['feed_url'] = iri_to_uri(feed_url)
        feed['feed_copyright'] = text(feed_copyright)
        feed['id'] = feed_guid or link
        feed['ttl'] = ttl
        self.feed = feed
        self.items = []

    def add_item(self, title, link, description, author_email=None,
                 author_name=None, author_link=None, pubdate=None, comments=None,
                 unique_id=None, enclosure=None, categories=(), item_copyright=None,
                 ttl=None):
        """
        Appends one item to the feed. Every argument should be a Python
        Unicode object, with two exceptions: pubdate is a datetime.datetime
        object and enclosure is an instance of the Enclosure class.
        """
        def text(value):
            return force_unicode(value, strings_only=True)

        if categories:
            category_names = [text(name) for name in categories]
        else:
            category_names = ()

        entry = {}
        entry['title'] = text(title)
        entry['link'] = iri_to_uri(link)
        entry['description'] = text(description)
        entry['author_email'] = text(author_email)
        entry['author_name'] = text(author_name)
        entry['author_link'] = iri_to_uri(author_link)
        entry['pubdate'] = pubdate
        entry['comments'] = text(comments)
        entry['unique_id'] = text(unique_id)
        entry['enclosure'] = enclosure
        entry['categories'] = category_names
        entry['item_copyright'] = text(item_copyright)
        entry['ttl'] = ttl
        self.items.append(entry)

    def num_items(self):
        "Returns how many items have been added to the feed."
        return len(self.items)

    def write(self, outfile, encoding):
        """
        Writes the feed, in the given encoding, to the file-like object
        outfile. The base class has no output format, so subclasses must
        override this method.
        """
        raise NotImplementedError

    def writeString(self, encoding):
        """
        Renders the feed in the given encoding and returns it as a string.
        """
        from StringIO import StringIO
        buf = StringIO()
        self.write(buf, encoding)
        return buf.getvalue()

    def latest_post_date(self):
        """
        Returns the most recent pubdate among the items. When no item has a
        pubdate, the current date/time is returned instead.
        """
        dated = []
        for entry in self.items:
            if entry['pubdate'] is not None:
                dated.append(entry['pubdate'])
        if not dated:
            return datetime.datetime.now()
        dated.sort()
        return dated[-1]
|---|
| 126 |
|
|---|
| 127 |
class Enclosure(object):
    "Represents an RSS enclosure"

    def __init__(self, url, length, mime_type):
        """
        All args are expected to be Python Unicode objects. The url is run
        through iri_to_uri; length and mime_type are stored as given.
        """
        self.url = iri_to_uri(url)
        self.length = length
        self.mime_type = mime_type
|---|
| 133 |
|
|---|
| 134 |
class RssFeed(SyndicationFeed):
    """
    Shared base for the RSS feed classes.

    write() emits the <rss> and <channel> elements. Subclasses supply
    ``_version`` (used for the <rss version="..."> attribute) and
    ``write_items(handler)``, which writes the individual items.
    """
    mime_type = 'application/rss+xml'

    def write(self, outfile, encoding):
        """
        Writes the RSS document, in the given encoding, to the file-like
        object outfile.

        Channel title, link, description and lastBuildDate are always
        written; language, categories, copyright and ttl only when set.
        lastBuildDate is taken from latest_post_date().
        """
        handler = SimplerXMLGenerator(outfile, encoding)
        handler.startDocument()
        handler.startElement(u"rss", {u"version": self._version})
        handler.startElement(u"channel", {})
        handler.addQuickElement(u"title", self.feed['title'])
        handler.addQuickElement(u"link", self.feed['link'])
        handler.addQuickElement(u"description", self.feed['description'])
        if self.feed['language'] is not None:
            handler.addQuickElement(u"language", self.feed['language'])
        for cat in self.feed['categories']:
            handler.addQuickElement(u"category", cat)
        if self.feed['feed_copyright'] is not None:
            handler.addQuickElement(u"copyright", self.feed['feed_copyright'])
        handler.addQuickElement(u"lastBuildDate", rfc2822_date(self.latest_post_date()).decode('ascii'))
        if self.feed['ttl'] is not None:
            # ttl is stored exactly as the caller passed it, so it may be a
            # number; coerce it to text before handing it to the XML handler.
            handler.addQuickElement(u"ttl", force_unicode(self.feed['ttl']))
        self.write_items(handler)
        self.endChannelElement(handler)
        handler.endElement(u"rss")

    def endChannelElement(self, handler):
        "Closes the <channel> element; kept separate so subclasses can override it."
        handler.endElement(u"channel")
|---|
| 159 |
|
|---|
| 160 |
class RssUserland091Feed(RssFeed):
    "RSS 0.91 feed: items carry a title, a link and an optional description."
    _version = u"0.91"

    def write_items(self, handler):
        "Writes one <item> element per feed item to handler."
        add = handler.addQuickElement
        for entry in self.items:
            handler.startElement(u"item", {})
            add(u"title", entry['title'])
            add(u"link", entry['link'])
            description = entry['description']
            if description is not None:
                add(u"description", description)
            handler.endElement(u"item")
|---|
| 170 |
|
|---|
| 171 |
class Rss201rev2Feed(RssFeed):
    "RSS 2.0 feed."
    # Spec: http://blogs.law.harvard.edu/tech/rss
    _version = u"2.0"

    def write_items(self, handler):
        """
        Writes one <item> element per feed item to handler.

        Title and link are always written; description, author, pubDate,
        comments, guid, ttl, enclosure and categories only when present.
        """
        for item in self.items:
            handler.startElement(u"item", {})
            handler.addQuickElement(u"title", item['title'])
            handler.addQuickElement(u"link", item['link'])
            if item['description'] is not None:
                handler.addQuickElement(u"description", item['description'])

            # Author information. <author> is only written when an e-mail
            # address is known; a name on its own goes into dc:creator.
            if item["author_name"] and item["author_email"]:
                handler.addQuickElement(u"author", "%s (%s)" % \
                    (item['author_email'], item['author_name']))
            elif item["author_email"]:
                handler.addQuickElement(u"author", item["author_email"])
            elif item["author_name"]:
                handler.addQuickElement(u"dc:creator", item["author_name"], {"xmlns:dc": u"http://purl.org/dc/elements/1.1/"})

            if item['pubdate'] is not None:
                handler.addQuickElement(u"pubDate", rfc2822_date(item['pubdate']).decode('ascii'))
            if item['comments'] is not None:
                handler.addQuickElement(u"comments", item['comments'])
            if item['unique_id'] is not None:
                handler.addQuickElement(u"guid", item['unique_id'])
            if item['ttl'] is not None:
                # ttl is stored exactly as the caller passed it, so it may be
                # a number; coerce it to text for the XML handler.
                handler.addQuickElement(u"ttl", force_unicode(item['ttl']))

            # Enclosure.
            if item['enclosure'] is not None:
                handler.addQuickElement(u"enclosure", '',
                    {u"url": item['enclosure'].url, u"length": item['enclosure'].length,
                        u"type": item['enclosure'].mime_type})

            # Categories.
            for cat in item['categories']:
                handler.addQuickElement(u"category", cat)

            handler.endElement(u"item")
|---|
| 211 |
|
|---|
| 212 |
class Atom1Feed(SyndicationFeed):
    "Atom feed, written in the http://www.w3.org/2005/Atom namespace."
    # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
    mime_type = 'application/atom+xml'
    ns = u"http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        "Writes the Atom document, in the given encoding, to the file-like object outfile."
        feed = self.feed
        handler = SimplerXMLGenerator(outfile, encoding)
        handler.startDocument()

        # xml:lang is only declared when the feed has a language.
        root_attrs = {u"xmlns": self.ns}
        if feed['language'] is not None:
            root_attrs[u"xml:lang"] = feed['language']
        handler.startElement(u"feed", root_attrs)

        add = handler.addQuickElement
        add(u"title", feed['title'])
        add(u"link", "", {u"rel": u"alternate", u"href": feed['link']})
        if feed['feed_url'] is not None:
            add(u"link", "", {u"rel": u"self", u"href": feed['feed_url']})
        add(u"id", feed['id'])
        add(u"updated", rfc3339_date(self.latest_post_date()).decode('ascii'))

        # The <author> element is only written when a name is known; e-mail
        # and uri are optional children.
        if feed['author_name'] is not None:
            handler.startElement(u"author", {})
            add(u"name", feed['author_name'])
            for tag, key in ((u"email", 'author_email'), (u"uri", 'author_link')):
                if feed[key] is not None:
                    add(tag, feed[key])
            handler.endElement(u"author")

        if feed['subtitle'] is not None:
            add(u"subtitle", feed['subtitle'])
        for term in feed['categories']:
            add(u"category", "", {u"term": term})
        if feed['feed_copyright'] is not None:
            add(u"rights", feed['feed_copyright'])

        self.write_items(handler)
        handler.endElement(u"feed")

    def write_items(self, handler):
        "Writes one <entry> element per feed item to handler."
        add = handler.addQuickElement
        for entry in self.items:
            handler.startElement(u"entry", {})
            add(u"title", entry['title'])
            add(u"link", u"", {u"href": entry['link'], u"rel": u"alternate"})
            if entry['pubdate'] is not None:
                add(u"updated", rfc3339_date(entry['pubdate']).decode('ascii'))

            # Author information.
            if entry['author_name'] is not None:
                handler.startElement(u"author", {})
                add(u"name", entry['author_name'])
                for tag, key in ((u"email", 'author_email'), (u"uri", 'author_link')):
                    if entry[key] is not None:
                        add(tag, entry[key])
                handler.endElement(u"author")

            # Unique ID: fall back to a tag URI built from link and pubdate.
            entry_id = entry['unique_id']
            if entry_id is None:
                entry_id = get_tag_uri(entry['link'], entry['pubdate'])
            add(u"id", entry_id)

            # Summary.
            if entry['description'] is not None:
                add(u"summary", entry['description'], {u"type": u"html"})

            # Enclosure.
            enclosure = entry['enclosure']
            if enclosure is not None:
                add(u"link", '',
                    {u"rel": u"enclosure",
                     u"href": enclosure.url,
                     u"length": enclosure.length,
                     u"type": enclosure.mime_type})

            # Categories.
            for term in entry['categories']:
                add(u"category", u"", {u"term": term})

            # Rights.
            if entry['item_copyright'] is not None:
                add(u"rights", entry['item_copyright'])

            handler.endElement(u"entry")
|---|
| 292 |
|
|---|
| 293 |
# Alias for the feed class used by default. This isolates the decision of what
# the system default is, so calling code can refer to
# "feedgenerator.DefaultFeed" instead of "feedgenerator.Rss201rev2Feed".
DefaultFeed = Rss201rev2Feed
|---|