| 1 |
""" |
|---|
| 2 |
Syndication feed generation library -- used for generating RSS, etc. |
|---|
| 3 |
|
|---|
| 4 |
Sample usage: |
|---|
| 5 |
|
|---|
| 6 |
>>> from django.utils import feedgenerator |
|---|
| 7 |
>>> feed = feedgenerator.Rss201rev2Feed( |
|---|
| 8 |
... title=u"Poynter E-Media Tidbits", |
|---|
| 9 |
... link=u"http://www.poynter.org/column.asp?id=31", |
|---|
| 10 |
... description=u"A group weblog by the sharpest minds in online media/journalism/publishing.", |
|---|
| 11 |
... language=u"en", |
|---|
| 12 |
... ) |
|---|
| 13 |
>>> feed.add_item(title="Hello", link=u"http://www.holovaty.com/test/", description="Testing.") |
|---|
| 14 |
>>> fp = open('test.rss', 'w') |
|---|
| 15 |
>>> feed.write(fp, 'utf-8') |
|---|
| 16 |
>>> fp.close() |
|---|
| 17 |
|
|---|
| 18 |
For definitions of the different versions of RSS, see: |
|---|
| 19 |
http://diveintomark.org/archives/2004/02/04/incompatible-rss |
|---|
| 20 |
""" |
|---|
| 21 |
|
|---|
| 22 |
import re |
|---|
| 23 |
import datetime |
|---|
| 24 |
from django.utils.xmlutils import SimplerXMLGenerator |
|---|
| 25 |
from django.utils.encoding import force_unicode, iri_to_uri |
|---|
| 26 |
|
|---|
| 27 |
def rfc2822_date(date):
    """
    Formats a datetime as an RFC 2822 date string, for example
    "Fri, 14 Nov 2008 13:37:00 -0330".

    If the datetime has a tzinfo that reports a UTC offset, that offset is
    appended as "+HHMM" / "-HHMM"; otherwise "-0000" is appended.
    """
    # RFC 2822 requires the English day and month abbreviations, whereas
    # strftime's %a and %b follow the current locale, so spell them out here.
    days = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    dow = days[date.weekday()]
    month = months[date.month - 1]
    time_str = date.strftime('%s, %%d %s %%Y %%H:%%M:%%S ' % (dow, month))

    # We do this ourselves to be timezone aware, email.Utils is not tz aware.
    offset = None
    if date.tzinfo:
        offset = date.tzinfo.utcoffset(date)
    if offset is None:
        return time_str + '-0000'

    # A negative timedelta is stored as negative days plus non-negative
    # seconds, so this sum is the signed offset in whole minutes.
    timezone = (offset.days * 24 * 60) + (offset.seconds // 60)
    # Split the magnitude rather than the signed value: divmod(-210, 60) is
    # (-4, 30), which would render -03:30 as "-0430".
    if timezone < 0:
        sign = '-'
    else:
        sign = '+'
    hour, minute = divmod(abs(timezone), 60)
    return time_str + "%s%02d%02d" % (sign, hour, minute)
|---|
| 37 |
|
|---|
| 38 |
def rfc3339_date(date):
    """
    Formats a datetime as an RFC 3339 timestamp, for example
    "2008-11-14T13:37:00-03:30".

    If the datetime has a tzinfo that reports a UTC offset, that offset is
    appended as "+HH:MM" / "-HH:MM"; otherwise "Z" is appended.
    """
    time_str = date.strftime('%Y-%m-%dT%H:%M:%S')

    offset = None
    if date.tzinfo:
        offset = date.tzinfo.utcoffset(date)
    if offset is None:
        return time_str + 'Z'

    # A negative timedelta is stored as negative days plus non-negative
    # seconds, so this sum is the signed offset in whole minutes.
    timezone = (offset.days * 24 * 60) + (offset.seconds // 60)
    # Split the magnitude rather than the signed value: divmod(-210, 60) is
    # (-4, 30), which would render -03:30 as "-04:30".
    if timezone < 0:
        sign = '-'
    else:
        sign = '+'
    hour, minute = divmod(abs(timezone), 60)
    return time_str + "%s%02d:%02d" % (sign, hour, minute)
|---|
| 47 |
|
|---|
| 48 |
def get_tag_uri(url, date):
    """
    Creates a TagURI. See http://diveintomark.org/archives/2004/05/28/howto-atom-id

    The scheme is removed from ``url``; if ``date`` is not None,
    ",YYYY-MM-DD:" is inserted in front of the first slash; every "#" is
    replaced by "/"; and the result is prefixed with "tag:".
    """
    # Strip https:// as well as http://. If the scheme were left in place,
    # the date would be spliced in front of the first slash of "//".
    tag = re.sub('^https?://', '', url)
    if date is not None:
        # count=1: only the first slash is replaced.
        tag = re.sub('/', ',%s:/' % date.strftime('%Y-%m-%d'), tag, 1)
    tag = re.sub('#', '/', tag)
    return u'tag:' + tag
|---|
| 55 |
|
|---|
| 56 |
class SyndicationFeed(object):
    """
    Base class for all syndication feeds. Subclasses should provide write().

    Feed-level values are kept in the ``feed`` dictionary; every added item
    is a dictionary appended to the ``items`` list.
    """

    def __init__(self, title, link, description, language=None, author_email=None,
            author_name=None, author_link=None, subtitle=None, categories=None,
            feed_url=None, feed_copyright=None, feed_guid=None, ttl=None, **kwargs):
        """
        Stores the feed-level values. Text values are passed through
        force_unicode(..., strings_only=True) and URLs through iri_to_uri();
        extra keyword arguments are merged into the feed dictionary unchanged.
        """
        def as_text(value):
            return force_unicode(value, strings_only=True)

        if categories:
            categories = [force_unicode(category) for category in categories]

        feed = {}
        feed['title'] = as_text(title)
        feed['link'] = iri_to_uri(link)
        feed['description'] = as_text(description)
        feed['language'] = as_text(language)
        feed['author_email'] = as_text(author_email)
        feed['author_name'] = as_text(author_name)
        feed['author_link'] = iri_to_uri(author_link)
        feed['subtitle'] = as_text(subtitle)
        feed['categories'] = categories or ()
        feed['feed_url'] = iri_to_uri(feed_url)
        feed['feed_copyright'] = as_text(feed_copyright)
        # The feed id falls back to the feed link when no guid is given.
        feed['id'] = feed_guid or link
        feed['ttl'] = ttl
        feed.update(kwargs)

        self.feed = feed
        self.items = []

    def add_item(self, title, link, description, author_email=None,
            author_name=None, author_link=None, pubdate=None, comments=None,
            unique_id=None, enclosure=None, categories=(), item_copyright=None,
            ttl=None, **kwargs):
        """
        Appends one item to the feed. Every argument is expected to be a
        Python Unicode object, with two exceptions: pubdate is a
        datetime.datetime object and enclosure is an Enclosure instance.
        Extra keyword arguments are stored on the item unchanged.
        """
        def as_text(value):
            return force_unicode(value, strings_only=True)

        if categories:
            categories = [as_text(category) for category in categories]

        entry = {}
        entry['title'] = as_text(title)
        entry['link'] = iri_to_uri(link)
        entry['description'] = as_text(description)
        entry['author_email'] = as_text(author_email)
        entry['author_name'] = as_text(author_name)
        entry['author_link'] = iri_to_uri(author_link)
        entry['pubdate'] = pubdate
        entry['comments'] = as_text(comments)
        entry['unique_id'] = as_text(unique_id)
        entry['enclosure'] = enclosure
        entry['categories'] = categories or ()
        entry['item_copyright'] = as_text(item_copyright)
        entry['ttl'] = ttl
        entry.update(kwargs)

        self.items.append(entry)

    def num_items(self):
        "Returns the number of items added so far."
        return len(self.items)

    def root_attributes(self):
        """
        Hook: extra attributes for the root (i.e. feed/channel) element.
        Called from write(). The base implementation adds none.
        """
        return {}

    def add_root_elements(self, handler):
        """
        Hook: add child elements to the root (i.e. feed/channel) element.
        Called from write(). The base implementation adds none.
        """
        pass

    def item_attributes(self, item):
        """
        Hook: extra attributes for each item (i.e. item/entry) element.
        The base implementation adds none.
        """
        return {}

    def add_item_elements(self, handler, item):
        """
        Hook: add child elements to each item (i.e. item/entry) element.
        The base implementation adds none.
        """
        pass

    def write(self, outfile, encoding):
        """
        Writes the feed, in the given encoding, to the file-like object
        outfile. Must be overridden by subclasses.
        """
        raise NotImplementedError

    def writeString(self, encoding):
        """
        Renders the feed in the given encoding and returns it as a string.
        """
        from StringIO import StringIO
        stream = StringIO()
        self.write(stream, encoding)
        return stream.getvalue()

    def latest_post_date(self):
        """
        Returns the most recent pubdate among the items, or the current
        date/time when no item has a pubdate.
        """
        pubdates = [entry['pubdate'] for entry in self.items
                    if entry['pubdate'] is not None]
        if not pubdates:
            return datetime.datetime.now()
        pubdates.sort()
        return pubdates[-1]
|---|
| 168 |
|
|---|
| 169 |
class Enclosure(object):
    "Holds the url, length and mime_type of an RSS enclosure"

    def __init__(self, url, length, mime_type):
        "All args are expected to be Python Unicode objects; url is passed through iri_to_uri()"
        self.length = length
        self.mime_type = mime_type
        self.url = iri_to_uri(url)
|---|
| 175 |
|
|---|
| 176 |
class RssFeed(SyndicationFeed):
    """
    Common base for the RSS feeds: writes the <rss>/<channel> skeleton.
    The version attribute is read from self._version, which this class does
    not define itself.
    """
    mime_type = 'application/rss+xml'

    def write(self, outfile, encoding):
        "Writes the complete RSS document to outfile in the given encoding."
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement(u"rss", self.rss_attributes())
        xml.startElement(u"channel", self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        self.endChannelElement(xml)
        xml.endElement(u"rss")

    def rss_attributes(self):
        "Returns the attributes placed on the <rss> element."
        return {u"version": self._version}

    def write_items(self, handler):
        "Writes one <item> element per feed item."
        for entry in self.items:
            handler.startElement(u'item', self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement(u"item")

    def add_root_elements(self, handler):
        "Writes the channel-level elements; optional ones are skipped when None."
        feed = self.feed
        handler.addQuickElement(u"title", feed['title'])
        handler.addQuickElement(u"link", feed['link'])
        handler.addQuickElement(u"description", feed['description'])

        language = feed['language']
        if language is not None:
            handler.addQuickElement(u"language", language)

        for category in feed['categories']:
            handler.addQuickElement(u"category", category)

        feed_copyright = feed['feed_copyright']
        if feed_copyright is not None:
            handler.addQuickElement(u"copyright", feed_copyright)

        # lastBuildDate is always written, using the latest post date.
        build_date = rfc2822_date(self.latest_post_date())
        handler.addQuickElement(u"lastBuildDate", build_date.decode('utf-8'))

        ttl = feed['ttl']
        if ttl is not None:
            handler.addQuickElement(u"ttl", ttl)

    def endChannelElement(self, handler):
        "Closes the <channel> element."
        handler.endElement(u"channel")
|---|
| 213 |
|
|---|
| 214 |
class RssUserland091Feed(RssFeed):
    "RSS feed with version 0.91: items carry only title, link and description"
    _version = u"0.91"

    def add_item_elements(self, handler, item):
        "Writes title and link, then description unless it is None."
        for name in (u"title", u"link"):
            handler.addQuickElement(name, item[name])
        description = item['description']
        if description is not None:
            handler.addQuickElement(u"description", description)
|---|
| 221 |
|
|---|
| 222 |
class Rss201rev2Feed(RssFeed):
    "RSS feed with version 2.0"
    # Spec: http://blogs.law.harvard.edu/tech/rss
    _version = u"2.0"

    def add_item_elements(self, handler, item):
        "Writes the child elements of one <item>; optional ones are skipped when None."
        handler.addQuickElement(u"title", item['title'])
        handler.addQuickElement(u"link", item['link'])
        description = item['description']
        if description is not None:
            handler.addQuickElement(u"description", description)

        # Author information: <author> whenever an e-mail address is known
        # (with the name in parentheses if there is one), otherwise
        # <dc:creator> carrying just the name.
        author_name = item["author_name"]
        author_email = item["author_email"]
        if author_email:
            if author_name:
                author = "%s (%s)" % (author_email, author_name)
            else:
                author = author_email
            handler.addQuickElement(u"author", author)
        elif author_name:
            handler.addQuickElement(u"dc:creator", author_name, {"xmlns:dc": u"http://purl.org/dc/elements/1.1/"})

        pubdate = item['pubdate']
        if pubdate is not None:
            handler.addQuickElement(u"pubDate", rfc2822_date(pubdate).decode('utf-8'))

        # Simple optional elements, in output order.
        optional = (
            (u"comments", 'comments'),
            (u"guid", 'unique_id'),
            (u"ttl", 'ttl'),
        )
        for tag, key in optional:
            value = item[key]
            if value is not None:
                handler.addQuickElement(tag, value)

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            handler.addQuickElement(u"enclosure", '',
                {u"url": enclosure.url, u"length": enclosure.length,
                    u"type": enclosure.mime_type})

        # Categories.
        for category in item['categories']:
            handler.addQuickElement(u"category", category)
|---|
| 258 |
|
|---|
| 259 |
class Atom1Feed(SyndicationFeed):
    "Atom feed: a <feed> root element holding one <entry> per item"
    # Spec: http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
    mime_type = 'application/atom+xml'
    ns = u"http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        "Writes the complete Atom document to outfile in the given encoding."
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement(u'feed', self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        xml.endElement(u"feed")

    def root_attributes(self):
        "Returns the <feed> attributes: the namespace, plus xml:lang when a language is set."
        attrs = {u"xmlns": self.ns}
        language = self.feed['language']
        if language is not None:
            attrs[u"xml:lang"] = language
        return attrs

    def add_root_elements(self, handler):
        "Writes the feed-level elements; optional ones are skipped when None."
        feed = self.feed
        handler.addQuickElement(u"title", feed['title'])
        handler.addQuickElement(u"link", "", {u"rel": u"alternate", u"href": feed['link']})
        feed_url = feed['feed_url']
        if feed_url is not None:
            handler.addQuickElement(u"link", "", {u"rel": u"self", u"href": feed_url})
        handler.addQuickElement(u"id", feed['id'])
        updated = rfc3339_date(self.latest_post_date())
        handler.addQuickElement(u"updated", updated.decode('utf-8'))

        # The <author> element is only written when a name is present;
        # e-mail and uri are optional children of it.
        author_name = feed['author_name']
        if author_name is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", author_name)
            for tag, key in ((u"email", 'author_email'), (u"uri", 'author_link')):
                value = feed[key]
                if value is not None:
                    handler.addQuickElement(tag, value)
            handler.endElement(u"author")

        subtitle = feed['subtitle']
        if subtitle is not None:
            handler.addQuickElement(u"subtitle", subtitle)
        for category in feed['categories']:
            handler.addQuickElement(u"category", "", {u"term": category})
        feed_copyright = feed['feed_copyright']
        if feed_copyright is not None:
            handler.addQuickElement(u"rights", feed_copyright)

    def write_items(self, handler):
        "Writes one <entry> element per feed item."
        for entry in self.items:
            handler.startElement(u"entry", self.item_attributes(entry))
            self.add_item_elements(handler, entry)
            handler.endElement(u"entry")

    def add_item_elements(self, handler, item):
        "Writes the child elements of one <entry>; optional ones are skipped when None."
        handler.addQuickElement(u"title", item['title'])
        handler.addQuickElement(u"link", u"", {u"href": item['link'], u"rel": u"alternate"})
        pubdate = item['pubdate']
        if pubdate is not None:
            handler.addQuickElement(u"updated", rfc3339_date(pubdate).decode('utf-8'))

        # Author information.
        author_name = item['author_name']
        if author_name is not None:
            handler.startElement(u"author", {})
            handler.addQuickElement(u"name", author_name)
            for tag, key in ((u"email", 'author_email'), (u"uri", 'author_link')):
                value = item[key]
                if value is not None:
                    handler.addQuickElement(tag, value)
            handler.endElement(u"author")

        # Unique ID: fall back to a tag URI built from the link and pubdate.
        unique_id = item['unique_id']
        if unique_id is None:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement(u"id", unique_id)

        # Summary.
        description = item['description']
        if description is not None:
            handler.addQuickElement(u"summary", description, {u"type": u"html"})

        # Enclosure.
        enclosure = item['enclosure']
        if enclosure is not None:
            handler.addQuickElement(u"link", '',
                {u"rel": u"enclosure",
                 u"href": enclosure.url,
                 u"length": enclosure.length,
                 u"type": enclosure.mime_type})

        # Categories.
        for category in item['categories']:
            handler.addQuickElement(u"category", u"", {u"term": category})

        # Rights.
        item_copyright = item['item_copyright']
        if item_copyright is not None:
            handler.addQuickElement(u"rights", item_copyright)
|---|
| 348 |
|
|---|
| 349 |
# This isolates the decision of what the system default is, so calling code can
# do "feedgenerator.DefaultFeed" instead of "feedgenerator.Rss201rev2Feed".
# DefaultFeed is an alias for the Rss201rev2Feed class, not a subclass of it.
DefaultFeed = Rss201rev2Feed
|---|