Ticket #498: feedgenerator.py

File feedgenerator.py, 10.7 KB (added by alastair@…, 18 years ago)
Line 
1"""
2Syndication feed generation library -- for RSS 2.0 and Atom 1.0
3
4Requires:
5
6* ElementTree <http://effbot.org/zone/element-index.htm>
7
8Changes from original django feedgenerator:
9* Added author_name, author_email, author_link, categories and subtitle
10 parameters to SyndicationFeed
11* Renamed writeString to write_string.
12* Uses ElementTree rather than SimpleXMLWriter.
13* DefaultRssFeed is renamed to DefaultFeed.
14* DefaultFeed is set to Atom10Feed.
15
16Notes:
17
18* All string input values need to be unicode objects.
19* Date objects need to be Python datetime objects.
20* Only RSS 2.01 and Atom 1.0 are implemented.
21* DefaultFeed is Atom10Feed
22
23* For RSS/Atom feeds, feed publication date will be set as the most recent
24 item date.
25* For Atom feeds, we only populate the <summary> fields for each item.
26* For Atom feeds, if no unique_id is supplied, a Tag URI is generated:
27 <http://diveintomark.org/archives/2004/05/28/howto-atom-id>
28* For Atom feeds, description field will be parsed as XHTML and embedded inline.
29 If it fails to validate as XML/XHTML, it will be escaped and embedded as HTML.
30
31Example:
32
33>>> feed = feedgenerator.Atom10Feed(
34... title=u"Another Boring Blog",
35... link=u"http://anotherboringblog.com/",
36... description=u"A group weblog by the sharpest minds in online media/journalism/publishing.",
37... feed_url = u"http://anotherboringblog.com/atom.xml",
38... author_name = u"A. Blogger",
39... author_email = u"a.blogger@anotherboringblog.com",
40... categories = [u"blog", u"boring"]
41... )
42>>> feed.add_item(
43... title=u"What I had for breakfast",
44... link=u"http://anotherboringblog.com/what-i-had-for-breakfast/",
45... description=u"Eggs, Bacon and Potatoes.",
46... pubdate = datetime.now(),
47... categories = [u"food"]
48... )
49>>> print feed.write_string('utf-8')
50
51"""
52
53from elementtree.ElementTree import Element, SubElement, ElementTree, fromstring, tostring
54from xml.parsers.expat import ExpatError
55import re
56import email
57import time
58from datetime import datetime
59
# XML namespace URIs: ATOM_NS is set as the ``xmlns`` of the Atom <feed> root,
# XHTML_NS as the ``xmlns`` of the <div> wrapping inline XHTML summaries.
ATOM_NS = 'http://www.w3.org/2005/Atom'
XHTML_NS = 'http://www.w3.org/1999/xhtml'
62
class Enclosure:
    """Represents an RSS enclosure.

    The three values are stored unchanged as attributes; the feed writers
    copy them verbatim into XML attribute values.
    """

    def __init__(self, url, length, mime_type):
        """Store the enclosure's attributes.

        All args are expected to be Python Unicode objects:

        url       - address of the enclosed resource.
        length    - written out as the ``length`` XML attribute.
        mime_type - written out as the ``type`` XML attribute.
        """
        self.url = url
        self.length = length
        self.mime_type = mime_type

    def __repr__(self):
        return 'Enclosure(%r, %r, %r)' % (self.url, self.length, self.mime_type)
68
class SyndicationFeed:
    """Base class for syndication feeds.

    Feed-level metadata is kept in the ``self.feed`` dict and the items, one
    dict per item, in the ``self.items`` list.  Subclasses should provide
    write_string().
    """

    def __init__(self, title, link, description, feed_url,
                 author_name=None, author_email=None, author_link=None,
                 categories=None, subtitle=None, language=None):
        """
        Records the feed metadata. All args are expected to be Python Unicode
        objects except:

        categories - List of Unicode objects (defaults to an empty list).
        """
        # A fresh list per feed: a ``[]`` default would be shared by every
        # feed created without explicit categories.
        if categories is None:
            categories = []
        self.feed = {
            'title': title,
            'link': link,
            'feed_url': feed_url,
            'description': description,
            'author_name': author_name,
            'author_email': author_email,
            'author_link': author_link,
            'categories': categories,
            'subtitle': subtitle,
            'language': language,
        }
        self.items = []

    def add_item(self, title, link, description,
                 author_name=None, author_email=None, pubdate=None,
                 comments=None, unique_id=None, enclosure=None,
                 categories=None):
        """
        Adds an item to the feed. All args are expected to be Python Unicode
        objects except:

        pubdate - DateTime object.
        enclosure - Enclosure object.
        categories - List of Unicode objects (defaults to an empty list).
        """
        # Same reasoning as in __init__: never share one default list
        # between items.
        if categories is None:
            categories = []
        self.items.append({
            'title': title,
            'link': link,
            'description': description,
            'author_email': author_email,
            'author_name': author_name,
            'pubdate': pubdate,
            'comments': comments,
            'unique_id': unique_id,
            'enclosure': enclosure,
            'categories': categories,
        })

    def count(self):
        """ Returns the number of items added so far """
        return len(self.items)

    def write_string(self, encoding):
        """ Returns the serialized feed; must be provided by subclasses """
        raise NotImplementedError(
            'subclasses of SyndicationFeed must provide write_string()')

    def write(self, f, encoding):
        """ Writes output to a file object with specified encoding """
        f.write(self.write_string(encoding))

    def latest_post_date(self):
        """ Get the latest item date otherwise just return the current date """
        dates = [item['pubdate'] for item in self.items
                 if item['pubdate'] is not None]
        if dates:
            return max(dates)
        return datetime.now()
127
128
class Rss201Feed(SyndicationFeed):
    """ RSS 2.01 rev 2 feed <http://blogs.law.harvard.edu/tech/rss> """

    def rfc2822_date(self, date):
        """ Formats a datetime as an RFC 2822 date string """
        # Imported here because ``import email`` alone does not guarantee the
        # ``utils`` submodule is loaded, and the legacy ``email.Utils`` alias
        # no longer exists on Python 3.
        from email.utils import formatdate
        return formatdate(time.mktime(date.timetuple()))

    def write_string(self, encoding):
        """ Returns the whole feed serialized in the given encoding """
        tree = Element('rss', {'version': '2.0'})
        channel = self.write_channel_header(tree)
        self.write_items(channel)
        return tostring(tree, encoding=encoding)

    def write_channel_header(self, tree):
        """ Appends <channel> and its feed-level children to tree; returns the channel element """
        channel = SubElement(tree, 'channel')
        SubElement(channel, 'title').text = self.feed['title']
        SubElement(channel, 'link').text = self.feed['link']
        SubElement(channel, 'description').text = self.feed['description']

        if self.feed['language'] is not None:
            SubElement(channel, 'language').text = self.feed['language']

        # Iterating an empty list is already a no-op, so no emptiness test is
        # needed; ``or []`` additionally tolerates a None value.
        for cat in self.feed['categories'] or []:
            SubElement(channel, 'category').text = cat

        SubElement(channel, 'pubDate').text = self.rfc2822_date(self.latest_post_date())

        return channel

    def write_items(self, channel):
        """ Appends one <item> element per feed item to channel """
        for item in self.items:
            entry = SubElement(channel, 'item')
            SubElement(entry, 'title').text = item['title']
            SubElement(entry, 'link').text = item['link']
            if item['description'] is not None:
                SubElement(entry, 'description').text = item['description']
            # <author> is only written when both parts are known; the text is
            # laid out as "email (name)".
            if item['author_name'] is not None and item['author_email'] is not None:
                SubElement(entry, 'author').text = u'%s (%s)' % (
                    item['author_email'], item['author_name'])
            if item['pubdate'] is not None:
                SubElement(entry, 'pubDate').text = self.rfc2822_date(item['pubdate'])
            # The item link doubles as the guid when no unique_id was given.
            if item['unique_id'] is not None:
                SubElement(entry, 'guid').text = item['unique_id']
            else:
                SubElement(entry, 'guid').text = item['link']
            enclosure = item['enclosure']
            if enclosure is not None:
                SubElement(entry, 'enclosure', {'url': enclosure.url,
                                                'length': enclosure.length,
                                                'type': enclosure.mime_type})
            for cat in item['categories'] or []:
                SubElement(entry, 'category').text = cat
179
180
class Atom10Feed(SyndicationFeed):
    """ An Atom 1.0 Feed
    <http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html>
    """

    def rfc3339_date(self, date):
        """ Formats a datetime as an RFC 3339 timestamp with a 'Z' suffix """
        # The 'Z' suffix claims UTC, so a timezone-aware value is converted
        # first. Naive values are formatted unchanged, as before.
        offset = date.utcoffset()
        if offset is not None:
            date = (date - offset).replace(tzinfo=None)
        return date.strftime('%Y-%m-%dT%H:%M:%SZ')

    def tag_uri(self, url, date):
        """ Creates a TagURI
        <http://diveintomark.org/archives/2004/05/28/howto-atom-id>
        """
        # Drop the scheme (http or https), put ",YYYY-MM-DD:" in front of the
        # first slash, and turn any '#' into '/'.
        tag = re.sub(r'^https?://', '', url)
        tag = tag.replace('/', ',%s:/' % date.strftime('%Y-%m-%d'), 1)
        tag = tag.replace('#', '/')
        return 'tag:' + tag

    def write_string(self, encoding):
        """ Returns the whole feed serialized in the given encoding """
        tree = Element('feed', {'xmlns': ATOM_NS})
        self.write_feed_header(tree)
        self.write_items(tree)
        return tostring(tree, encoding=encoding)

    def write_feed_header(self, tree):
        """ Appends the feed-level elements (title, links, id, author, ...) to tree """
        SubElement(tree, 'title').text = self.feed['title']
        SubElement(tree, 'link', {'rel': 'alternate', 'href': self.feed['link']})
        SubElement(tree, 'link', {'rel': 'self', 'href': self.feed['feed_url']})
        SubElement(tree, 'id').text = self.feed['link']

        # E-mail and link are only written as part of an <author> element,
        # which in turn is only written when a name is available.
        if self.feed['author_name'] is not None:
            author = SubElement(tree, 'author')
            SubElement(author, 'name').text = self.feed['author_name']
            if self.feed['author_email'] is not None:
                SubElement(author, 'email').text = self.feed['author_email']
            if self.feed['author_link'] is not None:
                # The author's link belongs in <uri>, not in a second <email>.
                SubElement(author, 'uri').text = self.feed['author_link']

        if self.feed['subtitle'] is not None:
            SubElement(tree, 'subtitle').text = self.feed['subtitle']

        SubElement(tree, 'updated').text = self.rfc3339_date(self.latest_post_date())

        for cat in self.feed['categories'] or []:
            SubElement(tree, 'category', {'term': cat})

    def write_items(self, tree):
        """ Appends one <entry> element per feed item to tree """
        # Used for items added without a pubdate, which would otherwise make
        # rfc3339_date() and tag_uri() fail on None.
        fallback_date = self.latest_post_date()

        for item in self.items:
            updated = item['pubdate']
            if updated is None:
                updated = fallback_date

            entry = SubElement(tree, 'entry')
            SubElement(entry, 'title').text = item['title']
            SubElement(entry, 'link', {'rel': 'alternate', 'href': item['link']})
            SubElement(entry, 'updated').text = self.rfc3339_date(updated)

            if item['unique_id'] is not None:
                SubElement(entry, 'id').text = item['unique_id']
            else:
                SubElement(entry, 'id').text = self.tag_uri(item['link'], updated)

            # here we do something cunning, try parsing fragment as xml
            # and if it works, we just put it in, otherwise, just
            # use type="html" and escape the contents
            description = item['description']
            if description is not None:
                try:
                    # Format as text first and encode afterwards, so the
                    # parser gets UTF-8 bytes on both Python 2 and 3.
                    fragment = fromstring(
                        (u'<div>%s</div>' % description).encode('utf-8'))
                except (ExpatError, SyntaxError):
                    # xml.etree reports bad markup as ParseError, a
                    # SyntaxError subclass; the old elementtree package lets
                    # ExpatError through.
                    SubElement(entry, 'summary', {'type': 'html'}).text = description
                else:
                    fragment.attrib['xmlns'] = XHTML_NS
                    SubElement(entry, 'summary', {'type': 'xhtml'}).append(fragment)

            enclosure = item['enclosure']
            if enclosure is not None:
                SubElement(entry, 'link',
                           {'rel': 'enclosure',
                            'href': enclosure.url,
                            'length': enclosure.length,
                            'type': enclosure.mime_type})

            for cat in item['categories'] or []:
                SubElement(entry, 'category', {'term': cat})

            if item['author_name'] is not None:
                author = SubElement(entry, 'author')
                SubElement(author, 'name').text = item['author_name']
                if item['author_email'] is not None:
                    SubElement(author, 'email').text = item['author_email']
265
# Feed class to use when the caller has no format preference: Atom 1.0.
DefaultFeed = Atom10Feed
267
268
Back to Top