Context Navigation

Back to Ticket #498

Ticket #498: feedgenerator.py

File feedgenerator.py, 10.7 KB (added by alastair@…, 19 years ago)

Line
1	"""
2	Syndication feed generation library -- for RSS 2.0 and Atom 1.0
3
4	Requires:
5
6	* ElementTree <http://effbot.org/zone/element-index.htm>
7
8	Changes from original django feedgenerator:
9	* Added author_name, author_email, author_link, categories and subtitle
10	parameters to SyndicationFeed
11	* Renamed writeString to write_string.
12	* Uses ElementTree rather than SimpleXMLWriter.
13	* DefaultRssFeed is renamed to DefaultFeed.
14	* DefaultFeed is set to Atom10Feed.
15
16	Notes:
17
18	* All string input values need to be unicode objects.
19	* Date objects need to be Python datetime objects.
20	* Only RSS 2.01 and Atom 1.0 are implemented.
21	* DefaultFeed is Atom10Feed
22
23	* For RSS/Atom feeds, feed publication date will be set as the most recent
24	item date.
25	* For Atom feeds, we only populate the <summary> fields for each item.
26	* For Atom feeds, if no unique_id is supplied, a Tag URI is generated:
27	<http://diveintomark.org/archives/2004/05/28/howto-atom-id>
28	* For Atom feeds, description field will be parsed as XHTML and embedded inline.
29	If it fails to validate as XML/XHTML, it will be escaped and embedded as HTML.
30
31	Example:
32
33	>>> feed = feedgenerator.Atom10Feed(
34	... title=u"Another Boring Blog",
35	... link=u"http://anotherboringblog.com/",
36	... description=u"A group weblog by the sharpest minds in online media/journalism/publishing.",
37	... feed_url = u"http://anotherboringblog.com/atom.xml",
38	... author_name = u"A. Blogger",
39	... author_email = u"a.blogger@anotherboringblog.com",
40	... categories = [u"blog", u"boring"]
41	... )
42	>>> feed.add_item(
43	... title=u"What I had for breakfast",
44	... link=u"http://anotherboringblog.com/what-i-had-for-breakfast/",
45	... description=u"Eggs, Bacon and Potatoes.",
46	... pubdate = datetime.now(),
47	... categories = [u"food"]
48	... )
49	>>> print feed.write_string('utf-8')
50
51	"""
52
53	from elementtree.ElementTree import Element, SubElement, ElementTree, fromstring, tostring
54	from xml.parsers.expat import ExpatError
55	import re
56	import email
57	import time
58	from datetime import datetime
59
60	ATOM_NS = 'http://www.w3.org/2005/Atom'  # namespace URI set as xmlns on the Atom <feed> root element
61	XHTML_NS = 'http://www.w3.org/1999/xhtml'  # namespace URI set as xmlns on inline XHTML <div> summaries
62
class Enclosure:
    """Value holder describing the media file attached to a feed item."""

    def __init__(self, url, length, mime_type):
        """Store the enclosure's URL, length and MIME type.

        All three arguments are expected to be Python Unicode objects.
        """
        self.url = url
        self.length = length
        self.mime_type = mime_type
68
class SyndicationFeed:
    """Base class for syndication feeds.

    Feed-level metadata is kept in the ``self.feed`` dict and the entries in
    the ``self.items`` list (one dict per item).  Subclasses should provide
    ``write_string(encoding)``, which ``write()`` relies on.
    """

    def __init__(self, title, link, description, feed_url,
                 author_name=None, author_email=None, author_link=None,
                 categories=None, subtitle=None, language=None):
        """Record the feed-level metadata.

        ``categories`` is a list of Unicode objects; when omitted, a fresh
        empty list is used.
        """
        # A literal ``[]`` default would be created once and shared by every
        # feed built without categories, so one feed's mutations would leak
        # into all the others.  Use None as the sentinel instead.
        if categories is None:
            categories = []
        self.feed = {
            'title': title,
            'link': link,
            'feed_url': feed_url,
            'description': description,
            'author_name': author_name,
            'author_email': author_email,
            'author_link': author_link,
            'categories': categories,
            'subtitle': subtitle,
            'language': language,
        }
        self.items = []

    def add_item(self, title, link, description,
                 author_name=None, author_email=None, pubdate=None,
                 comments=None, unique_id=None, enclosure=None,
                 categories=None):
        """
        Adds an item to the feed. All args are expected to be Python Unicode
        objects except:

        pubdate - DateTime object.
        enclosure - Enclosure object.
        categories - List of Unicode objects (a fresh empty list when omitted).
        """
        # Same shared-mutable-default hazard as in __init__.
        if categories is None:
            categories = []
        self.items.append({
            'title': title,
            'link': link,
            'description': description,
            'author_email': author_email,
            'author_name': author_name,
            'pubdate': pubdate,
            'comments': comments,
            'unique_id': unique_id,
            'enclosure': enclosure,
            'categories': categories,
        })

    def count(self):
        """Return the number of items added so far."""
        return len(self.items)

    def write(self, f, encoding):
        """Write the output of ``write_string(encoding)`` to file object *f*."""
        f.write(self.write_string(encoding))

    def latest_post_date(self):
        """Return the most recent item ``pubdate``.

        Items without a pubdate are ignored; if no item has one, the current
        date and time (``datetime.now()``) is returned instead.
        """
        dates = [item['pubdate'] for item in self.items
                 if item['pubdate'] is not None]
        if dates:
            return max(dates)
        return datetime.now()
127
128
class Rss201Feed(SyndicationFeed):
    """RSS 2.01 rev 2 feed <http://blogs.law.harvard.edu/tech/rss>."""

    def rfc2822_date(self, date):
        """Return *date* (a datetime) formatted as an RFC 2822 date string.

        The datetime is converted with ``time.mktime``, i.e. it is interpreted
        as local time.
        """
        # Import the submodule explicitly: the capitalised ``email.Utils``
        # spelling does not exist on Python 3, and ``import email`` alone does
        # not guarantee the submodule has been loaded.
        from email.utils import formatdate
        return formatdate(time.mktime(date.timetuple()))

    def write_string(self, encoding):
        """Serialise the whole feed and return it encoded as *encoding*."""
        tree = Element('rss', {'version': '2.0'})
        channel = self.write_channel_header(tree)
        self.write_items(channel)
        return tostring(tree, encoding=encoding)

    def write_channel_header(self, tree):
        """Append a <channel> with the feed metadata to *tree* and return it."""
        channel = SubElement(tree, 'channel')
        SubElement(channel, 'title').text = self.feed['title']
        SubElement(channel, 'link').text = self.feed['link']
        SubElement(channel, 'description').text = self.feed['description']

        if self.feed['language'] is not None:
            SubElement(channel, 'language').text = self.feed['language']

        # The former ``is not []`` guard compared identity against a brand-new
        # list and was therefore always true; iterating is enough.  ``or []``
        # additionally tolerates an explicit ``categories=None``.
        for cat in self.feed['categories'] or []:
            SubElement(channel, 'category').text = cat

        # The channel publication date is the most recent item date.
        SubElement(channel, 'pubDate').text = self.rfc2822_date(
            self.latest_post_date())

        return channel

    def write_items(self, channel):
        """Append one <item> element per feed item to *channel*."""
        for item in self.items:
            entry = SubElement(channel, 'item')
            SubElement(entry, 'title').text = item['title']
            SubElement(entry, 'link').text = item['link']
            if item['description'] is not None:
                SubElement(entry, 'description').text = item['description']
            # <author> is only written when both the e-mail address and the
            # name are known; it is rendered as "email (name)".
            if item['author_name'] is not None and item['author_email'] is not None:
                SubElement(entry, 'author').text = u'%s (%s)' % (
                    item['author_email'], item['author_name'])
            if item['pubdate'] is not None:
                SubElement(entry, 'pubDate').text = self.rfc2822_date(item['pubdate'])
            # Fall back to the item link when no explicit unique id is given.
            if item['unique_id'] is not None:
                SubElement(entry, 'guid').text = item['unique_id']
            else:
                SubElement(entry, 'guid').text = item['link']
            enclosure = item['enclosure']
            if enclosure is not None:
                SubElement(entry, 'enclosure', {
                    'url': enclosure.url,
                    'length': enclosure.length,
                    'type': enclosure.mime_type,
                })
            for cat in item['categories'] or []:
                SubElement(entry, 'category').text = cat
179
180
class Atom10Feed(SyndicationFeed):
    """An Atom 1.0 feed.

    <http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html>
    """

    def rfc3339_date(self, date):
        """Format *date* (a datetime) as an RFC 3339 timestamp ending in ``Z``.

        No timezone conversion is performed.
        NOTE(review): the literal ``Z`` suffix denotes UTC, so callers
        presumably need to supply UTC datetimes -- confirm.
        """
        return date.strftime('%Y-%m-%dT%H:%M:%SZ')

    def tag_uri(self, url, date):
        """Create a Tag URI from *url* and *date*.

        <http://diveintomark.org/archives/2004/05/28/howto-atom-id>
        """
        # Drop the scheme.  ``https://`` is stripped too; previously only
        # ``http://`` was, which left the scheme inside the tag for https
        # links and put the date in the wrong place.
        tag = re.sub(r'^https?://', '', url)
        # Insert ",YYYY-MM-DD:" after the authority, i.e. before the first "/".
        tag = tag.replace('/', ',%s:/' % date.strftime('%Y-%m-%d'), 1)
        tag = tag.replace('#', '/')
        return 'tag:' + tag

    def write_string(self, encoding):
        """Serialise the whole feed and return it encoded as *encoding*."""
        tree = Element('feed', {'xmlns': ATOM_NS})
        self.write_feed_header(tree)
        self.write_items(tree)
        return tostring(tree, encoding=encoding)

    def write_feed_header(self, tree):
        """Append the feed-level metadata elements to *tree*."""
        SubElement(tree, 'title').text = self.feed['title']
        SubElement(tree, 'link', {'rel': 'alternate', 'href': self.feed['link']})
        SubElement(tree, 'link', {'rel': 'self', 'href': self.feed['feed_url']})
        SubElement(tree, 'id').text = self.feed['link']

        if self.feed['author_name'] is not None:
            author = SubElement(tree, 'author')
            SubElement(author, 'name').text = self.feed['author_name']
            if self.feed['author_email'] is not None:
                SubElement(author, 'email').text = self.feed['author_email']
            if self.feed['author_link'] is not None:
                # An Atom person construct carries its link in <uri>; this
                # used to be emitted as a second <email> element.
                SubElement(author, 'uri').text = self.feed['author_link']

        if self.feed['subtitle'] is not None:
            SubElement(tree, 'subtitle').text = self.feed['subtitle']

        # The feed update date is the most recent item date.
        SubElement(tree, 'updated').text = self.rfc3339_date(self.latest_post_date())

        for cat in self.feed['categories'] or []:
            SubElement(tree, 'category', {'term': cat})

    def write_items(self, tree):
        """Append one <entry> element per feed item to *tree*."""
        fallback_date = None
        for item in self.items:
            entry = SubElement(tree, 'entry')
            SubElement(entry, 'title').text = item['title']
            SubElement(entry, 'link', {'rel': 'alternate', 'href': item['link']})

            # <updated> is mandatory on an Atom entry, but pubdate defaults to
            # None in add_item(); fall back to the feed's latest post date
            # instead of crashing inside strftime.
            updated = item['pubdate']
            if updated is None:
                if fallback_date is None:
                    fallback_date = self.latest_post_date()
                updated = fallback_date
            SubElement(entry, 'updated').text = self.rfc3339_date(updated)

            if item['unique_id'] is not None:
                entry_id = item['unique_id']
            elif item['pubdate'] is not None:
                entry_id = self.tag_uri(item['link'], item['pubdate'])
            else:
                # A Tag URI needs a date; without one use the link itself so
                # the id stays stable between feed generations.
                entry_id = item['link']
            SubElement(entry, 'id').text = entry_id

            self._write_summary(entry, item['description'])

            enclosure = item['enclosure']
            if enclosure is not None:
                SubElement(entry, 'link', {
                    'rel': 'enclosure',
                    'href': enclosure.url,
                    'length': enclosure.length,
                    'type': enclosure.mime_type,
                })

            for cat in item['categories'] or []:
                SubElement(entry, 'category', {'term': cat})

            if item['author_name'] is not None:
                author = SubElement(entry, 'author')
                SubElement(author, 'name').text = item['author_name']
                if item['author_email'] is not None:
                    SubElement(author, 'email').text = item['author_email']

    def _write_summary(self, entry, description):
        """Add a <summary> for *description* to *entry*; no-op when it is None."""
        if description is None:
            return
        # Try parsing the fragment as XML: if that works it is embedded inline
        # as XHTML, otherwise it is written as escaped text with type="html".
        # Formatting before encoding keeps this working whether ``%`` is
        # applied to unicode (Python 2) or str (Python 3).
        try:
            fragment = fromstring((u'<div>%s</div>' % description).encode('utf-8'))
        except (ExpatError, SyntaxError):
            # The standalone elementtree package raises ExpatError; the
            # standard library's xml.etree raises ParseError, which is a
            # SyntaxError subclass.
            SubElement(entry, 'summary', {'type': 'html'}).text = description
        else:
            fragment.attrib['xmlns'] = XHTML_NS
            SubElement(entry, 'summary', {'type': 'xhtml'}).append(fragment)
265
266	DefaultFeed = Atom10Feed  # alias: the default feed format is Atom 1.0
267
268

Download in other formats:

Original Format

Issues

Context Navigation

Ticket #498: feedgenerator.py

Download in other formats:

Django Links

Learn More

Get Involved

Get Help

Follow Us

Support Us