1 | """
|
---|
2 | Syndication feed generation library -- for RSS 2.0 and Atom 1.0
|
---|
3 |
|
---|
4 | Requires:
|
---|
5 |
|
---|
6 | * ElementTree <http://effbot.org/zone/element-index.htm>
|
---|
7 |
|
---|
8 | Changes from original django feedgenerator:
|
---|
9 | * Added author_name, author_email, author_link, categories and subtitle
|
---|
10 | parameters to SyndicationFeed
|
---|
11 | * Renamed writeString to write_string.
|
---|
12 | * Uses ElementTree rather than SimpleXMLWriter.
|
---|
13 | * DefaultRssFeed is renamed to DefaultFeed.
|
---|
14 | * DefaultFeed is set to Atom10Feed.
|
---|
15 |
|
---|
16 | Notes:
|
---|
17 |
|
---|
18 | * All string input values need to be unicode objects.
|
---|
19 | * Date objects need to be Python datetime objects.
|
---|
20 | * Only RSS 2.01 and Atom 1.0 are implemented.
|
---|
21 | * DefaultFeed is Atom10Feed
|
---|
22 |
|
---|
23 | * For RSS/Atom feeds, feed publication date will be set as the most recent
|
---|
24 | item date.
|
---|
25 | * For Atom feeds, we only populate the <summary> fields for each item.
|
---|
26 | * For Atom feeds, if no unique_id is supplied, a Tag URI is generated:
|
---|
27 | <http://diveintomark.org/archives/2004/05/28/howto-atom-id>
|
---|
28 | * For Atom feeds, description field will be parsed as XHTML and embedded inline.
|
---|
29 | If it fails to validate as XML/XHTML, it will be escaped and embedded as HTML.
|
---|
30 |
|
---|
31 | Example:
|
---|
32 |
|
---|
33 | >>> feed = feedgenerator.Atom10Feed(
|
---|
34 | ... title=u"Another Boring Blog",
|
---|
35 | ... link=u"http://anotherboringblog.com/",
|
---|
36 | ... description=u"A group weblog by the sharpest minds in online media/journalism/publishing.",
|
---|
37 | ... feed_url = u"http://anotherboringblog.com/atom.xml",
|
---|
38 | ... author_name = u"A. Blogger",
|
---|
39 | ... author_email = u"a.blogger@anotherboringblog.com",
|
---|
40 | ... categories = [u"blog", u"boring"]
|
---|
41 | ... )
|
---|
42 | >>> feed.add_item(
|
---|
43 | ... title=u"What I had for breakfast",
|
---|
44 | ... link=u"http://anotherboringblog.com/what-i-had-for-breakfast/",
|
---|
45 | ... description=u"Eggs, Bacon and Potatoes.",
|
---|
46 | ... pubdate = datetime.now(),
|
---|
47 | ... categories = [u"food"]
|
---|
48 | ... )
|
---|
49 | >>> print feed.write_string('utf-8')
|
---|
50 |
|
---|
51 | """
|
---|
52 |
|
---|
53 | from elementtree.ElementTree import Element, SubElement, ElementTree, fromstring, tostring
|
---|
54 | from xml.parsers.expat import ExpatError
|
---|
55 | import re
|
---|
56 | import email
|
---|
57 | import time
|
---|
58 | from datetime import datetime
|
---|
59 |
|
---|
# XML namespace URIs: ATOM_NS is set as the ``xmlns`` of the Atom <feed> root,
# XHTML_NS as the ``xmlns`` of the <div> that wraps inline XHTML summaries.
ATOM_NS = "http://www.w3.org/2005/Atom"
XHTML_NS = "http://www.w3.org/1999/xhtml"
62 |
|
---|
class Enclosure:
    """Plain value holder describing one RSS enclosure (an attached file)."""

    def __init__(self, url, length, mime_type):
        """Store the enclosure fields unchanged.

        url, length and mime_type are all expected to be Python Unicode
        objects; no conversion or validation is performed.
        """
        self.url = url
        self.length = length
        self.mime_type = mime_type
68 |
|
---|
class SyndicationFeed:
    """Base class for syndication feeds.

    Feed-level metadata is kept in the ``self.feed`` dict and each added item
    is kept as a dict in the ``self.items`` list.  Subclasses should provide
    ``write_string(encoding)``, which ``write()`` relies on.
    """

    def __init__(self, title, link, description, feed_url,
                 author_name=None, author_email=None, author_link=None,
                 categories=None, subtitle=None, language=None):
        """Record the feed-level metadata.

        All string values are expected to be Python Unicode objects.

        categories - List of Unicode objects; omitted or None means no
                     categories.
        """
        # Build a fresh list per feed.  A literal ``[]`` default would be one
        # list object shared by every feed created without categories.
        if categories is None:
            categories = []
        self.feed = {
            'title': title,
            'link': link,
            'feed_url': feed_url,
            'description': description,
            'author_name': author_name,
            'author_email': author_email,
            'author_link': author_link,
            'categories': categories,
            'subtitle': subtitle,
            'language': language,
        }
        self.items = []

    def add_item(self, title, link, description,
                 author_name=None, author_email=None, pubdate=None,
                 comments=None, unique_id=None, enclosure=None,
                 categories=None):
        """
        Adds an item to the feed. All args are expected to be Python Unicode
        objects except:

        pubdate - DateTime object.
        enclosure - Enclosure object.
        categories - List of Unicode objects; omitted or None means no
                     categories.
        """
        # Same reasoning as in __init__: never share one default list
        # between items.
        if categories is None:
            categories = []
        self.items.append({
            'title': title,
            'link': link,
            'description': description,
            'author_email': author_email,
            'author_name': author_name,
            'pubdate': pubdate,
            'comments': comments,
            'unique_id': unique_id,
            'enclosure': enclosure,
            'categories': categories,
        })

    def count(self):
        """ Returns the number of items added so far """
        return len(self.items)

    def write(self, f, encoding):
        """ Writes output to a file object with specified encoding """
        f.write(self.write_string(encoding))

    def latest_post_date(self):
        """ Get the latest item date otherwise just return the current date """
        dates = [i['pubdate'] for i in self.items if i['pubdate'] is not None]
        if dates:
            return max(dates)
        return datetime.now()
127 |
|
---|
128 |
|
---|
class Rss201Feed(SyndicationFeed):
    """ RSS 2.01 rev 2 feed <http://blogs.law.harvard.edu/tech/rss> """

    def rfc2822_date(self, date):
        """ Formats a datetime as an RFC 2822 date string.

        The datetime is turned into a timestamp with time.mktime(), i.e. it
        is interpreted as local time.
        """
        return email.Utils.formatdate(time.mktime(date.timetuple()))

    def write_string(self, encoding):
        """ Returns the whole feed serialized as XML in the given encoding """
        tree = Element('rss', {'version':'2.0'})
        channel = self.write_channel_header(tree)
        self.write_items(channel)
        return tostring(tree, encoding = encoding)

    def write_channel_header(self, tree):
        """ Appends <channel> with the feed-level elements to tree.

        Returns the new channel element so that items can be added to it.
        """
        channel = SubElement(tree, 'channel')
        SubElement(channel, 'title').text = self.feed['title']
        SubElement(channel, 'link').text = self.feed['link']
        SubElement(channel, 'description').text = self.feed['description']

        if self.feed['language'] is not None:
            SubElement(channel, 'language').text = self.feed['language']

        # An ``is not []`` test compares identity with a brand-new list and
        # is therefore always true; simply iterate, treating None as empty.
        for cat in self.feed['categories'] or []:
            SubElement(channel, 'category').text = cat

        SubElement(channel, 'pubDate').text = self.rfc2822_date(self.latest_post_date())

        return channel

    def write_items(self, channel):
        """ Appends one <item> element per feed item to channel """
        for item in self.items:
            entry = SubElement(channel, 'item')
            SubElement(entry, 'title').text = item['title']
            SubElement(entry, 'link').text = item['link']
            if item['description'] is not None:
                SubElement(entry, 'description').text = item['description']
            # <author> is only written when both the e-mail and the name
            # are known; it is rendered as "email (name)".
            if item['author_name'] is not None and item['author_email'] is not None:
                SubElement(entry, 'author').text = u'%s (%s)' % (item['author_email'], item['author_name'])
            if item['pubdate'] is not None:
                SubElement(entry, 'pubDate').text = self.rfc2822_date(item['pubdate'])
            # Fall back to the item link when no explicit id was supplied.
            if item['unique_id'] is not None:
                SubElement(entry, 'guid').text = item['unique_id']
            else:
                SubElement(entry, 'guid').text = item['link']
            if item['enclosure'] is not None:
                SubElement(entry, 'enclosure', {'url': item['enclosure'].url,
                                                'length': item['enclosure'].length,
                                                'type': item['enclosure'].mime_type})
            for cat in item['categories'] or []:
                SubElement(entry, 'category').text = cat
179 |
|
---|
180 |
|
---|
class Atom10Feed(SyndicationFeed):
    """ An Atom 1.0 Feed
    <http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html>
    """

    def rfc3339_date(self, date):
        """ Formats a datetime as an RFC 3339 timestamp.

        A literal 'Z' suffix is appended; no timezone conversion is done.
        """
        return date.strftime('%Y-%m-%dT%H:%M:%SZ')

    def tag_uri(self, url, date):
        """ Creates a TagURI
        <http://diveintomark.org/archives/2004/05/28/howto-atom-id>

        The URL scheme is dropped, the date is inserted after the host name
        (at the first '/') and every '#' is turned into '/'.
        """
        # Strip https:// as well as http://; otherwise the first '/' found
        # below would be the one inside the scheme separator.
        tag = re.sub(r'^https?://', '', url)
        tag = re.sub('/', ',%s:/' % date.strftime('%Y-%m-%d'), tag, 1)
        tag = re.sub('#', '/', tag)
        return 'tag:' + tag

    def write_string(self, encoding):
        """ Returns the whole feed serialized as XML in the given encoding """
        tree = Element('feed', {'xmlns':ATOM_NS})
        self.write_feed_header(tree)
        self.write_items(tree)
        return tostring(tree, encoding = encoding)

    def write_feed_header(self, tree):
        """ Appends the feed-level elements (title, links, id, author,
        subtitle, updated, categories) to tree """
        SubElement(tree, 'title').text = self.feed['title']
        SubElement(tree, 'link', {'rel':'alternate', 'href': self.feed['link']})
        SubElement(tree, 'link', {'rel':'self', 'href':self.feed['feed_url']})
        SubElement(tree, 'id').text = self.feed['link']

        if self.feed['author_name'] is not None:
            author = SubElement(tree, 'author')
            SubElement(author, 'name').text = self.feed['author_name']
            if self.feed['author_email'] is not None:
                SubElement(author, 'email').text = self.feed['author_email']
            if self.feed['author_link'] is not None:
                # The author's link belongs in <uri>, not in a second <email>.
                SubElement(author, 'uri').text = self.feed['author_link']

        if self.feed['subtitle'] is not None:
            SubElement(tree, 'subtitle').text = self.feed['subtitle']

        SubElement(tree, 'updated').text = self.rfc3339_date(self.latest_post_date())

        for cat in self.feed['categories'] or []:
            SubElement(tree, 'category', {'term':cat})

    def write_items(self, tree):
        """ Appends one <entry> element per feed item to tree """
        for item in self.items:
            entry = SubElement(tree, 'entry')
            SubElement(entry, 'title').text = item['title']
            SubElement(entry, 'link', {'rel':'alternate', 'href': item['link']})

            # pubdate is optional in add_item(); fall back to the feed's
            # latest date instead of failing on None.  Callers that need
            # stable ids should supply pubdate or unique_id, because the
            # fallback can be the current time.
            updated = item['pubdate']
            if updated is None:
                updated = self.latest_post_date()
            SubElement(entry, 'updated').text = self.rfc3339_date(updated)

            if item['unique_id'] is not None:
                SubElement(entry, 'id').text = item['unique_id']
            else:
                SubElement(entry, 'id').text = self.tag_uri(item['link'], updated)

            # here we do something cunning, try parsing fragment as xml
            # and if it works, we just put it in, otherwise, just
            # use type="html" and escape the contents
            description = item['description']
            if description is not None:
                try:
                    fragment = fromstring('<div>%s</div>' % description.encode('utf-8'))
                except ExpatError:
                    SubElement(entry, 'summary', {'type':'html'}).text = description
                else:
                    fragment.attrib['xmlns'] = XHTML_NS
                    SubElement(entry, 'summary', {'type':'xhtml'}).append(fragment)

            if item['enclosure'] is not None:
                SubElement(entry, 'link',
                           {'rel':'enclosure',
                            'href':item['enclosure'].url,
                            'length':item['enclosure'].length,
                            'type':item['enclosure'].mime_type})

            for cat in item['categories'] or []:
                SubElement(entry, 'category', {'term':cat})

            if item['author_name'] is not None:
                author = SubElement(entry, 'author')
                SubElement(author, 'name').text = item['author_name']
                if item['author_email'] is not None:
                    SubElement(author, 'email').text = item['author_email']
265 |
|
---|
# Alias naming the default feed implementation: Atom 1.0.
DefaultFeed = Atom10Feed
267 |
|
---|
268 |
|
---|