PolishLocalflavor: terc.py

File terc.py, 2.0 KB (added by Piotr Lewandowski <django@…>, 16 years ago)

+ terc.py

Line 
1#!/usr/bin/python2.5
2# encoding=UTF-8
3'''
4 Usage: terc.py <TERC.xml
5
6 TERC.xml can be found at http://www.stat.gov.pl/
7'''
8# Copyright © 2008
9# Piotr Lewandowski <piotr.lewandowski+django@gmail.com>,
10#
11# This program is free software; you can redistribute it and/or modify it
12# under the terms of the GNU General Public License, version 2, as
13# published by the Free Software Foundation.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19
20import re
21import sys
22import xml.etree.cElementTree as etree
23
def parse_TERC(stream):
    '''
    Yield a ``(code, name)`` pair for every ``row`` element read from *stream*.

    *stream* is a file name or file-like object accepted by
    ``etree.iterparse``.  Each child of a ``row`` contributes one entry,
    keyed by its ``name`` attribute and holding its text.

    ``code`` is the concatenation of the ``WOJ``, ``POW``, ``GMI`` and
    ``RODZ`` entries, where an empty (``None``) text counts as an empty
    string.  ``name`` is the ``NAZWA`` entry with surrounding whitespace
    stripped.

    Raises ``KeyError`` when a row lacks one of the expected entries and
    ``AttributeError`` when ``NAZWA`` has no text.
    '''
    for event, element in etree.iterparse(stream):
        # Only completed <row> elements matter; the "end" events of their
        # children and of the enclosing containers are skipped.
        if element.tag != 'row':
            continue
        item = dict((child.get('name'), child.text) for child in element)
        code = ''.join([item[n] or '' for n in ('WOJ', 'POW', 'GMI', 'RODZ')])
        yield code, item['NAZWA'].strip()
        # The row is fully consumed at this point.  Without clearing it,
        # iterparse keeps every parsed row attached to the tree, so memory
        # use would grow with the size of the whole input file.
        element.clear()
30
# Pattern fragment for the "M." / "m." prefix, optionally followed by " st.".
# It is interpolated into the county and commune patterns below.
CITY_PREFIX_RE = r'[Mm][.]( st[.])?'
# Each compiled pattern matches a leading prefix (including the trailing
# space) that gets removed from the raw name.
PROVINCE_RE = re.compile(r'^WOJ[.] ')
COUNTY_RE = re.compile(r'^Powiat( %s)? ' % CITY_PREFIX_RE)
COMMUNE_RE = re.compile(r'^%s ' % CITY_PREFIX_RE)


def _province_literal(c, n):
    '''Strip the "WOJ. " prefix, lowercase, and wrap in ugettext_lazy(u'...').'''
    stripped = PROVINCE_RE.sub('', n).lower()
    return "ugettext_lazy(u'%s')" % stripped


def _county_literal(c, n):
    '''Strip the "Powiat " (plus optional city) prefix; wrap in u'...'.'''
    stripped = COUNTY_RE.sub('', n)
    return "u'%s'" % stripped


def _commune_literal(c, n):
    '''
    Return the u'...' literal for a commune, or None to skip the entry.

    Only codes whose last digit is 1, 2 or 3 are kept.
    '''
    if int(c[-1]) not in (1, 2, 3):
        return None
    return "u'%s'" % COMMUNE_RE.sub('', n)


# Keyed by the length of the code.  Each value is a tuple of
# (dataset name, dict collecting code -> source literal, formatter), where
# formatter(code, name) returns the literal to emit or None to skip.
DATASETS = {
    2: ('provinces', {}, _province_literal),
    4: ('counties', {}, _county_literal),
    7: ('communes', {}, _commune_literal),
}
47
48if __name__ == '__main__':
49 for code, name in parse_TERC(sys.stdin):
50 index = len(code)
51 _, dict_, clean_name = DATASETS[index]
52 name = clean_name(code, name.replace("'", "\\'"))
53 if name:
54 dict_[code] = name
55
56 for _, (dict_name, dict_, _) in sorted(DATASETS.iteritems()):
57 print '%s = {' % ('PL_' + dict_name.upper())
58 for code, name in sorted(dict_.iteritems()):
59 print " %r: %s," % (code, name.encode('UTF-8'))
60 print '}\n'
61
62# vim:et ts=4 sw=4
Back to Top