#!/usr/bin/python2.5
# encoding=UTF-8
'''
Usage: terc.py <TERC.xml

TERC.xml can be found at http://www.stat.gov.pl/
'''
# Copyright © 2008
# Piotr Lewandowski <piotr.lewandowski+django@gmail.com>,
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License, version 2, as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

import re
import sys
import xml.etree.cElementTree as etree

def parse_TERC(stream):
    '''
    Yield ``(code, name)`` pairs for every ``row`` element of a TERC XML stream.

    Each child of a ``row`` is treated as one column: its ``name`` attribute
    is the column name and its text is the value.  ``code`` is the
    concatenation of the ``WOJ``, ``POW``, ``GMI`` and ``RODZ`` columns, where
    an empty column contributes nothing; ``name`` is the ``NAZWA`` column with
    surrounding whitespace stripped.

    Raises ``KeyError`` if a row lacks one of those columns and
    ``AttributeError`` if its ``NAZWA`` column has no text.
    '''
    for event, element in etree.iterparse(stream):
        # iterparse reports every closed element; only complete rows matter.
        if element.tag != 'row':
            continue
        item = dict((child.get('name'), child.text) for child in element)
        code = ''.join([item[n] or '' for n in ('WOJ', 'POW', 'GMI', 'RODZ')])
        name = item['NAZWA'].strip()
        # The values have been copied out, so release the row's children;
        # otherwise the parser keeps every processed row in memory.
        element.clear()
        yield code, name

# City marker "M." / "m.", optionally followed by " st.".  Kept as a plain
# pattern fragment (not compiled) because it is interpolated into the
# patterns below.
CITY_PREFIX_RE = r'[Mm][.]( st[.])?'
# Leading "WOJ. " of a province name.
PROVINCE_RE = re.compile(r'^WOJ[.] ')
# Leading "Powiat " of a county name, plus an optional city marker.
COUNTY_RE = re.compile(r'^Powiat( %s)? ' % CITY_PREFIX_RE)
# Leading city marker of a commune name.
COMMUNE_RE = re.compile(r'^%s ' % CITY_PREFIX_RE)

# Output tables, keyed by the length of the TERC code.  Each entry is
# (table name, dict collecting code -> generated literal, formatter).  The
# formatter receives (code, name) and returns the Python source text to emit
# for that name, or a false value when the entry must be left out.
DATASETS = {
    # Province names lose the "WOJ. " prefix, are lower-cased and are wrapped
    # in a ugettext_lazy() call.
    2: ('provinces', {},
        lambda c, n: "ugettext_lazy(u'%s')" % PROVINCE_RE.sub('', n).lower()
    ),
    # County names lose the "Powiat " prefix and any city marker.
    4: ('counties', {},
        lambda c, n: "u'%s'" % COUNTY_RE.sub('', n)
    ),
    # Only codes whose last digit is 1, 2 or 3 are kept; every other kind is
    # dropped by returning None.
    # NOTE(review): presumably these RODZ values denote actual communes --
    # confirm against the TERC register documentation.
    7: ('communes', {},
        lambda c, n: ("u'%s'" % COMMUNE_RE.sub('', n) if int(c[-1]) in (1, 2, 3) else None)
    ),
}

if __name__ == '__main__':
    # Pass 1: file every parsed row into the table selected by the length of
    # its code.  A code of unexpected length raises KeyError here.
    for code, name in parse_TERC(sys.stdin):
        index = len(code)
        _, dict_, clean_name = DATASETS[index]
        # The name is embedded in a single-quoted literal of the generated
        # source, so escape backslashes first and then the quotes; escaping
        # quotes alone would let a backslash in the input corrupt the literal.
        escaped = name.replace('\\', '\\\\').replace("'", "\\'")
        name = clean_name(code, escaped)
        if name:
            dict_[code] = name

    # Pass 2: dump each table as a Python dict literal named PL_<TABLE>,
    # tables ordered by code length and entries ordered by code.
    # print is given one parenthesized argument so the syntax is valid both as
    # the Python 2 statement and as the Python 3 function; the UTF-8 encoding
    # of the value still assumes Python 2 byte-string output.
    for _, (dict_name, dict_, _) in sorted(DATASETS.items()):
        print('%s = {' % ('PL_' + dict_name.upper()))
        for code, name in sorted(dict_.items()):
            print(" %r: %s," % (code, name.encode('UTF-8')))
        print('}\n')

# vim:et ts=4 sw=4