5 | | The lists of provinces/counties/communes (województwa/powiaty/gminy) are not constant and should be updated from time to time. Fortunately, this can be automated. The Polish [http://www.stat.gov.pl/gus/index_ENG_HTML.htm Central Statistical Office] ([http://www.stat.gov.pl/ Główny Urząd Statystyczny]) publishes an XML file with all the data needed to generate `pl_regions.py`: [http://www.stat.gov.pl/broker/access/prefile/downloadPreFile.jspa?id=68 `TERC.xml`]. |
6 | | |
7 | | {{{ |
8 | | #!python |
9 | | # encoding=UTF-8 |
10 | | ''' |
11 | | Usage: terc.py <TERC.xml |
12 | | |
13 | | TERC.xml can be found at http://www.stat.gov.pl/ |
14 | | ''' |
15 | | # Copyright © 2008 |
16 | | # Piotr Lewandowski <piotr.lewandowski+django@gmail.com>, |
17 | | # |
18 | | # This program is free software; you can redistribute it and/or modify it |
19 | | # under the terms of the GNU General Public License, version 2, as |
20 | | # published by the Free Software Foundation. |
21 | | # |
22 | | # This program is distributed in the hope that it will be useful, |
23 | | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
24 | | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
25 | | # GNU General Public License for more details. |
26 | | |
import re
import sys

try:
    # C-accelerated parser on Python 2; the module was removed in Python 3.9.
    import xml.etree.cElementTree as etree
except ImportError:
    import xml.etree.ElementTree as etree
30 | | |
def parse_TERC(stream):
    """Yield ``(code, name)`` pairs for every ``row`` element of a TERC XML stream.

    ``stream`` is passed straight to ``etree.iterparse``. Each child of a
    ``row`` element is read as one column: its ``name`` attribute is the
    column name and its text is the value.

    ``code`` is the concatenation of the WOJ, POW, GMI and RODZ values, in
    that order; a column that is empty or absent contributes nothing.
    ``name`` is the NAZWA value with surrounding whitespace stripped.

    Raises ``KeyError`` if a row has no NAZWA column and ``AttributeError``
    if that column is empty.
    """
    for _event, element in etree.iterparse(stream):
        if element.tag != 'row':
            continue
        item = dict((child.get('name'), child.text) for child in element)
        code = ''.join([item.get(n) or '' for n in ('WOJ', 'POW', 'GMI', 'RODZ')])
        name = item['NAZWA'].strip()
        # Everything needed has been copied out; drop the row's children so
        # already-processed columns are not kept alive for the whole parse.
        element.clear()
        yield code, name
37 | | |
# Pattern fragment (not compiled on its own) for the "M." / "m." prefix,
# optionally followed by " st.", that precedes some names.
CITY_PREFIX_RE = r'[Mm][.]( st[.])?'
# Leading "WOJ. " on province names.
PROVINCE_RE = re.compile(r'^WOJ[.] ')
# Leading "Powiat ", optionally followed by the city prefix, on county names.
COUNTY_RE = re.compile(r'^Powiat( %s)? ' % CITY_PREFIX_RE)
# Leading city prefix on commune names.
COMMUNE_RE = re.compile(r'^%s ' % CITY_PREFIX_RE)

# Maps the length of a TERC code to a tuple of:
#   * the dataset name,
#   * the dict that collects ``code -> formatted name`` for that dataset,
#   * a callable ``(code, name)`` returning the Python source text to emit
#     for the name, or None when the entry must be left out.
DATASETS = {
    # Province names are lower-cased and wrapped in ugettext_lazy().
    2: ('provinces', {},
        lambda c, n: "ugettext_lazy(u'%s')" % PROVINCE_RE.sub('', n).lower()
    ),
    4: ('counties', {},
        lambda c, n: "u'%s'" % COUNTY_RE.sub('', n)
    ),
    # Only codes whose last character is 1, 2 or 3 are kept as communes.
    7: ('communes', {},
        lambda c, n: ("u'%s'" % COMMUNE_RE.sub('', n) if int(c[-1]) in (1, 2, 3) else None)
    ),
}
54 | | |
if __name__ == '__main__':
    # Prefer the underlying byte stream when stdin has one, so the XML parser
    # decodes the document itself; otherwise use stdin as it is.
    source = getattr(sys.stdin, 'buffer', sys.stdin)

    # Pass 1: sort every TERC entry into its dataset.
    for code, name in parse_TERC(source):
        # The length of the code selects the dataset; an unexpected length
        # raises KeyError rather than being dropped silently.
        _, dict_, clean_name = DATASETS[len(code)]
        # Escape single quotes: the name is emitted inside a '...' literal.
        name = clean_name(code, name.replace("'", "\\'"))
        if name:
            dict_[code] = name

    sink = getattr(sys.stdout, 'buffer', sys.stdout)

    def emit(text):
        """Write ``text`` plus a newline to stdout, encoded as UTF-8."""
        if not isinstance(text, bytes):
            text = text.encode('UTF-8')
        sink.write(text + b'\n')

    # Pass 2: print each dataset as a ``PL_<NAME> = {...}`` dict literal,
    # datasets ordered by code length and entries ordered by code.
    for _, (dict_name, dict_, _) in sorted(DATASETS.items()):
        emit('%s = {' % ('PL_' + dict_name.upper()))
        for code, name in sorted(dict_.items()):
            emit(" %r: %s," % (code, name))
        emit('}\n')
68 | | }}} |
| 5 | The lists of provinces/counties/communes (województwa/powiaty/gminy) are not constant and should be updated from time to time. Fortunately, this can be automated. The Polish [http://www.stat.gov.pl/gus/index_ENG_HTML.htm Central Statistical Office] ([http://www.stat.gov.pl/ Główny Urząd Statystyczny]) publishes an XML file with all the data needed to generate `pl_regions.py`: [http://www.stat.gov.pl/broker/access/prefile/downloadPreFile.jspa?id=68 `TERC.xml`]. There is a Python script, [http://code.djangoproject.com/attachment/wiki/PolishLocalflavor/terc.py terc.py], which extracts this data.