| 5 | | The provinces/counties/communes (województwa/powiaty/gminy) lists should be updated from time to time - they are not constant. Fortunately, it can be automated. Polish [http://www.stat.gov.pl/gus/index_ENG_HTML.htm Central Statistical Office] ([http://www.stat.gov.pl/ Główny Urząd Statystyczny]) published an XML file with all necessary data needed to generate `pl_regions.py`: [http://www.stat.gov.pl/broker/access/prefile/downloadPreFile.jspa?id=68 `TERC.xml`]. |
| 6 | | |
| 7 | | {{{ |
| 8 | | #!python |
| 9 | | # encoding=UTF-8 |
| 10 | | ''' |
| 11 | | Usage: terc.py <TERC.xml |
| 12 | | |
| 13 | | TERC.xml can be found at http://www.stat.gov.pl/ |
| 14 | | ''' |
| 15 | | # Copyright © 2008 |
| 16 | | # Piotr Lewandowski <piotr.lewandowski+django@gmail.com>, |
| 17 | | # |
| 18 | | # This program is free software; you can redistribute it and/or modify it |
| 19 | | # under the terms of the GNU General Public License, version 2, as |
| 20 | | # published by the Free Software Foundation. |
| 21 | | # |
| 22 | | # This program is distributed in the hope that it will be useful, |
| 23 | | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 24 | | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 25 | | # GNU General Public License for more details. |
| 26 | | |
| 27 | | import re |
| 28 | | import sys |
| 29 | | import xml.etree.cElementTree as etree |
| 30 | | |
| 31 | | def parse_TERC(stream):  # generator: yields one (code, name) tuple per 'row' element parsed from stream |
| 32 | | for event, element in etree.iterparse(stream):  # no events argument is passed, so iterparse uses its default event set |
| 33 | | if element.tag != 'row':  # every element that is not a 'row' is ignored |
| 34 | | continue |
| 35 | | item = dict((child.get('name'), child.text) for child in element)  # maps each child's "name" attribute to that child's text |
| 36 | | yield ''.join([item[n] or '' for n in ('WOJ', 'POW', 'GMI', 'RODZ')]), item['NAZWA'].strip()  # code = WOJ+POW+GMI+RODZ texts concatenated (a None text counts as ''); name = NAZWA text with surrounding whitespace removed |
| 37 | | |
| 38 | | CITY_PREFIX_RE = r'[Mm][.]( st[.])?'  # pattern fragment (not compiled): 'M.' or 'm.', optionally followed by ' st.'; interpolated into the two patterns below |
| 39 | | PROVINCE_RE = re.compile(r'^WOJ[.] ')  # matches a leading 'WOJ. ' |
| 40 | | COUNTY_RE = re.compile(r'^Powiat( %s)? ' % CITY_PREFIX_RE)  # matches a leading 'Powiat ', with the city prefix optionally between 'Powiat' and the trailing space |
| 41 | | COMMUNE_RE = re.compile(r'^%s ' % CITY_PREFIX_RE)  # matches a leading city prefix followed by a space |
| 42 | | |
| 43 | | DATASETS = {  # key: length of the TERC code; value: (dataset name, dict that collects code -> formatted name, formatter taking (code, name)) |
| 44 | | 2: ('provinces', {}, |
| 45 | | lambda c, n: "ugettext_lazy(u'%s')" % PROVINCE_RE.sub('', n).lower()  # strips the 'WOJ. ' prefix, lowercases the rest and wraps it as ugettext_lazy(u'...') source text |
| 46 | | ), |
| 47 | | 4: ('counties', {}, |
| 48 | | lambda c, n: "u'%s'" % COUNTY_RE.sub('', n)  # strips the 'Powiat ' prefix (and city prefix, if present) and wraps the rest as a u'...' literal |
| 49 | | ), |
| 50 | | 7: ('communes', {}, |
| 51 | | lambda c, n: ("u'%s'" % COMMUNE_RE.sub('', n) if int(c[-1]) in (1, 2, 3) else None)  # formats the name only when the last digit of the code is 1, 2 or 3; otherwise returns None (presumably that digit is the RODZ field - confirm against TERC docs) |
| 52 | | ), |
| 53 | | } |
| 54 | | |
| 55 | | if __name__ == '__main__':  # script entry point: reads TERC XML on stdin, prints the generated dictionaries on stdout |
| 56 | | for code, name in parse_TERC(sys.stdin): |
| 57 | | index = len(code)  # the code length selects the dataset (2, 4 or 7) |
| 58 | | _, dict_, clean_name = DATASETS[index]  # a code of any other length raises KeyError here |
| 59 | | name = clean_name(code, name.replace("'", "\\'"))  # single quotes are backslash-escaped because the formatter embeds the name in a single-quoted literal |
| 60 | | if name:  # a formatter result of None is skipped |
| 61 | | dict_[code] = name |
| 62 | | |
| 63 | | for _, (dict_name, dict_, _) in sorted(DATASETS.iteritems()):  # Python 2 only (dict.iteritems); datasets are emitted in ascending key order |
| 64 | | print '%s = {' % ('PL_' + dict_name.upper())  # Python 2 print statement; emits e.g. PL_PROVINCES = { |
| 65 | | for code, name in sorted(dict_.iteritems()):  # entries are emitted sorted by code |
| 66 | | print " %r: %s," % (code, name.encode('UTF-8'))  # name is encoded to UTF-8 for output - assumes it is a unicode object, TODO confirm |
| 67 | | print '}\n' |
| 68 | | }}} |
| | 5 | The provinces/counties/communes (województwa/powiaty/gminy) lists should be updated from time to time - they are not constant. Fortunately, this can be automated. The Polish [http://www.stat.gov.pl/gus/index_ENG_HTML.htm Central Statistical Office] ([http://www.stat.gov.pl/ Główny Urząd Statystyczny]) has published an XML file with all the data needed to generate `pl_regions.py`: [http://www.stat.gov.pl/broker/access/prefile/downloadPreFile.jspa?id=68 `TERC.xml`]. There is a Python script - [http://code.djangoproject.com/attachment/wiki/PolishLocalflavor/terc.py terc.py] - which extracts this data. |