| 1 | = `django.contrib.localflavor.pl` = |
| 2 | |
| 3 | == `pl_regions.py` == |
| 4 | |
| 5 | The provinces/counties/communes (województwa/powiaty/gminy) lists should be updated from time to time, because they are not constant. Fortunately, this can be automated. The Polish [http://www.stat.gov.pl/gus/index_ENG_HTML.htm Central Statistical Office] ([http://www.stat.gov.pl/ Główny Urząd Statystyczny]) publishes an XML file with all the data needed to generate `pl_regions.py`: [http://www.stat.gov.pl/broker/access/prefile/downloadPreFile.jspa?id=68 `TERC.xml`]. |
| 6 | |
| 7 | {{{ |
| 8 | #!python |
| 9 | # encoding=UTF-8 |
| 10 | ''' |
| 11 | Usage: terc.py <TERC.xml |
| 12 | |
| 13 | TERC.xml can be found at http://www.stat.gov.pl/ |
| 14 | ''' |
| 15 | # Copyright © 2008 |
| 16 | # Piotr Lewandowski <piotr.lewandowski+django@gmail.com>, |
| 17 | # |
| 18 | # This program is free software; you can redistribute it and/or modify it |
| 19 | # under the terms of the GNU General Public License, version 2, as |
| 20 | # published by the Free Software Foundation. |
| 21 | # |
| 22 | # This program is distributed in the hope that it will be useful, |
| 23 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 24 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 25 | # GNU General Public License for more details. |
| 26 | |
| 27 | import re |
| 28 | import sys |
| 29 | import xml.etree.cElementTree as etree |
| 30 | |
def parse_TERC(stream):
    """Yield a ``(code, name)`` pair for every ``row`` element in ``stream``.

    ``stream`` is handed straight to ``etree.iterparse``.

    Each child of a ``row`` is read as a field whose key is the child's
    ``name`` attribute and whose value is the child's text.  ``code`` is the
    concatenation of the ``WOJ``, ``POW``, ``GMI`` and ``RODZ`` fields, in
    that order, with empty fields contributing nothing.  ``name`` is the
    ``NAZWA`` field with surrounding whitespace stripped.

    Raises ``KeyError`` if a row lacks one of those fields and
    ``AttributeError`` if the ``NAZWA`` field has no text.
    """
    code_fields = ('WOJ', 'POW', 'GMI', 'RODZ')
    for event, element in etree.iterparse(stream):
        if element.tag != 'row':
            continue
        item = dict((child.get('name'), child.text) for child in element)
        code = ''.join([item[n] or '' for n in code_fields])
        name = item['NAZWA'].strip()
        # iterparse keeps building the full tree behind the scenes; drop the
        # row's children now that they have been read, so that memory use
        # does not grow with the size of the input file.
        element.clear()
        yield code, name
| 37 | |
# Regex fragment for a city prefix: "m." or "M.", optionally followed by
# " st.".  It stays a plain string so it can be embedded in the compiled
# patterns below.
CITY_PREFIX_RE = r'[Mm][.]( st[.])?'

# Leading "WOJ. " of a province name.
PROVINCE_RE = re.compile(r'^WOJ[.] ')

# Leading "Powiat " of a county name, together with a city prefix if one
# follows it.
COUNTY_RE = re.compile(r'^Powiat( ' + CITY_PREFIX_RE + r')? ')

# Leading city prefix of a commune name.
COMMUNE_RE = re.compile('^' + CITY_PREFIX_RE + ' ')
| 42 | |
def _province_literal(code, name):
    """Return source text for a lazily translated, lower-cased province name."""
    return "ugettext_lazy(u'%s')" % PROVINCE_RE.sub('', name).lower()


def _county_literal(code, name):
    """Return source text for a county name without its leading prefix."""
    return "u'%s'" % COUNTY_RE.sub('', name)


def _commune_literal(code, name):
    """Return source text for a commune name, or None to skip the entry.

    Only codes whose last digit is 1, 2 or 3 are kept.
    """
    if int(code[-1]) not in (1, 2, 3):
        return None
    return "u'%s'" % COMMUNE_RE.sub('', name)


# Maps the length of a TERC code to a tuple of:
#   - the dataset name,
#   - the dictionary collecting that dataset's code -> literal entries,
#   - the function turning (code, name) into the literal to store, or into
#     None when the entry is to be left out.
DATASETS = {
    2: ('provinces', {}, _province_literal),
    4: ('counties', {}, _county_literal),
    7: ('communes', {}, _commune_literal),
}
| 54 | |
if __name__ == '__main__':
    # Pass 1: file every (code, name) pair read from standard input into the
    # dataset selected by the length of its code.
    for terc_code, raw_name in parse_TERC(sys.stdin):
        entries, to_literal = DATASETS[len(terc_code)][1:]
        # Escape single quotes, because the name ends up inside a
        # single-quoted literal in the generated source.
        literal = to_literal(terc_code, raw_name.replace("'", "\\'"))
        if not literal:
            # The dataset's cleaner rejected this entry.
            continue
        entries[terc_code] = literal

    # Pass 2: write each dataset as a dictionary literal, datasets ordered by
    # code length and entries ordered by code.
    emit = sys.stdout.write
    for code_length in sorted(DATASETS):
        label, entries = DATASETS[code_length][:2]
        emit('%s = {\n' % ('PL_' + label.upper()))
        for terc_code in sorted(entries):
            emit(" %r: %s,\n" % (terc_code, entries[terc_code].encode('UTF-8')))
        emit('}\n\n')
| 68 | }}} |