| | 1 | = `django.contrib.localflavor.pl` = |
| | 2 | |
| | 3 | == `pl_regions.py` == |
| | 4 | |
| | 5 | The lists of provinces, counties and communes (województwa/powiaty/gminy) are not constant, so they should be updated from time to time. Fortunately, this can be automated. The Polish [http://www.stat.gov.pl/gus/index_ENG_HTML.htm Central Statistical Office] ([http://www.stat.gov.pl/ Główny Urząd Statystyczny]) publishes an XML file containing all the data needed to generate `pl_regions.py`: [http://www.stat.gov.pl/broker/access/prefile/downloadPreFile.jspa?id=68 `TERC.xml`]. |
| | 6 | |
| | 7 | {{{ |
| | 8 | #!python |
| | 9 | # encoding=UTF-8 |
| | 10 | ''' |
| | 11 | Usage: terc.py <TERC.xml |
| | 12 | |
| | 13 | TERC.xml can be found at http://www.stat.gov.pl/ |
| | 14 | ''' |
| | 15 | # Copyright © 2008 |
| | 16 | # Piotr Lewandowski <piotr.lewandowski+django@gmail.com>, |
| | 17 | # |
| | 18 | # This program is free software; you can redistribute it and/or modify it |
| | 19 | # under the terms of the GNU General Public License, version 2, as |
| | 20 | # published by the Free Software Foundation. |
| | 21 | # |
| | 22 | # This program is distributed in the hope that it will be useful, |
| | 23 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| | 24 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| | 25 | # GNU General Public License for more details. |
| | 26 | |
| | 27 | import re |
| | 28 | import sys |
| | 29 | import xml.etree.cElementTree as etree |
| | 30 | |
def parse_TERC(stream):
    '''
    Iterate over the <row> elements of a TERC XML document.

    stream -- file name or file object, passed straight to etree.iterparse().

    Yields one (code, name) pair per <row>:
      code -- the texts of the row's WOJ, POW, GMI and RODZ fields joined in
              that order; a field with no text contributes nothing.
      name -- the text of the NAZWA field with surrounding whitespace removed.

    Raises KeyError if a row lacks one of those fields, and AttributeError if
    NAZWA is present but has no text.
    '''
    # iterparse() reports only "end" events by default, so every <row> seen
    # here already has all of its children parsed.
    for _, element in etree.iterparse(stream):
        if element.tag != 'row':
            continue
        # Map each child's "name" attribute to its text content.
        item = dict((child.get('name'), child.text) for child in element)
        code = ''.join([item[n] or '' for n in ('WOJ', 'POW', 'GMI', 'RODZ')])
        name = item['NAZWA'].strip()
        # Everything needed has been copied out of the row; drop its children
        # so the tree built behind the scenes by iterparse() does not keep
        # every processed row alive for the whole run.
        element.clear()
        yield code, name
| | 37 | |
# Regex fragment for the "m." / "M." abbreviation, optionally followed by
# " st.", that precedes some names; it is embedded in the patterns below.
CITY_PREFIX_RE = r'[Mm][.]( st[.])?'
# Leading "WOJ. " marker on province names.
PROVINCE_RE = re.compile(r'^WOJ[.] ')
# Leading "Powiat " marker on county names, with an optional city prefix.
COUNTY_RE = re.compile(r'^Powiat( %s)? ' % CITY_PREFIX_RE)
# Leading city prefix on commune names.
COMMUNE_RE = re.compile(r'^%s ' % CITY_PREFIX_RE)


def _province_literal(c, n):
    '''Return source text for a lazily translated, lower-cased province name.'''
    bare = PROVINCE_RE.sub('', n)
    return "ugettext_lazy(u'%s')" % bare.lower()


def _county_literal(c, n):
    '''Return source text for a county name without its "Powiat" prefix.'''
    bare = COUNTY_RE.sub('', n)
    return "u'%s'" % bare


def _commune_literal(c, n):
    '''
    Return source text for a commune name, or None when the row is skipped.

    Only codes whose final digit is 1, 2 or 3 are kept (presumably the RODZ
    "kind" field -- verify against the TERC specification).
    '''
    if int(c[-1]) not in (1, 2, 3):
        return None
    return "u'%s'" % COMMUNE_RE.sub('', n)


# Keyed by the length of a TERC code.  Each value is a tuple of
#   (dataset name, dict collecting code -> formatted name, formatter),
# where formatter(code, name) returns the text to emit for that entry or
# None when the entry should be left out.
DATASETS = {
    2: ('provinces', {}, _province_literal),
    4: ('counties', {}, _county_literal),
    7: ('communes', {}, _commune_literal),
}
| | 54 | |
if __name__ == '__main__':
    # Pass 1: read TERC rows from standard input and file each one in the
    # dataset selected by the length of its code.
    for terc_code, raw_name in parse_TERC(sys.stdin):
        _, bucket, formatter = DATASETS[len(terc_code)]
        # Escape single quotes because the name ends up inside a '...' literal.
        literal = formatter(terc_code, raw_name.replace("'", "\\'"))
        if not literal:
            # The formatter rejected this row (or produced nothing to emit).
            continue
        bucket[terc_code] = literal

    # Pass 2: print every dataset as a Python dict literal named PL_<NAME>,
    # datasets in ascending key order and entries sorted by code.
    for size in sorted(DATASETS):
        label, bucket, _ = DATASETS[size]
        print('%s = {' % ('PL_' + label.upper()))
        for terc_code, literal in sorted(bucket.items()):
            print(" %r: %s," % (terc_code, literal.encode('UTF-8')))
        print('}\n')
| | 68 | }}} |