Version 1 (modified by 16 years ago) ( diff ) | ,
---|
django.contrib.localflavor.pl
pl_regions.py
The lists of provinces, counties and communes (województwa/powiaty/gminy) are not constant and should be updated from time to time. Fortunately, this can be automated: the Polish Central Statistical Office (Główny Urząd Statystyczny) publishes an XML file, `TERC.xml`, that contains all the data needed to generate pl_regions.py.
# encoding=UTF-8

'''
Usage: terc.py <TERC.xml

TERC.xml can be found at http://www.stat.gov.pl/

Reads a TERC XML document from standard input and writes the
``PL_PROVINCES``, ``PL_COUNTIES`` and ``PL_COMMUNES`` dictionary literals
(UTF-8 encoded Python source) to standard output.

Runs on Python 2.6+ and on Python 3.
'''

# Copyright © 2008
# Piotr Lewandowski <piotr.lewandowski+django@gmail.com>,
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License, version 2, as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

import re
import sys

try:
    import xml.etree.cElementTree as etree
except ImportError:
    # The cElementTree alias was removed in Python 3.9; the plain module
    # provides the same API.
    import xml.etree.ElementTree as etree


def parse_TERC(stream):
    '''Yield a ``(code, name)`` pair for every ``row`` element in *stream*.

    *stream* is a file name or file object accepted by ``etree.iterparse``.
    Each child of a row is read as a ``name`` attribute plus its text.

    ``code`` is the concatenation of the WOJ, POW, GMI and RODZ fields, with
    empty fields contributing nothing; ``name`` is the NAZWA field with the
    surrounding whitespace stripped.

    Raises ``KeyError`` if a row lacks one of those fields and
    ``AttributeError`` if NAZWA is empty.
    '''
    for _event, element in etree.iterparse(stream):
        if element.tag != 'row':
            continue
        item = dict((child.get('name'), child.text) for child in element)
        code = ''.join([item[n] or '' for n in ('WOJ', 'POW', 'GMI', 'RODZ')])
        name = item['NAZWA'].strip()
        # Everything needed has been copied into ``item``; drop the row's
        # children so that parsed rows do not pile up in the tree that
        # iterparse builds behind the scenes.
        element.clear()
        yield code, name


CITY_PREFIX_RE = r'[Mm][.]( st[.])?'
PROVINCE_RE = re.compile(r'^WOJ[.] ')
COUNTY_RE = re.compile(r'^Powiat( %s)? ' % CITY_PREFIX_RE)
COMMUNE_RE = re.compile(r'^%s ' % CITY_PREFIX_RE)


def _clean_province(code, name):
    '''Return the source text for a province: lower-cased, translatable.'''
    return "ugettext_lazy(u'%s')" % PROVINCE_RE.sub('', name).lower()


def _clean_county(code, name):
    '''Return the source text for a county, without its "Powiat" prefix.'''
    return "u'%s'" % COUNTY_RE.sub('', name)


def _clean_commune(code, name):
    '''Return the source text for a commune, or None to skip the row.

    Only rows whose last code digit (the RODZ field) is 1, 2 or 3 are kept.
    NOTE(review): presumably those digits mark whole communes while other
    values mark parts of a commune -- confirm against the TERC docs.
    '''
    if int(code[-1]) in (1, 2, 3):
        return "u'%s'" % COMMUNE_RE.sub('', name)
    return None


# Code length -> (dictionary name, collected entries, name cleaner).
# A 2-character code holds WOJ only, 4 adds POW, 7 adds GMI and RODZ.
DATASETS = {
    2: ('provinces', {}, _clean_province),
    4: ('counties', {}, _clean_county),
    7: ('communes', {}, _clean_commune),
}


def _write_line(out, text):
    '''Write *text* plus a newline to the binary stream *out* as UTF-8.'''
    if not isinstance(text, bytes):
        text = text.encode('UTF-8')
    out.write(text + b'\n')


def main(stream=None, out=None):
    '''Convert the TERC XML in *stream* into dictionary literals on *out*.

    *stream* defaults to standard input and *out* to standard output; both
    are used in binary mode so the XML declaration decides the input
    encoding and the output is always UTF-8.

    The entries are collected in the dictionaries stored in ``DATASETS``,
    which are filled in place (calling ``main`` twice accumulates entries).

    Raises ``KeyError`` for a row whose code length is not in ``DATASETS``.
    '''
    if stream is None:
        stream = getattr(sys.stdin, 'buffer', sys.stdin)
    if out is None:
        out = getattr(sys.stdout, 'buffer', sys.stdout)

    for code, name in parse_TERC(stream):
        _, dict_, clean_name = DATASETS[len(code)]
        # Escape apostrophes: the name ends up inside a '...' literal.
        name = clean_name(code, name.replace("'", "\\'"))
        if name:
            dict_[code] = name

    for index in sorted(DATASETS):
        dict_name, dict_, _ = DATASETS[index]
        _write_line(out, '%s = {' % ('PL_' + dict_name.upper()))
        for code, name in sorted(dict_.items()):
            _write_line(out, " %r: %s," % (code, name))
        # The embedded newline leaves a blank line between dictionaries.
        _write_line(out, '}\n')


if __name__ == '__main__':
    main()
Attachments (1)
-
terc.py
(2.0 KB
) - added by 16 years ago.
+ terc.py
Download all attachments as: .zip
Note:
See TracWiki
for help on using the wiki.