PolishLocalflavor: terc.py

File terc.py, 2.0 KB (added by Piotr Lewandowski <django@…>, 7 years ago)

+ terc.py

Line 
1#!/usr/bin/python2.5
2# encoding=UTF-8
3'''
4    Usage: terc.py <TERC.xml
5
6    TERC.xml can be found at http://www.stat.gov.pl/
7'''
8# Copyright © 2008
9#   Piotr Lewandowski <piotr.lewandowski+django@gmail.com>,
10#
11# This program is free software; you can redistribute it and/or modify it
12# under the terms of the GNU General Public License, version 2, as
13# published by the Free Software Foundation.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19
20import re
21import sys
22import xml.etree.cElementTree as etree
23
def parse_TERC(stream):
    '''
    Incrementally parse a TERC XML document and yield (code, name) pairs.

    Every element whose tag is 'row' is expected to contain child elements
    carrying a 'name' attribute; the child's text is the column value.

    stream -- file name or file-like object accepted by etree.iterparse.

    Yields a 2-tuple per row:
      * code -- the WOJ, POW, GMI and RODZ columns concatenated in that
                order; a column with no text contributes an empty string.
      * name -- the NAZWA column with surrounding whitespace stripped.

    Raises KeyError when a row lacks one of the columns above, and
    AttributeError when the NAZWA column has no text.
    '''
    for event, element in etree.iterparse(stream):
        # Only completed <row> elements are of interest; their children have
        # already been fully parsed by the time the row's event arrives.
        if element.tag != 'row':
            continue
        item = dict((child.get('name'), child.text) for child in element)
        code = ''.join([item[n] or '' for n in ('WOJ', 'POW', 'GMI', 'RODZ')])
        name = item['NAZWA'].strip()
        # iterparse keeps building the whole tree behind the scenes; drop the
        # row's children and text once the values are extracted so memory use
        # does not grow with the size of the input.
        element.clear()
        yield code, name
30       
# Pattern fragment for the "m." / "M." marker, optionally followed by " st.".
# Kept as a plain string because it is interpolated into the patterns below.
CITY_PREFIX_RE = r'[Mm][.]( st[.])?'
# Leading "WOJ. " on province names.
PROVINCE_RE = re.compile(r'^WOJ[.] ')
# Leading "Powiat " (optionally with the city marker) on county names.
COUNTY_RE = re.compile(r'^Powiat( %s)? ' % CITY_PREFIX_RE)
# Leading city marker on commune names.
COMMUNE_RE = re.compile(r'^%s ' % CITY_PREFIX_RE)


def _province_literal(c, n):
    '''Return a lazily translated literal of the lower-cased province name.'''
    bare = PROVINCE_RE.sub('', n)
    return "ugettext_lazy(u'%s')" % bare.lower()


def _county_literal(c, n):
    '''Return a unicode literal of the county name without its prefix.'''
    return "u'%s'" % COUNTY_RE.sub('', n)


def _commune_literal(c, n):
    '''
    Return a unicode literal of the commune name without its city marker,
    or None when the last digit of the code is not 1, 2 or 3.
    '''
    # NOTE(review): the last character of the code is the RODZ column as
    # assembled by parse_TERC; presumably 1-3 denote the commune kinds that
    # should be listed -- confirm against the TERC documentation.
    if int(c[-1]) not in (1, 2, 3):
        return None
    return "u'%s'" % COMMUNE_RE.sub('', n)


# Keyed by the length of a TERC code.  Each value is a triple of
# (dataset name, dict collecting code -> literal, function(code, name)
# producing the literal to emit, or None to skip the entry).
DATASETS = {
    2: ('provinces', {}, _province_literal),
    4: ('counties', {}, _county_literal),
    7: ('communes', {}, _commune_literal),
}
47
if __name__ == '__main__':
    # Pass 1: file every (code, name) pair read from standard input into the
    # dataset selected by the length of its code.
    for terc_code, raw_name in parse_TERC(sys.stdin):
        entries, make_literal = DATASETS[len(terc_code)][1:]
        # Escape single quotes because the name ends up inside a
        # single-quoted literal in the generated output.
        literal = make_literal(terc_code, raw_name.replace("'", "\\'"))
        if literal:
            entries[terc_code] = literal

    # Pass 2: print each dataset as a "PL_<NAME> = {...}" dictionary, with
    # datasets ordered by code length and entries ordered by code.
    for code_length in sorted(DATASETS):
        label, entries = DATASETS[code_length][:2]
        print('%s = {' % ('PL_' + label.upper()))
        for terc_code in sorted(entries):
            print("    %r: %s," % (terc_code, entries[terc_code].encode('UTF-8')))
        print('}\n')
61
62# vim:et ts=4 sw=4
Back to Top