#!/usr/bin/python2.5
# encoding=UTF-8
'''
    Usage: terc.py <TERC.xml

    TERC.xml can be found at http://www.stat.gov.pl/
'''
# Copyright © 2008
#   Piotr Lewandowski <piotr.lewandowski+django@gmail.com>,
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License, version 2, as 
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

import re
import sys
import xml.etree.cElementTree as etree

def parse_TERC(stream):
    '''
    Iterate over the <row> elements of a TERC XML document.

    stream -- file name or file object, passed straight to etree.iterparse.

    Yields (code, name) pairs.  The code is the concatenation of the text of
    the row's WOJ, POW, GMI and RODZ columns (a column without text
    contributes an empty string); the name is the text of the NAZWA column
    with surrounding whitespace stripped.

    Raises KeyError if a row lacks one of those columns and AttributeError
    if the NAZWA column has no text.
    '''
    for _, element in etree.iterparse(stream):
        if element.tag != 'row':
            continue
        # Map each child's "name" attribute to its text content.
        item = dict((child.get('name'), child.text) for child in element)
        code = ''.join([item[n] or '' for n in ('WOJ', 'POW', 'GMI', 'RODZ')])
        yield code, item['NAZWA'].strip()
        # iterparse keeps every parsed element attached to the tree; drop the
        # contents of the row once it has been consumed so that processed
        # rows do not keep their children alive for the rest of the run.
        element.clear()
        
# Prefix removed from province names.
PROVINCE_RE = re.compile(r'^WOJ[.] ')

# Pattern fragment for the city prefix ("M." or "m.", optionally followed by
# " st."); it is embedded in the county and commune patterns below.
CITY_PREFIX_RE = r'[Mm][.]( st[.])?'

# Leading "Powiat ", with or without the city prefix, removed from county names.
COUNTY_RE = re.compile(r'^Powiat( %s)? ' % (CITY_PREFIX_RE,))

# Leading city prefix removed from commune names.
COMMUNE_RE = re.compile(r'^%s ' % (CITY_PREFIX_RE,))

def _format_province(c, n):
    '''Return the source text for a province: name without the province
    prefix, lower-cased and wrapped in ugettext_lazy().'''
    return "ugettext_lazy(u'%s')" % PROVINCE_RE.sub('', n).lower()


def _format_county(c, n):
    '''Return the source text for a county: name without the county prefix,
    as a unicode literal.'''
    return "u'%s'" % COUNTY_RE.sub('', n)


def _format_commune(c, n):
    '''Return the source text for a commune, or None when the last digit of
    the code is not 1, 2 or 3.'''
    if int(c[-1]) not in (1, 2, 3):
        return None
    return "u'%s'" % COMMUNE_RE.sub('', n)


# Keyed by code length.  Each entry holds the dataset name, the dict that
# collects code -> formatted name, and the formatter called as
# formatter(code, name).
DATASETS = {
    2: ('provinces', {}, _format_province),
    4: ('counties', {}, _format_county),
    7: ('communes', {}, _format_commune),
}

if __name__ == '__main__':
    # File every (code, name) row read from stdin into the dataset selected
    # by the length of its code.  DATASETS has no entry for other lengths,
    # so such a code raises KeyError.
    for code, name in parse_TERC(sys.stdin):
        _, dict_, clean_name = DATASETS[len(code)]
        # Names are emitted inside single-quoted literals.  Backslashes have
        # to be escaped too, and before the quotes; otherwise a backslash in
        # the data would be read back as the start of an escape sequence.
        name = name.replace('\\', '\\\\').replace("'", "\\'")
        name = clean_name(code, name)
        # The formatter returns a false value for rows that are left out.
        if name:
            dict_[code] = name

    # Write each dataset as a PL_<NAME> dict literal followed by a blank
    # line: datasets ordered by code length, entries ordered by code, names
    # encoded as UTF-8.
    for _, (dict_name, dict_, _) in sorted(DATASETS.iteritems()):
        sys.stdout.write('%s = {\n' % ('PL_' + dict_name.upper()))
        for code, name in sorted(dict_.iteritems()):
            sys.stdout.write("    %r: %s,\n" % (code, name.encode('UTF-8')))
        sys.stdout.write('}\n\n')

# vim:et ts=4 sw=4
