Django

Code

Show
Ignore:
Timestamp:
12/14/07 15:42:37 (1 year ago)
Author:
jbronn
Message:

gis: Fixed #6196 in GeoIP refactor. Added unit tests, improved path setup, and made mostly compatible w/existing MaxMind Python API.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • django/branches/gis/django/contrib/gis/utils/geoip.py

    r6369 r6918  
    11""" 
    22 This module houses the GeoIP object, a ctypes wrapper for the MaxMind GeoIP(R) 
    3   C API (http://www.maxmind.com/app/c). 
     3 C API (http://www.maxmind.com/app/c).  This is an alternative to the GPL 
     4 licensed Python GeoIP interface provided by MaxMind. 
    45 
    56 GeoIP(R) is a registered trademark of MaxMind, LLC of Boston, Massachusetts. 
    67 
    78 For IP-based geolocation, this module requires the GeoLite Country and City 
    8   datasets, in binary format (CSV will not work!).  The datasets may be  
    9   downloaded from MaxMind at http://www.maxmind.com/download/geoip/database/. 
    10   Grab GeoIP.dat.gz and GeoLiteCity.dat.gz, and unzip them in the directory 
    11   corresponding to settings.GEOIP_PATH.  See the GeoIP docstring and examples 
    12   below for more details. 
     9 datasets, in binary format (CSV will not work!).  The datasets may be  
     10 downloaded from MaxMind at http://www.maxmind.com/download/geoip/database/. 
     11 Grab GeoIP.dat.gz and GeoLiteCity.dat.gz, and unzip them in the directory 
     12 corresponding to settings.GEOIP_PATH.  See the GeoIP docstring and examples 
     13 below for more details. 
    1314 
    1415 TODO: Verify compatibility with Windows. 
     
    3940""" 
    4041import os, re 
    41 from ctypes import c_char_p, c_float, c_int, string_at, Structure, CDLL, POINTER 
     42from ctypes import c_char_p, c_float, c_int, Structure, CDLL, POINTER 
    4243from django.conf import settings 
    43  
    44 # The Exception class for GeoIP Errors. 
     44if not settings._target: settings.configure() 
     45 
     46# Creating the settings dictionary with any settings, if needed. 
     47GEOIP_SETTINGS = dict((key, getattr(settings, key))  
     48                      for key in ('GEOIP_PATH', 'GEOIP_LIBRARY_PATH', 'GEOIP_COUNTRY', 'GEOIP_CITY') 
     49                      if hasattr(settings, key)) 
     50lib_name = GEOIP_SETTINGS.get('GEOIP_LIBRARY_PATH', None) 
     51 
     52# GeoIP Exception class. 
    4553class GeoIPException(Exception): pass 
    4654 
    4755# The shared library for the GeoIP C API.  May be downloaded 
    4856#  from http://www.maxmind.com/download/geoip/api/c/ 
    49 if os.name == 'nt': 
    50     ext = '.dll' 
     57if lib_name: 
     58    pass 
     59elif os.name == 'nt': 
     60    lib_name = 'libGeoIP.dll' 
    5161elif os.name == 'posix': 
    5262    platform = os.uname()[0] 
    53     if platform in ('Linux', 'SunOS'): 
    54         ext = '.so' 
    55     elif platofm == 'Darwin': 
    56         ext = '.dylib' 
     63    if platform == 'Darwin': 
     64        lib_name = 'libGeoIP.dylib' 
    5765    else: 
    58         raise GeoIPException('Unknown POSIX platform "%s"' % platform) 
    59 lgeoip = CDLL('libGeoIP' + ext) 
    60  
    61 # A regular expression for recognizing IP addresses 
     66        lib_name = 'libGeoIP.so' 
     67else: 
     68    raise GeoIPException('Unknown POSIX platform "%s"' % platform) 
     69lgeoip = CDLL(lib_name) 
     70 
     71# Regular expressions for recognizing IP addresses and the GeoIP 
     72# free database editions. 
    6273ipregex = re.compile(r'^(?P<w>\d\d?\d?)\.(?P<x>\d\d?\d?)\.(?P<y>\d\d?\d?)\.(?P<z>\d\d?\d?)$') 
    63  
    64 # The flags for GeoIP memory caching. 
    65 # GEOIP_STANDARD - read database from filesystem, uses least memory. 
    66 
    67 # GEOIP_MEMORY_CACHE - load database into memory, faster performance 
    68 #        but uses more memory 
    69 
    70 # GEOIP_CHECK_CACHE - check for updated database.  If database has been updated, 
    71 #        reload filehandle and/or memory cache. 
    72 
    73 # GEOIP_INDEX_CACHE - just cache 
    74 #        the most frequently accessed index portion of the database, resulting 
    75 #        in faster lookups than GEOIP_STANDARD, but less memory usage than 
    76 #        GEOIP_MEMORY_CACHE - useful for larger databases such as 
    77 #        GeoIP Organization and GeoIP City.  Note, for GeoIP Country, Region 
    78 #        and Netspeed databases, GEOIP_INDEX_CACHE is equivalent to GEOIP_MEMORY_CACHE 
    79 
    80 cache_options = {0 : c_int(0), # GEOIP_STANDARD 
    81                  1 : c_int(1), # GEOIP_MEMORY_CACHE 
    82                  2 : c_int(2), # GEOIP_CHECK_CACHE 
    83                  4 : c_int(4), # GEOIP_INDEX_CACHE 
    84                  } 
    85  
    86 # GeoIPRecord C Structure definition. 
     74free_regex = re.compile(r'^GEO-\d{3}FREE') 
     75lite_regex = re.compile(r'^GEO-\d{3}LITE') 
     76 
     77#### GeoIP C Structure definitions #### 
    8778class GeoIPRecord(Structure): 
    8879    _fields_ = [('country_code', c_char_p), 
     
    9788                ('area_code', c_int), 
    9889                ] 
    99  
    100 # ctypes function prototypes 
    101 record_by_addr = lgeoip.GeoIP_record_by_addr 
    102 record_by_addr.restype = POINTER(GeoIPRecord) 
    103 record_by_name = lgeoip.GeoIP_record_by_name 
    104 record_by_name.restype = POINTER(GeoIPRecord) 
    105  
    106 # The exception class for GeoIP Errors. 
    107 class GeoIPException(Exception): pass 
    108  
     90class GeoIPTag(Structure): pass 
     91 
     92#### ctypes function prototypes #### 
     93RECTYPE = POINTER(GeoIPRecord) 
     94DBTYPE = POINTER(GeoIPTag) 
     95 
     96# For retrieving records by name or address. 
     97def record_output(func): 
     98    func.restype = RECTYPE 
     99    return func 
     100rec_by_addr = record_output(lgeoip.GeoIP_record_by_addr) 
     101rec_by_name = record_output(lgeoip.GeoIP_record_by_name) 
     102 
     103# For opening up GeoIP databases. 
     104geoip_open = lgeoip.GeoIP_open 
     105geoip_open.restype = DBTYPE 
     106 
     107# String output routines. 
     108def string_output(func): 
     109    func.restype = c_char_p 
     110    return func 
     111geoip_dbinfo = string_output(lgeoip.GeoIP_database_info) 
     112cntry_code_by_addr = string_output(lgeoip.GeoIP_country_code_by_addr) 
     113cntry_code_by_name = string_output(lgeoip.GeoIP_country_code_by_name) 
     114cntry_name_by_addr = string_output(lgeoip.GeoIP_country_name_by_addr) 
     115cntry_name_by_name = string_output(lgeoip.GeoIP_country_name_by_name) 
     116 
     117#### GeoIP class #### 
    109118class GeoIP(object): 
    110     def __init__(self, path=None, country=None, city=None, 
    111                  cache=0): 
     119    # The flags for GeoIP memory caching. 
     120    # GEOIP_STANDARD - read database from filesystem, uses least memory. 
     121    # 
     122    # GEOIP_MEMORY_CACHE - load database into memory, faster performance 
     123    #        but uses more memory 
     124    # 
     125    # GEOIP_CHECK_CACHE - check for updated database.  If database has been updated, 
     126    #        reload filehandle and/or memory cache. 
     127    # 
     128    # GEOIP_INDEX_CACHE - just cache 
     129    #        the most frequently accessed index portion of the database, resulting 
     130    #        in faster lookups than GEOIP_STANDARD, but less memory usage than 
     131    #        GEOIP_MEMORY_CACHE - useful for larger databases such as 
     132    #        GeoIP Organization and GeoIP City.  Note, for GeoIP Country, Region 
     133    #        and Netspeed databases, GEOIP_INDEX_CACHE is equivalent to GEOIP_MEMORY_CACHE 
     134    # 
     135    GEOIP_STANDARD = 0 
     136    GEOIP_MEMORY_CACHE = 1 
     137    GEOIP_CHECK_CACHE = 2 
     138    GEOIP_INDEX_CACHE = 4 
     139    cache_options = dict((opt, None) for opt in (0, 1, 2, 4)) 
     140 
     141    def __init__(self, path=None, cache=0, country=None, city=None): 
    112142        """ 
    113143        Initializes the GeoIP object, no parameters are required to use default 
    114          settings.  Keyword arguments may be passed in to customize the locations 
    115          of the GeoIP data sets. 
    116  
    117         * path: Base directory where the GeoIP data files (*.dat) are located. 
     144        settings.  Keyword arguments may be passed in to customize the locations 
     145        of the GeoIP data sets. 
     146 
     147        * path: Base directory to where GeoIP data is located or the full path 
     148            to where the city or country data files (*.dat) are located. 
    118149            Assumes that both the city and country data sets are located in 
    119             this directory.  Overrides the GEOIP_PATH settings attribute. 
     150            this directory; overrides the GEOIP_PATH settings attribute. 
     151 
     152        * cache: The cache settings when opening up the GeoIP datasets, 
     153            and may be an integer in (0, 1, 2, 4) corresponding to 
     154            the GEOIP_STANDARD, GEOIP_MEMORY_CACHE, GEOIP_CHECK_CACHE, 
     155            and GEOIP_INDEX_CACHE `GeoIPOptions` C API settings, 
     156            respectively.  Defaults to 0, meaning that the data is read 
     157            from the disk. 
    120158 
    121159        * country: The name of the GeoIP country data file.  Defaults to 
     
    124162        * city: The name of the GeoIP city data file.  Defaults to 
    125163            'GeoLiteCity.dat'; overrides the GEOIP_CITY settings attribute. 
    126  
    127         * cache: The cache settings when opening up the GeoIP datasets, 
    128             and may be an integer in (0, 1, 2, 4).  Defaults to 0, meaning 
    129             that the data is read from the disk. 
    130         """ 
    131  
     164        """ 
    132165        # Checking the given cache option. 
    133         if cache in cache_options: 
    134             self._cache = cache_options[cache] 
     166        if cache in self.cache_options: 
     167            self._cache = self.cache_options[cache] 
    135168        else: 
    136169            raise GeoIPException('Invalid caching option: %s' % cache) 
     
    138171        # Getting the GeoIP data path. 
    139172        if not path: 
    140             try: 
    141                 self._path = settings.GEOIP_PATH 
    142             except AttributeError: 
    143                 raise GeoIPException('Must specify GEOIP_PATH in your settings.py') 
    144         else: 
    145             self._path = path 
    146         if not os.path.isdir(self._path): 
    147             raise GeoIPException('GEOIP_PATH must be set to a directory.') 
    148  
    149         # Getting the GeoIP country data file. 
    150         if not country: 
    151             try: 
    152                 cntry_file = settings.GEOIP_COUNTRY 
    153             except AttributeError: 
    154                 cntry_file = 'GeoIP.dat' 
    155         else: 
    156             cntry_file = country 
    157         self._country_file = os.path.join(self._path, cntry_file) 
    158  
    159         # Getting the GeoIP city data file. 
    160         if not city: 
    161             try: 
    162                 city_file = settings.GEOIP_CITY 
    163             except AttributeError: 
    164                 city_file = 'GeoLiteCity.dat' 
    165         else: 
    166             city_file = city 
    167         self._city_file = os.path.join(self._path, city_file) 
    168  
    169         # Opening up the GeoIP country data file. 
    170         if os.path.isfile(self._country_file): 
    171             self._country = lgeoip.GeoIP_open(c_char_p(self._country_file), self._cache) 
    172         else: 
    173             self._country = None 
    174  
    175         # Opening the GeoIP city data file. 
    176         if os.path.isfile(self._city_file): 
    177             self._city = lgeoip.GeoIP_open(c_char_p(self._city_file), self._cache) 
    178         else: 
    179             self._city = None 
    180      
    181     def country(self, query): 
    182         """ 
    183         Returns a dictonary with with the country code and name when given an  
    184          IP address or a Fully Qualified Domain Name (FQDN).  For example, both 
    185          '24.124.1.80' and 'djangoproject.com' are valid parameters. 
    186         """ 
    187         if self._country is None: 
     173            path = GEOIP_SETTINGS.get('GEOIP_PATH', None) 
     174            if not path: raise GeoIPException('GeoIP path must be provided via parameter or the GEOIP_PATH setting.') 
     175        if not isinstance(path, basestring): 
     176            raise TypeError('Invalid path type: %s' % type(path).__name__) 
     177 
     178        cntry_ptr, city_ptr = (None, None) 
     179        if os.path.isdir(path): 
     180            # Getting the country and city files using the settings 
     181            # dictionary.  If no settings are provided, default names 
     182            # are assigned. 
     183            country = os.path.join(path, country or GEOIP_SETTINGS.get('GEOIP_COUNTRY', 'GeoIP.dat')) 
     184            city = os.path.join(path, city or GEOIP_SETTINGS.get('GEOIP_CITY', 'GeoLiteCity.dat')) 
     185        elif os.path.isfile(path): 
     186            # Otherwise, some detective work will be needed to figure 
     187            # out whether the given database path is for the GeoIP country 
     188            # or city databases. 
     189            ptr = geoip_open(path, cache) 
     190            info = geoip_dbinfo(ptr) 
     191            if lite_regex.match(info): 
     192                # GeoLite City database. 
     193                city, city_ptr = path, ptr 
     194            elif free_regex.match(info): 
     195                # GeoIP Country database. 
     196                country, cntry_ptr = path, ptr 
     197            else: 
     198                raise GeoIPException('Unable to recognize database edition: %s' % info) 
     199        else: 
     200            raise GeoIPException('GeoIP path must be a valid file or directory.') 
     201         
     202        # `_init_db` does the dirty work. 
     203        self._init_db(country, cache, '_country', cntry_ptr) 
     204        self._init_db(city, cache, '_city', city_ptr) 
     205 
     206    def _init_db(self, db_file, cache, attname, ptr=None): 
     207        "Helper routine for setting GeoIP ctypes database properties." 
     208        if ptr: 
     209            # Pointer already retrieved. 
     210            pass 
     211        elif os.path.isfile(db_file or ''): 
     212            ptr = geoip_open(db_file, cache) 
     213        setattr(self, attname, ptr) 
     214        setattr(self, '%s_file' % attname, db_file) 
     215 
     216    def _check_query(self, query, country=False, city=False, city_or_country=False): 
     217        "Helper routine for checking the query and database availability." 
     218        # Making sure a string was passed in for the query. 
     219        if not isinstance(query, basestring): 
     220            raise TypeError('GeoIP query must be a string, not type %s' % type(query).__name__) 
     221 
     222        # Extra checks for the existence of country and city databases. 
     223        if city_or_country and self._country is None and self._city is None: 
     224            raise GeoIPException('Invalid GeoIP country and city data files.') 
     225        elif country and self._country is None: 
    188226            raise GeoIPException('Invalid GeoIP country data file: %s' % self._country_file) 
    189  
    190         if ipregex.match(query): 
    191             # If an IP address was passed in. 
    192             code = lgeoip.GeoIP_country_code_by_addr(self._country, c_char_p(query)) 
    193             name = lgeoip.GeoIP_country_name_by_addr(self._country, c_char_p(query)) 
    194         else: 
    195             # If a FQDN was passed in. 
    196             code = lgeoip.GeoIP_country_code_by_name(self._country, c_char_p(query)) 
    197             name = lgeoip.GeoIP_country_name_by_name(self._country, c_char_p(query)) 
    198  
    199         # Checking our returned country code and name, setting each to 
    200         #  None, if pointer is invalid. 
    201         if bool(code): code = string_at(code) 
    202         else: code = None 
    203         if bool(name): name = string_at(name) 
    204         else: name = None 
    205  
    206         # Returning the country code and name 
    207         return {'country_code' : code,  
    208                 'country_name' : name, 
    209                 } 
     227        elif city and self._city is None: 
     228            raise GeoIPException('Invalid GeoIP city data file: %s' % self._city_file) 
    210229 
    211230    def city(self, query): 
    212231        """ 
    213         Returns a dictionary of city information for the given IP address or  
    214          Fully Qualified Domain Name (FQDN).  Some information in the dictionary 
    215          may be undefined (None). 
    216         """ 
    217         if self._city is None: 
    218             raise GeoIPException('Invalid GeoIP country data file: %s' % self._city_file) 
    219  
     232        Returns a dictionary of city information for the given IP address or 
     233        Fully Qualified Domain Name (FQDN).  Some information in the dictionary 
     234        may be undefined (None). 
     235        """ 
     236        self._check_query(query, city=True) 
    220237        if ipregex.match(query): 
    221238            # If an IP address was passed in 
    222             ptr = record_by_addr(self._city, c_char_p(query)) 
     239            ptr = rec_by_addr(self._city, c_char_p(query)) 
    223240        else: 
    224241            # If a FQDN was passed in. 
    225             ptr = record_by_name(self._city, c_char_p(query)) 
     242            ptr = rec_by_name(self._city, c_char_p(query)) 
    226243 
    227244        # Checking the pointer to the C structure, if valid pull out elements 
    228         # into a dicionary and return. 
     245        # into a dicionary and return. 
    229246        if bool(ptr): 
    230247            record = ptr.contents 
     
    232249        else: 
    233250            return None 
     251     
     252    def country_code(self, query): 
     253        "Returns the country code for the given IP Address or FQDN." 
     254        self._check_query(query, city_or_country=True) 
     255        if self._country: 
     256            if ipregex.match(query): return cntry_code_by_addr(self._country, query) 
     257            else: return cntry_code_by_name(self._country, query) 
     258        else: 
     259            return self.city(query)['country_code'] 
     260 
     261    def country_name(self, query): 
     262        "Returns the country name for the given IP Address or FQDN." 
     263        self._check_query(query, city_or_country=True) 
     264        if self._country: 
     265            if ipregex.match(query): return cntry_name_by_addr(self._country, query) 
     266            else: return cntry_name_by_name(self._country, query) 
     267        else: 
     268            return self.city(query)['country_name'] 
     269 
     270    def country(self, query): 
     271        """ 
     272        Returns a dictonary with with the country code and name when given an  
     273        IP address or a Fully Qualified Domain Name (FQDN).  For example, both 
     274        '24.124.1.80' and 'djangoproject.com' are valid parameters. 
     275        """ 
     276        # Returning the country code and name 
     277        return {'country_code' : self.country_code(query),  
     278                'country_name' : self.country_name(query), 
     279                } 
    234280 
    235281    #### Coordinate retrieval routines #### 
    236     def _coords(self, query, ordering): 
     282    def coords(self, query, ordering=('longitude', 'latitude')): 
    237283        cdict = self.city(query) 
    238284        if cdict is None: return None 
    239285        else: return tuple(cdict[o] for o in ordering) 
    240      
     286 
    241287    def lon_lat(self, query): 
    242288        "Returns a tuple of the (longitude, latitude) for the given query." 
    243         return self._coords(query, ('longitude', 'latitude')) 
     289        return self.coords(query) 
    244290 
    245291    def lat_lon(self, query): 
    246292        "Returns a tuple of the (latitude, longitude) for the given query." 
    247         return self._coords(query, ('latitude', 'longitude')) 
     293        return self.coords(query, ('latitude', 'longitude')) 
    248294 
    249295    def geos(self, query): 
     
    262308            ci = 'No GeoIP Country data in "%s"' % self._country_file 
    263309        else: 
    264             ci = string_at(lgeoip.GeoIP_database_info(self._country)) 
     310            ci = geoip_dbinfo(self._country) 
    265311        return ci 
    266312    country_info = property(country_info) 
     
    271317            ci = 'No GeoIP City data in "%s"' % self._city_file 
    272318        else: 
    273             ci = string_at(lgeoip.GeoIP_database_info(self._city)) 
     319            ci = geoip_dbinfo(self._city) 
    274320        return ci 
    275321    city_info = property(city_info) 
     
    279325        return 'Country:\n\t%s\nCity:\n\t%s' % (self.country_info, self.city_info) 
    280326    info = property(info) 
     327 
     328    #### Methods for compatibility w/the GeoIP-Python API. #### 
     329    @classmethod 
     330    def open(cls, full_path, cache): 
     331        return GeoIP(full_path, cache) 
     332 
     333    def _rec_by_arg(self, arg): 
     334        if self._city: 
     335            return self.city(arg) 
     336        else: 
     337            return self.country(arg) 
     338    region_by_addr = city 
     339    region_by_name = city 
     340    record_by_addr = _rec_by_arg 
     341    record_by_name = _rec_by_arg 
     342    country_code_by_addr = country_code 
     343    country_code_by_name = country_code 
     344    country_name_by_addr = country_name 
     345    country_name_by_name = country_name