Commit 05e29716 authored by Justin Bronn's avatar Justin Bronn
Browse files

Fixed #16553 -- Refactored the `GeoIP` module, moving it...

Fixed #16553 -- Refactored the `GeoIP` module, moving it `django.contrib.gis.geoip`; fixed memory leaks, and encoding issues.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@16783 bcc190cf-cafb-0310-a4f2-bffc1f526a37
parent efb83274
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
"""
 This module houses the GeoIP object, a ctypes wrapper for the MaxMind GeoIP(R)
 C API (http://www.maxmind.com/app/c).  This is an alternative to the GPL
 licensed Python GeoIP interface provided by MaxMind.

 GeoIP(R) is a registered trademark of MaxMind, LLC of Boston, Massachusetts.

 For IP-based geolocation, this module requires the GeoLite Country and City
 datasets, in binary format (CSV will not work!).  The datasets may be
 downloaded from MaxMind at http://www.maxmind.com/download/geoip/database/.
 Grab GeoIP.dat.gz and GeoLiteCity.dat.gz, and unzip them in the directory
 corresponding to settings.GEOIP_PATH.
"""
try:
    from django.contrib.gis.geoip.base import GeoIP, GeoIPException
    HAS_GEOIP = True
except:
    HAS_GEOIP = False
+257 −0
Original line number Diff line number Diff line
import os
import re
from ctypes import c_char_p

from django.core.validators import ipv4_re
from django.contrib.gis.geoip.libgeoip import GEOIP_SETTINGS
from django.contrib.gis.geoip.prototypes import (
    GeoIPRecord, GeoIPTag, GeoIP_open, GeoIP_delete, GeoIP_database_info,
    GeoIP_lib_version, GeoIP_record_by_addr, GeoIP_record_by_name,
    GeoIP_country_code_by_addr, GeoIP_country_code_by_name,
    GeoIP_country_name_by_addr, GeoIP_country_name_by_name)

# Regular expressions for recognizing the GeoIP free database editions.
free_regex = re.compile(r'^GEO-\d{3}FREE')
lite_regex = re.compile(r'^GEO-\d{3}LITE')

#### GeoIP classes ####
class GeoIPException(Exception): pass

class GeoIP(object):
    # The flags for GeoIP memory caching.
    # GEOIP_STANDARD - read database from filesystem, uses least memory.
    #
    # GEOIP_MEMORY_CACHE - load database into memory, faster performance
    #        but uses more memory
    #
    # GEOIP_CHECK_CACHE - check for updated database.  If database has been
    #        updated, reload filehandle and/or memory cache.  This option
    #        is not thread safe.
    #
    # GEOIP_INDEX_CACHE - just cache the most frequently accessed index
    #        portion of the database, resulting in faster lookups than
    #        GEOIP_STANDARD, but less memory usage than GEOIP_MEMORY_CACHE -
    #        useful for larger databases such as GeoIP Organization and
    #        GeoIP City.  Note, for GeoIP Country, Region and Netspeed
    #        databases, GEOIP_INDEX_CACHE is equivalent to GEOIP_MEMORY_CACHE
    #
    # GEOIP_MMAP_CACHE - load database into mmap shared memory ( not available
    #       on Windows).
    GEOIP_STANDARD     = 0
    GEOIP_MEMORY_CACHE = 1
    GEOIP_CHECK_CACHE  = 2
    GEOIP_INDEX_CACHE  = 4
    GEOIP_MMAP_CACHE   = 8
    cache_options = dict((opt, None) for opt in (0, 1, 2, 4, 8))

    # Paths to the city & country binary databases.
    _city_file = ''
    _country_file = ''

    # Initially, pointers to GeoIP file references are NULL.
    _city = None
    _country = None

    def __init__(self, path=None, cache=0, country=None, city=None):
        """
        Initializes the GeoIP object, no parameters are required to use default
        settings.  Keyword arguments may be passed in to customize the locations
        of the GeoIP data sets.

        * path: Base directory to where GeoIP data is located or the full path
            to where the city or country data files (*.dat) are located.
            Assumes that both the city and country data sets are located in
            this directory; overrides the GEOIP_PATH settings attribute.

        * cache: The cache settings when opening up the GeoIP datasets,
            and may be an integer in (0, 1, 2, 4, 8) corresponding to
            the GEOIP_STANDARD, GEOIP_MEMORY_CACHE, GEOIP_CHECK_CACHE,
            GEOIP_INDEX_CACHE, and GEOIP_MMAP_CACHE, `GeoIPOptions` C API
            settings,  respectively.  Defaults to 0, meaning that the data is read
            from the disk.

        * country: The name of the GeoIP country data file.  Defaults to
            'GeoIP.dat'; overrides the GEOIP_COUNTRY settings attribute.

        * city: The name of the GeoIP city data file.  Defaults to
            'GeoLiteCity.dat'; overrides the GEOIP_CITY settings attribute.
        """
        # Checking the given cache option.
        if cache in self.cache_options:
            self._cache = cache
        else:
            raise GeoIPException('Invalid GeoIP caching option: %s' % cache)

        # Getting the GeoIP data path.
        if not path:
            path = GEOIP_SETTINGS.get('GEOIP_PATH', None)
            if not path: raise GeoIPException('GeoIP path must be provided via parameter or the GEOIP_PATH setting.')
        if not isinstance(path, basestring):
            raise TypeError('Invalid path type: %s' % type(path).__name__)

        if os.path.isdir(path):
            # Constructing the GeoIP database filenames using the settings
            # dictionary.  If the database files for the GeoLite country
            # and/or city datasets exist, then try and open them.
            country_db = os.path.join(path, country or GEOIP_SETTINGS.get('GEOIP_COUNTRY', 'GeoIP.dat'))
            if os.path.isfile(country_db):
                self._country = GeoIP_open(country_db, cache)
                self._country_file = country_db

            city_db = os.path.join(path, city or GEOIP_SETTINGS.get('GEOIP_CITY', 'GeoLiteCity.dat'))
            if os.path.isfile(city_db):
                self._city = GeoIP_open(city_db, cache)
                self._city_file = city_db
        elif os.path.isfile(path):
            # Otherwise, some detective work will be needed to figure
            # out whether the given database path is for the GeoIP country
            # or city databases.
            ptr = GeoIP_open(path, cache)
            info = GeoIP_database_info(ptr)
            if lite_regex.match(info):
                # GeoLite City database detected.
                self._city = ptr
                self._city_file = path
            elif free_regex.match(info):
                # GeoIP Country database detected.
                self._country = ptr
                self._country_file = path
            else:
                raise GeoIPException('Unable to recognize database edition: %s' % info)
        else:
            raise GeoIPException('GeoIP path must be a valid file or directory.')

    def __del__(self):
        # Cleaning any GeoIP file handles lying around.
        if self._country: GeoIP_delete(self._country)
        if self._city: GeoIP_delete(self._city)

    def _check_query(self, query, country=False, city=False, city_or_country=False):
        "Helper routine for checking the query and database availability."
        # Making sure a string was passed in for the query.
        if not isinstance(query, basestring):
            raise TypeError('GeoIP query must be a string, not type %s' % type(query).__name__)

        # Extra checks for the existence of country and city databases.
        if city_or_country and not (self._country or self._city):
            raise GeoIPException('Invalid GeoIP country and city data files.')
        elif country and not self._country:
            raise GeoIPException('Invalid GeoIP country data file: %s' % self._country_file)
        elif city and not self._city:
            raise GeoIPException('Invalid GeoIP city data file: %s' % self._city_file)

    def city(self, query):
        """
        Returns a dictionary of city information for the given IP address or
        Fully Qualified Domain Name (FQDN).  Some information in the dictionary
        may be undefined (None).
        """
        self._check_query(query, city=True)
        if ipv4_re.match(query):
            # If an IP address was passed in
            return GeoIP_record_by_addr(self._city, c_char_p(query))
        else:
            # If a FQDN was passed in.
            return GeoIP_record_by_name(self._city, c_char_p(query))

    def country_code(self, query):
        "Returns the country code for the given IP Address or FQDN."
        self._check_query(query, city_or_country=True)
        if self._country:
            if ipv4_re.match(query):
                return GeoIP_country_code_by_addr(self._country, query)
            else:
                return GeoIP_country_code_by_name(self._country, query)
        else:
            return self.city(query)['country_code']

    def country_name(self, query):
        "Returns the country name for the given IP Address or FQDN."
        self._check_query(query, city_or_country=True)
        if self._country:
            if ipv4_re.match(query):
                return GeoIP_country_name_by_addr(self._country, query)
            else:
                return GeoIP_country_name_by_name(self._country, query)
        else:
            return self.city(query)['country_name']

    def country(self, query):
        """
        Returns a dictonary with with the country code and name when given an
        IP address or a Fully Qualified Domain Name (FQDN).  For example, both
        '24.124.1.80' and 'djangoproject.com' are valid parameters.
        """
        # Returning the country code and name
        return {'country_code' : self.country_code(query),
                'country_name' : self.country_name(query),
                }

    #### Coordinate retrieval routines ####
    def coords(self, query, ordering=('longitude', 'latitude')):
        cdict = self.city(query)
        if cdict is None: return None
        else: return tuple(cdict[o] for o in ordering)

    def lon_lat(self, query):
        "Returns a tuple of the (longitude, latitude) for the given query."
        return self.coords(query)

    def lat_lon(self, query):
        "Returns a tuple of the (latitude, longitude) for the given query."
        return self.coords(query, ('latitude', 'longitude'))

    def geos(self, query):
        "Returns a GEOS Point object for the given query."
        ll = self.lon_lat(query)
        if ll:
            from django.contrib.gis.geos import Point
            return Point(ll, srid=4326)
        else:
            return None

    #### GeoIP Database Information Routines ####
    @property
    def country_info(self):
        "Returns information about the GeoIP country database."
        if self._country is None:
            ci = 'No GeoIP Country data in "%s"' % self._country_file
        else:
            ci = GeoIP_database_info(self._country)
        return ci

    @property
    def city_info(self):
        "Retuns information about the GeoIP city database."
        if self._city is None:
            ci = 'No GeoIP City data in "%s"' % self._city_file
        else:
            ci = GeoIP_database_info(self._city)
        return ci

    @property
    def info(self):
        "Returns information about the GeoIP library and databases in use."
        info = ''
        if GeoIP_lib_version:
            info += 'GeoIP Library:\n\t%s\n' % GeoIP_lib_version()
        return info + 'Country:\n\t%s\nCity:\n\t%s' % (self.country_info, self.city_info)

    #### Methods for compatibility w/the GeoIP-Python API. ####
    @classmethod
    def open(cls, full_path, cache):
        return GeoIP(full_path, cache)

    def _rec_by_arg(self, arg):
        if self._city:
            return self.city(arg)
        else:
            return self.country(arg)
    region_by_addr = city
    region_by_name = city
    record_by_addr = _rec_by_arg
    record_by_name = _rec_by_arg
    country_code_by_addr = country_code
    country_code_by_name = country_code
    country_name_by_addr = country_name
    country_name_by_name = country_name
+31 −0
Original line number Diff line number Diff line
import os
from ctypes import CDLL
from ctypes.util import find_library
from django.conf import settings

# Creating the settings dictionary with any settings, if needed.
GEOIP_SETTINGS = dict((key, getattr(settings, key))
                      for key in ('GEOIP_PATH', 'GEOIP_LIBRARY_PATH', 'GEOIP_COUNTRY', 'GEOIP_CITY')
                      if hasattr(settings, key))
lib_path = GEOIP_SETTINGS.get('GEOIP_LIBRARY_PATH', None)

# The shared library for the GeoIP C API.  May be downloaded
#  from http://www.maxmind.com/download/geoip/api/c/
if lib_path:
    lib_name = None
else:
    # TODO: Is this really the library name for Windows?
    lib_name = 'GeoIP'

# Getting the path to the GeoIP library.
if lib_name: lib_path = find_library(lib_name)
if lib_path is None: raise GeoIPException('Could not find the GeoIP library (tried "%s"). '
                                          'Try setting GEOIP_LIBRARY_PATH in your settings.' % lib_name)
lgeoip = CDLL(lib_path)

# Getting the C `free` for the platform.
if os.name == 'nt':
    libc = CDLL('msvcrt')
else:
    libc = CDLL(None)
free = libc.free
+109 −0
Original line number Diff line number Diff line
from ctypes import c_char_p, c_float, c_int, string_at, Structure, POINTER
from django.contrib.gis.geoip.libgeoip import lgeoip, free

#### GeoIP C Structure definitions ####

class GeoIPRecord(Structure):
    _fields_ = [('country_code', c_char_p),
                ('country_code3', c_char_p),
                ('country_name', c_char_p),
                ('region', c_char_p),
                ('city', c_char_p),
                ('postal_code', c_char_p),
                ('latitude', c_float),
                ('longitude', c_float),
                # TODO: In 1.4.6 this changed from `int dma_code;` to
                # `union {int metro_code; int dma_code;};`.  Change
                # to a `ctypes.Union` in to accomodate in future when
                # pre-1.4.6 versions are no longer distributed.
                ('dma_code', c_int),
                ('area_code', c_int),
                ('charset', c_int),
                ('continent_code', c_char_p),
                ]
geoip_char_fields = [name for name, ctype in GeoIPRecord._fields_ if ctype is c_char_p]
geoip_encodings = { 0: 'iso-8859-1',
                    1: 'utf8',
                    }

class GeoIPTag(Structure): pass

RECTYPE = POINTER(GeoIPRecord)
DBTYPE = POINTER(GeoIPTag)

#### ctypes function prototypes ####

# GeoIP_lib_version appeared in version 1.4.7.
if hasattr(lgeoip, 'GeoIP_lib_version'):
    GeoIP_lib_version = lgeoip.GeoIP_lib_version
    GeoIP_lib_version.argtypes = None
    GeoIP_lib_version.restype = c_char_p
else:
    GeoIP_lib_version = None

# For freeing memory allocated within a record
GeoIPRecord_delete = lgeoip.GeoIPRecord_delete
GeoIPRecord_delete.argtypes = [RECTYPE]
GeoIPRecord_delete.restype = None

# For retrieving records by name or address.
def check_record(result, func, cargs):
    if bool(result):
        # Checking the pointer to the C structure, if valid pull out elements
        # into a dicionary.
        rec = result.contents
        record = dict((fld, getattr(rec, fld)) for fld, ctype in rec._fields_)

        # Now converting the strings to unicode using the proper encoding.
        encoding = geoip_encodings[record['charset']]
        for char_field in geoip_char_fields:
            if record[char_field]:
                record[char_field] = record[char_field].decode(encoding)

        # Free the memory allocated for the struct & return.
        GeoIPRecord_delete(result)
        return record
    else:
        return None

def record_output(func):
    func.argtypes = [DBTYPE, c_char_p]
    func.restype = RECTYPE
    func.errcheck = check_record
    return func
GeoIP_record_by_addr = record_output(lgeoip.GeoIP_record_by_addr)
GeoIP_record_by_name = record_output(lgeoip.GeoIP_record_by_name)


# For opening & closing GeoIP database files.
GeoIP_open = lgeoip.GeoIP_open
GeoIP_open.restype = DBTYPE
GeoIP_delete = lgeoip.GeoIP_delete
GeoIP_delete.argtypes = [DBTYPE]
GeoIP_delete.restype = None

# This is so the string pointer can be freed within Python.
class geoip_char_p(c_char_p):
    pass

def check_string(result, func, cargs):
    if result:
        s = string_at(result)
        free(result)
    else:
        s = ''
    return s

GeoIP_database_info = lgeoip.GeoIP_database_info
GeoIP_database_info.restype = geoip_char_p
GeoIP_database_info.errcheck = check_string

# String output routines.
def string_output(func):
    func.restype = c_char_p
    return func

GeoIP_country_code_by_addr = string_output(lgeoip.GeoIP_country_code_by_addr)
GeoIP_country_code_by_name = string_output(lgeoip.GeoIP_country_code_by_name)
GeoIP_country_name_by_addr = string_output(lgeoip.GeoIP_country_name_by_addr)
GeoIP_country_name_by_name = string_output(lgeoip.GeoIP_country_name_by_name)
+14 −7
Original line number Diff line number Diff line
import os
import unittest
from django.db import settings
from django.conf import settings
from django.contrib.gis.geos import GEOSGeometry
from django.contrib.gis.utils import GeoIP, GeoIPException
from django.contrib.gis.geoip import GeoIP, GeoIPException
from django.utils import unittest

# Note: Requires use of both the GeoIP country and city datasets.
# The GEOIP_DATA path should be the only setting set (the directory
@@ -69,8 +69,8 @@ class GeoIPTest(unittest.TestCase):
        "Testing GeoIP city querying methods."
        g = GeoIP(country='<foo>')

        addr = '130.80.29.3'
        fqdn = 'chron.com'
        addr = '128.249.1.1'
        fqdn = 'tmc.edu'
        for query in (fqdn, addr):
            # Country queries should still work.
            for func in (g.country_code, g.country_code_by_addr, g.country_code_by_name):
@@ -88,17 +88,24 @@ class GeoIPTest(unittest.TestCase):
            self.assertEqual(713, d['area_code'])
            geom = g.geos(query)
            self.failIf(not isinstance(geom, GEOSGeometry))
            lon, lat = (-95.3670, 29.7523)
            lon, lat = (-95.4010, 29.7079)
            lat_lon = g.lat_lon(query)
            lat_lon = (lat_lon[1], lat_lon[0])
            for tup in (geom.tuple, g.coords(query), g.lon_lat(query), lat_lon):
                self.assertAlmostEqual(lon, tup[0], 4)
                self.assertAlmostEqual(lat, tup[1], 4)

    def test05_unicode(self):
        "Testing that GeoIP strings are properly encoded, see #16553."
        g = GeoIP()
        d = g.city('62.224.93.23')
        self.assertEqual(u'Sch\xf6mberg', d['city'])


def suite():
    s = unittest.TestSuite()
    s.addTest(unittest.makeSuite(GeoIPTest))
    return s

def run(verbosity=2):
def run(verbosity=1):
    unittest.TextTestRunner(verbosity=verbosity).run(suite())
Loading