Ticket #1465: regex-field-lookups.2.diff

File regex-field-lookups.2.diff, 10.9 KB (added by Tom Tobin <korpios@…>, 17 years ago)

Patch against r5490 of trunk to implement regex field lookups, w/docs, w/tests

  • django/db/backends/mysql/base.py

    diff -r 8208e2011e15 django/db/backends/mysql/base.py
    a b OPERATOR_MAPPING = {  
    228228    'iexact': 'LIKE %s',
    229229    'contains': 'LIKE BINARY %s',
    230230    'icontains': 'LIKE %s',
     231    'regex': 'REGEXP BINARY %s',
     232    'iregex': 'REGEXP %s',
    231233    'gt': '> %s',
    232234    'gte': '>= %s',
    233235    'lt': '< %s',
  • django/db/backends/mysql_old/base.py

    diff -r 8208e2011e15 django/db/backends/mysql_old/base.py
    a b OPERATOR_MAPPING = {  
    229229    'iexact': 'LIKE %s',
    230230    'contains': 'LIKE BINARY %s',
    231231    'icontains': 'LIKE %s',
     232    'regex': 'REGEXP BINARY %s',
     233    'iregex': 'REGEXP %s',
    232234    'gt': '> %s',
    233235    'gte': '>= %s',
    234236    'lt': '< %s',
  • django/db/backends/postgresql/base.py

    diff -r 8208e2011e15 django/db/backends/postgresql/base.py
    a b OPERATOR_MAPPING = {  
    261261    'iexact': 'ILIKE %s',
    262262    'contains': 'LIKE %s',
    263263    'icontains': 'ILIKE %s',
     264    'regex': '~ %s',
     265    'iregex': '~* %s',
    264266    'gt': '> %s',
    265267    'gte': '>= %s',
    266268    'lt': '< %s',
  • django/db/backends/postgresql_psycopg2/base.py

    diff -r 8208e2011e15 django/db/backends/postgresql_psycopg2/base.py
    a b OPERATOR_MAPPING = {  
    206206    'iexact': 'ILIKE %s',
    207207    'contains': 'LIKE %s',
    208208    'icontains': 'ILIKE %s',
     209    'regex': '~ %s',
     210    'iregex': '~* %s',
    209211    'gt': '> %s',
    210212    'gte': '>= %s',
    211213    'lt': '< %s',
  • django/db/backends/sqlite3/base.py

    diff -r 8208e2011e15 django/db/backends/sqlite3/base.py
    a b class DatabaseWrapper(local):  
    6464            }
    6565            kwargs.update(self.options)
    6666            self.connection = Database.connect(**kwargs)
    67             # Register extract and date_trunc functions.
     67            # Register extract, date_trunc, and regexp functions.
    6868            self.connection.create_function("django_extract", 2, _sqlite_extract)
    6969            self.connection.create_function("django_date_trunc", 2, _sqlite_date_trunc)
     70            self.connection.create_function("regexp", 2, _sqlite_regexp)
    7071        cursor = self.connection.cursor(factory=SQLiteCursorWrapper)
    7172        cursor.row_factory = utf8rowFactory
    7273        if settings.DEBUG:
    def get_sql_flush(style, tables, sequenc  
    164165    """Return a list of SQL statements required to remove all data from
    165166    all tables in the database (without actually removing the tables
    166167    themselves) and put the database in an empty 'initial' state
    167    
     168
    168169    """
    169170    # NB: The generated SQL below is specific to SQLite
    170171    # Note: The DELETE FROM... SQL generated below works for SQLite databases
    def get_sql_sequence_reset(style, model_  
    182183    "Returns a list of the SQL statements to reset sequences for the given models."
    183184    # No sequence reset required
    184185    return []
    185    
     186
    186187def _sqlite_date_trunc(lookup_type, dt):
    187188    try:
    188189        dt = util.typecast_timestamp(dt)
    def _sqlite_date_trunc(lookup_type, dt):  
    195196    elif lookup_type == 'day':
    196197        return "%i-%02i-%02i 00:00:00" % (dt.year, dt.month, dt.day)
    197198
     199def _sqlite_regexp(re_pattern, re_string):
     200    import re
     201    try:
     202        return bool(re.search(re_pattern, re_string))
     203    except:
     204        return False
     205
    198206# SQLite requires LIKE statements to include an ESCAPE clause if the value
    199207# being escaped has a percent or underscore in it.
    200208# See http://www.sqlite.org/lang_expr.html for an explanation.
    OPERATOR_MAPPING = {  
    203211    'iexact': "LIKE %s ESCAPE '\\'",
    204212    'contains': "LIKE %s ESCAPE '\\'",
    205213    'icontains': "LIKE %s ESCAPE '\\'",
     214    'regex': 'REGEXP %s',
     215    'iregex': "REGEXP '(?i)' || %s",
    206216    'gt': '> %s',
    207217    'gte': '>= %s',
    208218    'lt': '< %s',
  • django/db/models/fields/__init__.py

    diff -r 8208e2011e15 django/db/models/fields/__init__.py
    a b class Field(object):  
    169169
    170170    def get_db_prep_lookup(self, lookup_type, value):
    171171        "Returns field's value prepared for database lookup."
    172         if lookup_type in ('exact', 'gt', 'gte', 'lt', 'lte', 'month', 'day', 'search'):
     172        if lookup_type in ('exact', 'regex', 'iregex', 'gt', 'gte', 'lt', 'lte', 'month', 'day', 'search'):
    173173            return [value]
    174174        elif lookup_type in ('range', 'in'):
    175175            return value
  • django/db/models/query.py

    diff -r 8208e2011e15 django/db/models/query.py
    a b QUERY_TERMS = (  
    2020    'gt', 'gte', 'lt', 'lte', 'in',
    2121    'startswith', 'istartswith', 'endswith', 'iendswith',
    2222    'range', 'year', 'month', 'day', 'isnull', 'search',
     23    'regex', 'iregex',
    2324)
    2425
    2526# Size of each "chunk" for get_iterator calls.
    def get_where_clause(lookup_type, table_  
    748749        return "%s%s IS %sNULL" % (table_prefix, field_name, (not value and 'NOT ' or ''))
    749750    elif lookup_type == 'search':
    750751        return backend.get_fulltext_search_sql(table_prefix + field_name)
     752    elif lookup_type in ('regex', 'iregex'):
     753        raise NotImplementedError
    751754    raise TypeError, "Got invalid lookup_type: %s" % repr(lookup_type)
    752755
    753756def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0):
  • docs/db-api.txt

    diff -r 8208e2011e15 docs/db-api.txt
    a b Note this is only available in MySQL and  
    11731173Note this is only available in MySQL and requires direct manipulation of the
    11741174database to add the full-text index.
    11751175
     1176regex
     1177~~~~~
     1178
     1179Case-sensitive regular expression match.
     1180
     1181The regular expression syntax is that of the database backend in use; for the
     1182``sqlite`` backend, the syntax is that of Python's ``re`` module.
     1183
     1184Example::
     1185
     1186    Entry.objects.get(title__regex=r'^(An?|The) +')
     1187
     1188SQL equivalents::
     1189
     1190    SELECT ... WHERE title REGEXP BINARY '^(An?|The) +'; -- MySQL
     1191
     1192    SELECT ... WHERE title ~ '^(An?|The) +'; -- PostgreSQL
     1193
     1194    SELECT ... WHERE title REGEXP '^(An?|The) +'; -- sqlite
     1195
     1196Passing the regular expression as a raw string is recommended.
     1197
     1198Regular expression matching is not supported on the ``ado_mssql`` and
     1199``oracle`` backends; these will raise a ``NotImplementedError``.
     1200
     1201iregex
     1202~~~~~~
     1203
     1204Case-insensitive regular expression match.
     1205
     1206Example::
     1207
     1208    Entry.objects.get(title__iregex=r'^(an?|the) +')
     1209
     1210SQL equivalents::
     1211
     1212    SELECT ... WHERE title REGEXP '^(an?|the) +'; -- MySQL
     1213
     1214    SELECT ... WHERE title ~* '^(an?|the) +'; -- PostgreSQL
     1215
     1216    SELECT ... WHERE title REGEXP '(?i)^(an?|the) +'; -- sqlite
     1217
    11761218Default lookups are exact
    11771219-------------------------
    11781220
  • tests/modeltests/lookup/models.py

    diff -r 8208e2011e15 tests/modeltests/lookup/models.py
    a b Traceback (most recent call last):  
    251251    ...
    252252TypeError: Cannot resolve keyword 'headline__starts' into field. Choices are: id, headline, pub_date
    253253
     254# Create some articles with a bit more interesting headlines for testing field lookups:
     255>>> now = datetime.now()
     256>>> for a in Article.objects.all():
     257...     a.delete()
     258>>> a1 = Article(pub_date=now, headline='f')
     259>>> a1.save()
     260>>> a2 = Article(pub_date=now, headline='fo')
     261>>> a2.save()
     262>>> a3 = Article(pub_date=now, headline='foo')
     263>>> a3.save()
     264>>> a4 = Article(pub_date=now, headline='fooo')
     265>>> a4.save()
     266>>> a5 = Article(pub_date=now, headline='Foo')
     267>>> a5.save()
     268
     269# zero-or-more
     270>>> Article.objects.filter(headline__regex=r'fo*')
     271[<Article: f>, <Article: fo>, <Article: foo>, <Article: fooo>]
     272>>> Article.objects.filter(headline__iregex=r'fo*')
     273[<Article: Foo>, <Article: f>, <Article: fo>, <Article: foo>, <Article: fooo>]
     274
     275# one-or-more
     276>>> Article.objects.filter(headline__regex=r'fo+')
     277[<Article: fo>, <Article: foo>, <Article: fooo>]
     278
     279# wildcard
     280>>> Article.objects.filter(headline__regex=r'fooo?')
     281[<Article: foo>, <Article: fooo>]
     282
     283# and some more:
     284>>> a6 = Article(pub_date=now, headline='bar')
     285>>> a6.save()
     286>>> a7 = Article(pub_date=now, headline='Bar')
     287>>> a7.save()
     288>>> a8 = Article(pub_date=now, headline='baz')
     289>>> a8.save()
     290>>> a9 = Article(pub_date=now, headline='baZ')
     291>>> a9.save()
     292
     293# leading anchor
     294>>> Article.objects.filter(headline__regex=r'^b')
     295[<Article: baZ>, <Article: bar>, <Article: baz>]
     296>>> Article.objects.filter(headline__iregex=r'^b')
     297[<Article: Bar>, <Article: baZ>, <Article: bar>, <Article: baz>]
     298
     299# trailing anchor
     300>>> Article.objects.filter(headline__regex=r'z$')
     301[<Article: baz>]
     302>>> Article.objects.filter(headline__iregex=r'z$')
     303[<Article: baZ>, <Article: baz>]
     304
     305# character sets
     306>>> Article.objects.filter(headline__regex=r'ba[rz]')
     307[<Article: bar>, <Article: baz>]
     308>>> Article.objects.filter(headline__regex=r'ba[RZ]')
     309[<Article: baZ>]
     310>>> Article.objects.filter(headline__iregex=r'ba[RZ]')
     311[<Article: Bar>, <Article: baZ>, <Article: bar>, <Article: baz>]
     312
     313# and yet more:
     314>>> a10 = Article(pub_date=now, headline='foobar')
     315>>> a10.save()
     316>>> a11 = Article(pub_date=now, headline='foobaz')
     317>>> a11.save()
     318>>> a12 = Article(pub_date=now, headline='FooBarBaz')
     319>>> a12.save()
     320>>> a13 = Article(pub_date=now, headline='foobarbaz')
     321>>> a13.save()
     322>>> a14 = Article(pub_date=now, headline='zoocarfaz')
     323>>> a14.save()
     324>>> a15 = Article(pub_date=now, headline='barfoobaz')
     325>>> a15.save()
     326>>> a16 = Article(pub_date=now, headline='BAZBARFOO')
     327>>> a16.save()
     328
     329# alternation
     330>>> Article.objects.filter(headline__regex=r'foo(bar|baz)')
     331[<Article: barfoobaz>, <Article: foobar>, <Article: foobarbaz>, <Article: foobaz>]
     332>>> Article.objects.filter(headline__iregex=r'foo(bar|baz)')
     333[<Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobar>, <Article: foobarbaz>, <Article: foobaz>]
     334>>> Article.objects.filter(headline__regex=r'^foo(bar|baz)')
     335[<Article: foobar>, <Article: foobarbaz>, <Article: foobaz>]
     336
     337# greedy matching
     338>>> Article.objects.filter(headline__regex=r'f.*z')
     339[<Article: barfoobaz>, <Article: foobarbaz>, <Article: foobaz>, <Article: zoocarfaz>]
     340>>> Article.objects.filter(headline__iregex=r'f.*z')
     341[<Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobarbaz>, <Article: foobaz>, <Article: zoocarfaz>]
     342
     343# grouping and backreferences
     344>>> Article.objects.filter(headline__regex=r'b(.).*b\1')
     345[<Article: barfoobaz>, <Article: foobarbaz>]
     346>>> Article.objects.filter(headline__iregex=r'b(.).*b\1')
     347[<Article: BAZBARFOO>, <Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobarbaz>]
    254348"""}
Back to Top