Ticket #1465: regex-field-lookups.3.diff

File regex-field-lookups.3.diff, 10.2 KB (added by Tom Tobin <korpios@…>, 17 years ago)

Patch against r5519 of trunk (post-oracle-merge) to implement regex field lookups

  • django/db/backends/mysql/base.py

    diff -r bd46520df514 django/db/backends/mysql/base.py
    a b OPERATOR_MAPPING = {  
    247247    'iexact': 'LIKE %s',
    248248    'contains': 'LIKE BINARY %s',
    249249    'icontains': 'LIKE %s',
     250    'regex': 'REGEXP BINARY %s',
     251    'iregex': 'REGEXP %s',
    250252    'gt': '> %s',
    251253    'gte': '>= %s',
    252254    'lt': '< %s',
  • django/db/backends/mysql_old/base.py

    diff -r bd46520df514 django/db/backends/mysql_old/base.py
    a b OPERATOR_MAPPING = {  
    248248    'iexact': 'LIKE %s',
    249249    'contains': 'LIKE BINARY %s',
    250250    'icontains': 'LIKE %s',
     251    'regex': 'REGEXP BINARY %s',
     252    'iregex': 'REGEXP %s',
    251253    'gt': '> %s',
    252254    'gte': '>= %s',
    253255    'lt': '< %s',
  • django/db/backends/postgresql/base.py

    diff -r bd46520df514 django/db/backends/postgresql/base.py
    a b OPERATOR_MAPPING = {  
    280280    'iexact': 'ILIKE %s',
    281281    'contains': 'LIKE %s',
    282282    'icontains': 'ILIKE %s',
     283    'regex': '~ %s',
     284    'iregex': '~* %s',
    283285    'gt': '> %s',
    284286    'gte': '>= %s',
    285287    'lt': '< %s',
  • django/db/backends/postgresql_psycopg2/base.py

    diff -r bd46520df514 django/db/backends/postgresql_psycopg2/base.py
    a b OPERATOR_MAPPING = {  
    225225    'iexact': 'ILIKE %s',
    226226    'contains': 'LIKE %s',
    227227    'icontains': 'ILIKE %s',
     228    'regex': '~ %s',
     229    'iregex': '~* %s',
    228230    'gt': '> %s',
    229231    'gte': '>= %s',
    230232    'lt': '< %s',
  • django/db/backends/sqlite3/base.py

    diff -r bd46520df514 django/db/backends/sqlite3/base.py
    a b class DatabaseWrapper(local):  
    6464            }
    6565            kwargs.update(self.options)
    6666            self.connection = Database.connect(**kwargs)
    67             # Register extract and date_trunc functions.
     67            # Register extract, date_trunc, and regexp functions.
    6868            self.connection.create_function("django_extract", 2, _sqlite_extract)
    6969            self.connection.create_function("django_date_trunc", 2, _sqlite_date_trunc)
     70            self.connection.create_function("regexp", 2, _sqlite_regexp)
    7071        cursor = self.connection.cursor(factory=SQLiteCursorWrapper)
    7172        cursor.row_factory = utf8rowFactory
    7273        if settings.DEBUG:
    def _sqlite_date_trunc(lookup_type, dt):  
    214215    elif lookup_type == 'day':
    215216        return "%i-%02i-%02i 00:00:00" % (dt.year, dt.month, dt.day)
    216217
     218def _sqlite_regexp(re_pattern, re_string):
     219    import re
     220    try:
     221        return bool(re.search(re_pattern, re_string))
     222    except:
     223        return False
     224
    217225# SQLite requires LIKE statements to include an ESCAPE clause if the value
    218226# being escaped has a percent or underscore in it.
    219227# See http://www.sqlite.org/lang_expr.html for an explanation.
    OPERATOR_MAPPING = {  
    222230    'iexact': "LIKE %s ESCAPE '\\'",
    223231    'contains': "LIKE %s ESCAPE '\\'",
    224232    'icontains': "LIKE %s ESCAPE '\\'",
     233    'regex': 'REGEXP %s',
     234    'iregex': "REGEXP '(?i)' || %s",
    225235    'gt': '> %s',
    226236    'gte': '>= %s',
    227237    'lt': '< %s',
  • django/db/models/fields/__init__.py

    diff -r bd46520df514 django/db/models/fields/__init__.py
    a b class Field(object):  
    174174
    175175    def get_db_prep_lookup(self, lookup_type, value):
    176176        "Returns field's value prepared for database lookup."
    177         if lookup_type in ('exact', 'gt', 'gte', 'lt', 'lte', 'month', 'day', 'search'):
     177        if lookup_type in ('exact', 'regex', 'iregex', 'gt', 'gte', 'lt', 'lte', 'month', 'day', 'search'):
    178178            return [value]
    179179        elif lookup_type in ('range', 'in'):
    180180            return value
  • django/db/models/query.py

    diff -r bd46520df514 django/db/models/query.py
    a b QUERY_TERMS = (  
    2222    'gt', 'gte', 'lt', 'lte', 'in',
    2323    'startswith', 'istartswith', 'endswith', 'iendswith',
    2424    'range', 'year', 'month', 'day', 'isnull', 'search',
     25    'regex', 'iregex',
    2526)
    2627
    2728# Size of each "chunk" for get_iterator calls.
    def get_where_clause(lookup_type, table_  
    797798        return "%s%s IS %sNULL" % (table_prefix, field_name, (not value and 'NOT ' or ''))
    798799    elif lookup_type == 'search':
    799800        return backend.get_fulltext_search_sql(table_prefix + field_name)
     801    elif lookup_type in ('regex', 'iregex'):
     802        raise NotImplementedError
    800803    raise TypeError, "Got invalid lookup_type: %s" % repr(lookup_type)
    801804
    802805def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0):
  • docs/db-api.txt

    diff -r bd46520df514 docs/db-api.txt
    a b Note this is only available in MySQL and  
    11731173Note this is only available in MySQL and requires direct manipulation of the
    11741174database to add the full-text index.
    11751175
     1176regex
     1177~~~~~
     1178
     1179Case-sensitive regular expression match.
     1180
     1181The regular expression syntax is that of the database backend in use; for the
     1182``sqlite3`` backend, the syntax is that of Python's ``re`` module.
     1183
     1184Example::
     1185
     1186    Entry.objects.get(title__regex=r'^(An?|The) +')
     1187
     1188SQL equivalents::
     1189
     1190    SELECT ... WHERE title REGEXP BINARY '^(An?|The) +'; -- MySQL
     1191
     1192    SELECT ... WHERE title ~ '^(An?|The) +'; -- PostgreSQL
     1193
     1194    SELECT ... WHERE title REGEXP '^(An?|The) +'; -- sqlite
     1195
     1196Using raw strings for passing in the regular expression syntax is recommended.
     1197
     1198Regular expression matching is not supported on the ``ado_mssql`` and
     1199``oracle`` backends; these will raise a ``NotImplementedError``.
     1200
     1201iregex
     1202~~~~~~
     1203
     1204Case-insensitive regular expression match.
     1205
     1206Example::
     1207
     1208    Entry.objects.get(title__iregex=r'^(an?|the) +')
     1209
     1210SQL equivalents::
     1211
     1212    SELECT ... WHERE title REGEXP '^(an?|the) +'; -- MySQL
     1213
     1214    SELECT ... WHERE title ~* '^(an?|the) +'; -- PostgreSQL
     1215
     1216    SELECT ... WHERE title REGEXP '(?i)^(an?|the) +'; -- sqlite
     1217
    11761218Default lookups are exact
    11771219-------------------------
    11781220
  • tests/modeltests/lookup/models.py

    diff -r bd46520df514 tests/modeltests/lookup/models.py
    a b Traceback (most recent call last):  
    251251    ...
    252252TypeError: Cannot resolve keyword 'headline__starts' into field. Choices are: id, headline, pub_date
    253253
     254# Create some articles with a bit more interesting headlines for testing field lookups:
     255>>> now = datetime.now()
     256>>> for a in Article.objects.all():
     257...     a.delete()
     258>>> a1 = Article(pub_date=now, headline='f')
     259>>> a1.save()
     260>>> a2 = Article(pub_date=now, headline='fo')
     261>>> a2.save()
     262>>> a3 = Article(pub_date=now, headline='foo')
     263>>> a3.save()
     264>>> a4 = Article(pub_date=now, headline='fooo')
     265>>> a4.save()
     266>>> a5 = Article(pub_date=now, headline='Foo')
     267>>> a5.save()
     268
     269# zero-or-more
     270>>> Article.objects.filter(headline__regex=r'fo*')
     271[<Article: f>, <Article: fo>, <Article: foo>, <Article: fooo>]
     272>>> Article.objects.filter(headline__iregex=r'fo*')
     273[<Article: Foo>, <Article: f>, <Article: fo>, <Article: foo>, <Article: fooo>]
     274
     275# one-or-more
     276>>> Article.objects.filter(headline__regex=r'fo+')
     277[<Article: fo>, <Article: foo>, <Article: fooo>]
     278
     279# zero-or-one (optional)
     280>>> Article.objects.filter(headline__regex=r'fooo?')
     281[<Article: foo>, <Article: fooo>]
     282
     283# and some more:
     284>>> a6 = Article(pub_date=now, headline='bar')
     285>>> a6.save()
     286>>> a7 = Article(pub_date=now, headline='Bar')
     287>>> a7.save()
     288>>> a8 = Article(pub_date=now, headline='baz')
     289>>> a8.save()
     290>>> a9 = Article(pub_date=now, headline='baZ')
     291>>> a9.save()
     292
     293# leading anchor
     294>>> Article.objects.filter(headline__regex=r'^b')
     295[<Article: baZ>, <Article: bar>, <Article: baz>]
     296>>> Article.objects.filter(headline__iregex=r'^b')
     297[<Article: Bar>, <Article: baZ>, <Article: bar>, <Article: baz>]
     298
     299# trailing anchor
     300>>> Article.objects.filter(headline__regex=r'z$')
     301[<Article: baz>]
     302>>> Article.objects.filter(headline__iregex=r'z$')
     303[<Article: baZ>, <Article: baz>]
     304
     305# character sets
     306>>> Article.objects.filter(headline__regex=r'ba[rz]')
     307[<Article: bar>, <Article: baz>]
     308>>> Article.objects.filter(headline__regex=r'ba[RZ]')
     309[<Article: baZ>]
     310>>> Article.objects.filter(headline__iregex=r'ba[RZ]')
     311[<Article: Bar>, <Article: baZ>, <Article: bar>, <Article: baz>]
     312
     313# and yet more:
     314>>> a10 = Article(pub_date=now, headline='foobar')
     315>>> a10.save()
     316>>> a11 = Article(pub_date=now, headline='foobaz')
     317>>> a11.save()
     318>>> a12 = Article(pub_date=now, headline='FooBarBaz')
     319>>> a12.save()
     320>>> a13 = Article(pub_date=now, headline='foobarbaz')
     321>>> a13.save()
     322>>> a14 = Article(pub_date=now, headline='zoocarfaz')
     323>>> a14.save()
     324>>> a15 = Article(pub_date=now, headline='barfoobaz')
     325>>> a15.save()
     326>>> a16 = Article(pub_date=now, headline='BAZBARFOO')
     327>>> a16.save()
     328
     329# alternation
     330>>> Article.objects.filter(headline__regex=r'foo(bar|baz)')
     331[<Article: barfoobaz>, <Article: foobar>, <Article: foobarbaz>, <Article: foobaz>]
     332>>> Article.objects.filter(headline__iregex=r'foo(bar|baz)')
     333[<Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobar>, <Article: foobarbaz>, <Article: foobaz>]
     334>>> Article.objects.filter(headline__regex=r'^foo(bar|baz)')
     335[<Article: foobar>, <Article: foobarbaz>, <Article: foobaz>]
     336
     337# greedy matching
     338>>> Article.objects.filter(headline__regex=r'f.*z')
     339[<Article: barfoobaz>, <Article: foobarbaz>, <Article: foobaz>, <Article: zoocarfaz>]
     340>>> Article.objects.filter(headline__iregex=r'f.*z')
     341[<Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobarbaz>, <Article: foobaz>, <Article: zoocarfaz>]
     342
     343# grouping and backreferences
     344>>> Article.objects.filter(headline__regex=r'b(.).*b\1')
     345[<Article: barfoobaz>, <Article: foobarbaz>]
     346>>> Article.objects.filter(headline__iregex=r'b(.).*b\1')
     347[<Article: BAZBARFOO>, <Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobarbaz>]
    254348"""}
Back to Top