Code

Ticket #1465: regex-field-lookups.patch

File regex-field-lookups.patch, 10.5 KB (added by Tom Tobin <korpios@…, 7 years ago)

Added (yet-untested) oracle support.

  • django/db/backends/mysql/base.py

    diff -r 7a0796fe7efd django/db/backends/mysql/base.py
    a b OPERATOR_MAPPING = { 
    247247    'iexact': 'LIKE %s', 
    248248    'contains': 'LIKE BINARY %s', 
    249249    'icontains': 'LIKE %s', 
     250    'regex': 'REGEXP BINARY %s', 
     251    'iregex': 'REGEXP %s', 
    250252    'gt': '> %s', 
    251253    'gte': '>= %s', 
    252254    'lt': '< %s', 
  • django/db/backends/mysql_old/base.py

    diff -r 7a0796fe7efd django/db/backends/mysql_old/base.py
    a b OPERATOR_MAPPING = { 
    248248    'iexact': 'LIKE %s', 
    249249    'contains': 'LIKE BINARY %s', 
    250250    'icontains': 'LIKE %s', 
     251    'regex': 'REGEXP BINARY %s', 
     252    'iregex': 'REGEXP %s', 
    251253    'gt': '> %s', 
    252254    'gte': '>= %s', 
    253255    'lt': '< %s', 
  • django/db/backends/postgresql/base.py

    diff -r 7a0796fe7efd django/db/backends/postgresql/base.py
    a b OPERATOR_MAPPING = { 
    280280    'iexact': 'ILIKE %s', 
    281281    'contains': 'LIKE %s', 
    282282    'icontains': 'ILIKE %s', 
     283    'regex': '~ %s', 
     284    'iregex': '~* %s', 
    283285    'gt': '> %s', 
    284286    'gte': '>= %s', 
    285287    'lt': '< %s', 
  • django/db/backends/postgresql_psycopg2/base.py

    diff -r 7a0796fe7efd django/db/backends/postgresql_psycopg2/base.py
    a b OPERATOR_MAPPING = { 
    225225    'iexact': 'ILIKE %s', 
    226226    'contains': 'LIKE %s', 
    227227    'icontains': 'ILIKE %s', 
     228    'regex': '~ %s', 
     229    'iregex': '~* %s', 
    228230    'gt': '> %s', 
    229231    'gte': '>= %s', 
    230232    'lt': '< %s', 
  • django/db/backends/sqlite3/base.py

    diff -r 7a0796fe7efd django/db/backends/sqlite3/base.py
    a b class DatabaseWrapper(local): 
    6464            } 
    6565            kwargs.update(self.options) 
    6666            self.connection = Database.connect(**kwargs) 
    67             # Register extract and date_trunc functions. 
     67            # Register extract, date_trunc, and regexp functions. 
    6868            self.connection.create_function("django_extract", 2, _sqlite_extract) 
    6969            self.connection.create_function("django_date_trunc", 2, _sqlite_date_trunc) 
     70            self.connection.create_function("regexp", 2, _sqlite_regexp) 
    7071        cursor = self.connection.cursor(factory=SQLiteCursorWrapper) 
    7172        cursor.row_factory = utf8rowFactory 
    7273        if settings.DEBUG: 
    def _sqlite_date_trunc(lookup_type, dt): 
    214215    elif lookup_type == 'day': 
    215216        return "%i-%02i-%02i 00:00:00" % (dt.year, dt.month, dt.day) 
    216217 
     218def _sqlite_regexp(re_pattern, re_string): 
     219    import re 
     220    try: 
     221        return bool(re.search(re_pattern, re_string)) 
     222    except: 
     223        return False 
     224 
    217225# SQLite requires LIKE statements to include an ESCAPE clause if the value 
    218226# being escaped has a percent or underscore in it. 
    219227# See http://www.sqlite.org/lang_expr.html for an explanation. 
    OPERATOR_MAPPING = { 
    222230    'iexact': "LIKE %s ESCAPE '\\'", 
    223231    'contains': "LIKE %s ESCAPE '\\'", 
    224232    'icontains': "LIKE %s ESCAPE '\\'", 
     233    'regex': 'REGEXP %s', 
     234    'iregex': "REGEXP '(?i)' || %s", 
    225235    'gt': '> %s', 
    226236    'gte': '>= %s', 
    227237    'lt': '< %s', 
  • django/db/models/fields/__init__.py

    diff -r 7a0796fe7efd django/db/models/fields/__init__.py
    a b class Field(object): 
    174174 
    175175    def get_db_prep_lookup(self, lookup_type, value): 
    176176        "Returns field's value prepared for database lookup." 
    177         if lookup_type in ('exact', 'gt', 'gte', 'lt', 'lte', 'month', 'day', 'search'): 
     177        if lookup_type in ('exact', 'regex', 'iregex', 'gt', 'gte', 'lt', 'lte', 'month', 'day', 'search'): 
    178178            return [value] 
    179179        elif lookup_type in ('range', 'in'): 
    180180            return value 
  • django/db/models/query.py

    diff -r 7a0796fe7efd django/db/models/query.py
    a b QUERY_TERMS = ( 
    2222    'gt', 'gte', 'lt', 'lte', 'in', 
    2323    'startswith', 'istartswith', 'endswith', 'iendswith', 
    2424    'range', 'year', 'month', 'day', 'isnull', 'search', 
     25    'regex', 'iregex', 
    2526) 
    2627 
    2728# Size of each "chunk" for get_iterator calls. 
    def get_where_clause(lookup_type, table_ 
    797798        return "%s%s IS %sNULL" % (table_prefix, field_name, (not value and 'NOT ' or '')) 
    798799    elif lookup_type == 'search': 
    799800        return backend.get_fulltext_search_sql(table_prefix + field_name) 
     801    elif lookup_type in ('regex', 'iregex'): 
     802        if settings.DATABASE_ENGINE == 'oracle': 
     803            if lookup_type == 'regex': 
     804                match_option = 'c' 
     805            else: 
     806                match_option = 'i' 
     807            return "REGEXP_LIKE(%s%s, %s, '%s')" % (table_prefix, field_name, cast_sql, match_option) 
     808        else: 
     809            raise NotImplementedError 
    800810    raise TypeError, "Got invalid lookup_type: %s" % repr(lookup_type) 
    801811 
    802812def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0): 
  • docs/db-api.txt

    diff -r 7a0796fe7efd docs/db-api.txt
    a b Note this is only available in MySQL and 
    11731173Note this is only available in MySQL and requires direct manipulation of the 
    11741174database to add the full-text index. 
    11751175 
     1176regex 
     1177~~~~~ 
     1178 
     1179Case-sensitive regular expression match. 
     1180 
     1181The regular expression syntax is that of the database backend in use. The 
     1182``sqlite3`` backend matches with Python's ``re`` module, so it uses that syntax. 
     1183 
     1184Example:: 
     1185 
     1186    Entry.objects.get(title__regex=r'^(An?|The) +') 
     1187 
     1188SQL equivalents:: 
     1189 
     1190    SELECT ... WHERE title REGEXP BINARY '^(An?|The) +'; -- MySQL 
     1191 
     1192    SELECT ... WHERE title ~ '^(An?|The) +'; -- PostgreSQL 
     1193 
     1194    SELECT ... WHERE title REGEXP '^(An?|The) +'; -- sqlite 
     1195 
     1196Passing the regular expression as a raw string (``r'...'``) is recommended. 
     1197 
     1198Regular expression matching is not supported on the ``ado_mssql`` backend, 
     1199which raises a ``NotImplementedError``; ``oracle`` support is as yet untested. 
     1200 
     1201iregex 
     1202~~~~~~ 
     1203 
     1204Case-insensitive regular expression match. 
     1205 
     1206Example:: 
     1207 
     1208    Entry.objects.get(title__iregex=r'^(an?|the) +') 
     1209 
     1210SQL equivalents:: 
     1211 
     1212    SELECT ... WHERE title REGEXP '^(an?|the) +'; -- MySQL 
     1213 
     1214    SELECT ... WHERE title ~* '^(an?|the) +'; -- PostgreSQL 
     1215 
     1216    SELECT ... WHERE title REGEXP '(?i)^(an?|the) +'; -- sqlite 
     1217 
    11761218Default lookups are exact 
    11771219------------------------- 
    11781220 
  • tests/modeltests/lookup/models.py

    diff -r 7a0796fe7efd tests/modeltests/lookup/models.py
    a b Traceback (most recent call last): 
    251251    ... 
    252252TypeError: Cannot resolve keyword 'headline__starts' into field. Choices are: id, headline, pub_date 
    253253 
     254# Create some articles with a bit more interesting headlines for testing field lookups: 
     255>>> now = datetime.now() 
     256>>> for a in Article.objects.all(): 
     257...     a.delete() 
     258>>> a1 = Article(pub_date=now, headline='f') 
     259>>> a1.save() 
     260>>> a2 = Article(pub_date=now, headline='fo') 
     261>>> a2.save() 
     262>>> a3 = Article(pub_date=now, headline='foo') 
     263>>> a3.save() 
     264>>> a4 = Article(pub_date=now, headline='fooo') 
     265>>> a4.save() 
     266>>> a5 = Article(pub_date=now, headline='Foo') 
     267>>> a5.save() 
     268 
     269# zero-or-more 
     270>>> Article.objects.filter(headline__regex=r'fo*') 
     271[<Article: f>, <Article: fo>, <Article: foo>, <Article: fooo>] 
     272>>> Article.objects.filter(headline__iregex=r'fo*') 
     273[<Article: Foo>, <Article: f>, <Article: fo>, <Article: foo>, <Article: fooo>] 
     274 
     275# one-or-more 
     276>>> Article.objects.filter(headline__regex=r'fo+') 
     277[<Article: fo>, <Article: foo>, <Article: fooo>] 
     278 
     279# zero-or-one (optional character) 
     280>>> Article.objects.filter(headline__regex=r'fooo?') 
     281[<Article: foo>, <Article: fooo>] 
     282 
     283# and some more: 
     284>>> a6 = Article(pub_date=now, headline='bar') 
     285>>> a6.save() 
     286>>> a7 = Article(pub_date=now, headline='Bar') 
     287>>> a7.save() 
     288>>> a8 = Article(pub_date=now, headline='baz') 
     289>>> a8.save() 
     290>>> a9 = Article(pub_date=now, headline='baZ') 
     291>>> a9.save() 
     292 
     293# leading anchor 
     294>>> Article.objects.filter(headline__regex=r'^b') 
     295[<Article: baZ>, <Article: bar>, <Article: baz>] 
     296>>> Article.objects.filter(headline__iregex=r'^b') 
     297[<Article: Bar>, <Article: baZ>, <Article: bar>, <Article: baz>] 
     298 
     299# trailing anchor 
     300>>> Article.objects.filter(headline__regex=r'z$') 
     301[<Article: baz>] 
     302>>> Article.objects.filter(headline__iregex=r'z$') 
     303[<Article: baZ>, <Article: baz>] 
     304 
     305# character sets 
     306>>> Article.objects.filter(headline__regex=r'ba[rz]') 
     307[<Article: bar>, <Article: baz>] 
     308>>> Article.objects.filter(headline__regex=r'ba[RZ]') 
     309[<Article: baZ>] 
     310>>> Article.objects.filter(headline__iregex=r'ba[RZ]') 
     311[<Article: Bar>, <Article: baZ>, <Article: bar>, <Article: baz>] 
     312 
     313# and yet more: 
     314>>> a10 = Article(pub_date=now, headline='foobar') 
     315>>> a10.save() 
     316>>> a11 = Article(pub_date=now, headline='foobaz') 
     317>>> a11.save() 
     318>>> a12 = Article(pub_date=now, headline='FooBarBaz') 
     319>>> a12.save() 
     320>>> a13 = Article(pub_date=now, headline='foobarbaz') 
     321>>> a13.save() 
     322>>> a14 = Article(pub_date=now, headline='zoocarfaz') 
     323>>> a14.save() 
     324>>> a15 = Article(pub_date=now, headline='barfoobaz') 
     325>>> a15.save() 
     326>>> a16 = Article(pub_date=now, headline='BAZBARFOO') 
     327>>> a16.save() 
     328 
     329# alternation 
     330>>> Article.objects.filter(headline__regex=r'foo(bar|baz)') 
     331[<Article: barfoobaz>, <Article: foobar>, <Article: foobarbaz>, <Article: foobaz>] 
     332>>> Article.objects.filter(headline__iregex=r'foo(bar|baz)') 
     333[<Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobar>, <Article: foobarbaz>, <Article: foobaz>] 
     334>>> Article.objects.filter(headline__regex=r'^foo(bar|baz)') 
     335[<Article: foobar>, <Article: foobarbaz>, <Article: foobaz>] 
     336 
     337# greedy matching 
     338>>> Article.objects.filter(headline__regex=r'f.*z') 
     339[<Article: barfoobaz>, <Article: foobarbaz>, <Article: foobaz>, <Article: zoocarfaz>] 
     340>>> Article.objects.filter(headline__iregex=r'f.*z') 
     341[<Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobarbaz>, <Article: foobaz>, <Article: zoocarfaz>] 
     342 
     343# grouping and backreferences 
     344>>> Article.objects.filter(headline__regex=r'b(.).*b\1') 
     345[<Article: barfoobaz>, <Article: foobarbaz>] 
     346>>> Article.objects.filter(headline__iregex=r'b(.).*b\1') 
     347[<Article: BAZBARFOO>, <Article: FooBarBaz>, <Article: barfoobaz>, <Article: foobarbaz>] 
    254348"""}