Code

Ticket #3214: sql-statement-iterator-updated.diff

File sql-statement-iterator-updated.diff, 5.9 KB (added by aseering@…, 6 years ago)

Updated version of shaunc's patch that works with Django SVN as of r8255.

Line 
1Index: django/core/management/sql.py
2===================================================================
3--- django/core/management/sql.py       (revision 8255)
4+++ django/core/management/sql.py       (working copy)
5@@ -1,6 +1,7 @@
6 from django.core.management.base import CommandError
7 import os
8 import re
9+from django.utils.sqltools import sql_statement_iterator
10 
11 try:
12     set
13@@ -452,7 +453,7 @@
14     for sql_file in sql_files:
15         if os.path.exists(sql_file):
16             fp = open(sql_file, 'U')
17-            for statement in statements.split(fp.read().decode(settings.FILE_CHARSET)):
18+            for statement in sql_statement_iterator( fp.read().decode(settings.FILE_CHARSET), double_percent_signs=True ):
19                 # Remove any comments from the file
20                 statement = re.sub(ur"--.*([\n\Z]|$)", "", statement)
21                 if statement.strip():
22Index: django/utils/sqltools.py
23===================================================================
24--- django/utils/sqltools.py    (revision 0)
25+++ django/utils/sqltools.py    (revision 0)
26@@ -0,0 +1,166 @@
27+r"""
28+Contains tools to massage sql text.
29+
30+#>>> re.findall( r'(?<!\%)(\%)(?!\%)', ' %a%% ' )
31+
32+
33+"""
34+import re
35+
36+_single_or_stmtend = r"(?P<semi>\;)|(?P<comment>\-\-)|(?P<ccomment>\/\*)|(?P<endccomment>\*\/)|(?P<lineend>\n|\r$)|(?P<firstquo>^\')|(?P<escquo>\'\')|(?:(?P<midquo>\')(?!\'))"
37+_dollarq = r"(?P<dollarquo>\$\w*\$)"
38+_percent = r"(?<!\%)\%(?!\%)"
39+
40+_single_id_re = re.compile( _single_or_stmtend )
41+_id_re = re.compile( '|'.join( ( _single_or_stmtend, _dollarq ) ) )
42+_percent_re = re.compile( _percent )
43+
44+def sql_statement_iterator(
45+        script, dollar_quotes = True, double_percent_signs = False
46+        ):
47+    r"""
48+    Iterate through the statements in an sql script,
49+    while respecting the boundaries of strings and comments.
50+
51+    Supports normal single quotes and dollar quotes (unless 'dollar_quotes' is false).
52+    Supports normal SQL comments (--) and C-style comments (/* ... */).
53+    If 'double_percent_signs' is true, standalone percent signs are meant to be
54+    doubled. This is useful when no parameters are intended, as otherwise the
55+    Python DB-API treats them as substitution points. NOTE: the code below currently doubles them unconditionally.
56+
57+
58+    Test various quirks of quotes:
59+
60+    >>> def show( s, **kw ):
61+    ...     for line in sql_statement_iterator( s, **kw ):
62+    ...         print line
63+
64+    >>> show( 'this is a \'simple;\'; test' )
65+    this is a 'simple;';
66+     test
67+    >>> show( ';' )
68+    ;
69+    >>> show( 'let\'s test a $DOLLAR$ quoted; $DOLLAR$ string' )
70+    let's test a $DOLLAR$ quoted; $DOLLAR$ string
71+
72+    >>> show( 'testing $$ nested; $A$ quotes; \'with;\' $A$;$$ many; fake; semicolons;'  )
73+    testing $$ nested; $A$ quotes; 'with;' $A$;$$ many;
74+     fake;
75+     semicolons;
76+
77+    >>> show( '''nested misformed $$ quote's $$; dont; cause; problems;''' )
78+    nested misformed $$ quote's $$;
79+     dont;
80+     cause;
81+     problems;
82+
83+    A real function:
84+   
85+    >>> show( '''CREATE OR REPLACE FUNCTION serviceIsCurrent( srow client_service )
86+    ... RETURNS bool LANGUAGE PLPGSQL AS $BODY$
87+    ... BEGIN
88+    ...     RETURN srow.start <= current_date AND (
89+    ...         srow.end IS NULL OR srow.end >= current_date ) AND
90+    ...         srow."_superceededBy_id" IS NULL;
91+    ... END;
92+    ... $BODY$;
93+    ...  ''' )
94+    CREATE OR REPLACE FUNCTION serviceIsCurrent( srow client_service )
95+    RETURNS bool LANGUAGE PLPGSQL AS $BODY$
96+    BEGIN
97+        RETURN srow.start <= current_date AND (
98+            srow.end IS NULL OR srow.end >= current_date ) AND
99+            srow."_superceededBy_id" IS NULL;
100+    END;
101+    $BODY$;
102+    <BLANKLINE>
103+    <BLANKLINE>
104+   
105+    Test ''
106+   
107+#    >>> show( "that''s it ';' ." )
108+    that''s it ';' .
109+    >>> show( "a''''b" )
110+    a''''b
111+
112+    Now test percent doubling
113+   
114+    >>> show( "hello there %1 %2 %%; %" )
115+    hello there %%1 %%2 %%;
116+     %%
117+
118+    Test w/o dollar quote:
119+   
120+    >>> show( "$$ d;d $$", dollar_quotes = False )
121+    $$ d;
122+    d $$
123+
124+    Test comments:
125+
126+    >>> show( "this is a line; -- this; doesn't; break\n'now; quote was in comment" )
127+    this is a line;
128+     -- this; doesn't; break
129+    'now; quote was in comment
130+
131+    >>> show( "/* ; a\n -- ; */;z b\n;c" )
132+    /* ; a
133+     -- ; */;
134+    z b
135+    ;
136+    c
137+
138+    """
139+    outer_quote = None
140+    is_comment = False
141+    is_c_comment = False
142+    if dollar_quotes:
143+        re = _id_re
144+    else:
145+        re = _single_id_re
146+
147+    lastPos = 0
148+    for match in re.finditer( script ):
149+        if match.group( 'semi' ):
150+            if not ( outer_quote or is_comment or is_c_comment ):
151+                newPos = match.end( 1 )
152+               
153+                statement = script[ lastPos : newPos ]
154+                yield _percent_re.sub( '%%', statement )
155+                lastPos = newPos
156+        elif match.group( 'comment' ) and not is_c_comment:
157+            if not outer_quote:
158+                is_comment = True
159+        elif match.group( 'ccomment' ) and not is_comment:
160+            if not outer_quote:
161+                is_c_comment = True
162+        elif match.group( 'lineend' ):
163+            is_comment = False
164+        elif match.group( 'endccomment' ):
165+            is_c_comment = False
166+        elif not ( is_comment or is_c_comment ):
167+            quote = filter( lambda g: g is not None, match.groups() )[ 0 ]
168+            if quote == "''":
169+                # '' (a doubled single quote) is an escaped quote, not a delimiter
170+                continue
171+            elif outer_quote and quote == outer_quote:
172+                # strings embedded in other strings needn't be
173+                # well-formed -- throw away nesting if outer quote
174+                # is found
175+                outer_quote = None
176+            elif not outer_quote:
177+                outer_quote = quote
178+               
179+
180+    if lastPos < len( script ):
181+        yield _percent_re.sub( '%%', script[ lastPos : ] )
182+
183+
184+   
185+   
186+   
187+def _test():
188+    import doctest
189+    doctest.testmod()
190+
191+if __name__ == "__main__":
192+    _test()