| 1 | """Tests for jslex.""" |
| 2 | # encoding: utf-8 |
| 3 | # from https://bitbucket.org/ned/jslex |
| 4 | |
| 5 | import difflib, unittest |
| 6 | from django.utils.jslex import JsLexer, js_to_c_for_gettext |
| 7 | |
class JsLexTestCase(unittest.TestCase):
    """Base test case providing equality assertions that report a diff."""

    def assertMultiLineEqual(self, first, second, msg=None):
        """Assert that two multi-line strings are equal.

        If they aren't, fail with a line-by-line ``difflib.ndiff`` of the two.

        ``msg`` is optional. It is accepted because
        ``unittest.TestCase.assertEqual`` may dispatch string comparisons to
        this method and passes ``msg`` as a keyword argument; when given, it
        is placed on the first line of the failure message.
        """
        if first != second:
            diff = difflib.ndiff(first.splitlines(True), second.splitlines(True))
            # A final line with no trailing newline would otherwise be glued
            # onto the next diff line, so terminate every diff line.
            report = ''.join(
                line if line.endswith('\n') else line + '\n' for line in diff)
            self.fail(self._with_msg("Multi-line strings are unequal:\n" + report, msg))

    def assertListsEqual(self, first, second, msg=None):
        """Assert that two lists are equal, with a nice diff output if not.

        Elements are compared in the diff by their ``repr``. ``msg`` is an
        optional extra message placed on the first line of the failure text.
        """
        if first != second:
            lines1 = [repr(e) for e in first]
            lines2 = [repr(e) for e in second]
            report = '\n'.join(difflib.ndiff(lines1, lines2))
            self.fail(self._with_msg("Lists are unequal:\n" + report, msg))

    def _with_msg(self, standard, msg):
        """Return ``standard``, preceded by ``msg`` on its own line if given."""
        if msg is None:
            return standard
        return "%s\n%s" % (msg, standard)
| 26 | |
| 27 | |
class JsTokensTest(JsLexTestCase):
    """Table-driven token tests for the JavaScript lexer.

    Each LEX_CASES entry is a pair ``(javascript_source, expected)`` where
    ``expected`` is a list of ``"<token name> <token text>"`` strings.
    Test methods are generated from this table by the loop that follows the
    class; that code leaves whitespace (``ws``) tokens out of the comparison.
    """

    LEX_CASES = [
        # ids
        ("a ABC $ _ a123", ["id a", "id ABC", "id $", "id _", "id a123"]),
        (r"\u1234 abc\u0020 \u0065_\u0067", [r"id \u1234", r"id abc\u0020", r"id \u0065_\u0067"]),
        # numbers
        ("123 1.234 0.123e-3 0 1E+40 1e1 .123", ["dnum 123", "dnum 1.234", "dnum 0.123e-3", "dnum 0", "dnum 1E+40", "dnum 1e1", "dnum .123"]),
        ("0x1 0xabCD 0XABcd", ["hnum 0x1", "hnum 0xabCD", "hnum 0XABcd"]),
        ("010 0377 090", ["onum 010", "onum 0377", "dnum 0", "dnum 90"]),
        ("0xa123ghi", ["hnum 0xa123", "id ghi"]),
        # keywords
        ("function Function FUNCTION", ["keyword function", "id Function", "id FUNCTION"]),
        ("const constructor in inherits", ["keyword const", "id constructor", "keyword in", "id inherits"]),
        ("true true_enough", ["reserved true", "id true_enough"]),
        # strings
        (''' 'hello' "hello" ''', ["string 'hello'", 'string "hello"']),
        (r""" 'don\'t' "don\"t" '"' "'" '\'' "\"" """,
            [r"""string 'don\'t'""", r'''string "don\"t"''', r"""string '"'""", r'''string "'"''', r"""string '\''""", r'''string "\""''']),
        # Written as a plain unicode literal with a doubled backslash: the
        # ``ur`` prefix is a syntax error on Python 3, while this spelling
        # yields the same text on Python 2 and on Python 3.3+.
        (u'"ƃuıxǝ⅂ ʇdıɹɔsɐʌɐſ\\""', [u'string "ƃuıxǝ⅂ ʇdıɹɔsɐʌɐſ\\""']),
        # comments
        ("a//b", ["id a", "linecomment //b"]),
        ("/****/a/=2//hello", ["comment /****/", "id a", "punct /=", "dnum 2", "linecomment //hello"]),
        ("/*\n * Header\n */\na=1;", ["comment /*\n * Header\n */", "id a", "punct =", "dnum 1", "punct ;"]),
        # punctuation
        ("a+++b", ["id a", "punct ++", "punct +", "id b"]),
        # regex
        (r"a=/a*/,1", ["id a", "punct =", "regex /a*/", "punct ,", "dnum 1"]),
        (r"a=/a*[^/]+/,1", ["id a", "punct =", "regex /a*[^/]+/", "punct ,", "dnum 1"]),
        (r"a=/a*\[^/,1", ["id a", "punct =", r"regex /a*\[^/", "punct ,", "dnum 1"]),
        (r"a=/\//,1", ["id a", "punct =", r"regex /\//", "punct ,", "dnum 1"]),

        # next two are from http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
        ("""for (var x = a in foo && "</x>" || mot ? z:/x:3;x<5;y</g/i) {xyz(x++);}""",
            ["keyword for", "punct (", "keyword var", "id x", "punct =", "id a", "keyword in",
             "id foo", "punct &&", 'string "</x>"', "punct ||", "id mot", "punct ?", "id z",
             "punct :", "regex /x:3;x<5;y</g", "punct /", "id i", "punct )", "punct {",
             "id xyz", "punct (", "id x", "punct ++", "punct )", "punct ;", "punct }"]),
        ("""for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y</g/i) {xyz(x++);}""",
            ["keyword for", "punct (", "keyword var", "id x", "punct =", "id a", "keyword in",
             "id foo", "punct &&", 'string "</x>"', "punct ||", "id mot", "punct ?", "id z",
             "punct /", "id x", "punct :", "dnum 3", "punct ;", "id x", "punct <", "dnum 5",
             "punct ;", "id y", "punct <", "regex /g/i", "punct )", "punct {",
             "id xyz", "punct (", "id x", "punct ++", "punct )", "punct ;", "punct }"]),

        # Various "illegal" regexes that are valid according to the std.
        (r"""/????/, /++++/, /[----]/ """, ["regex /????/", "punct ,", "regex /++++/", "punct ,", "regex /[----]/"]),

        # Stress cases from http://stackoverflow.com/questions/5533925/what-javascript-constructs-does-jslex-incorrectly-lex/5573409#5573409
        (r"""/\[/""", [r"""regex /\[/"""]),
        (r"""/[i]/""", [r"""regex /[i]/"""]),
        (r"""/[\]]/""", [r"""regex /[\]]/"""]),
        (r"""/a[\]]/""", [r"""regex /a[\]]/"""]),
        (r"""/a[\]]b/""", [r"""regex /a[\]]b/"""]),
        (r"""/[\]/]/gi""", [r"""regex /[\]/]/gi"""]),
        (r"""/\[[^\]]+\]/gi""", [r"""regex /\[[^\]]+\]/gi"""]),
        # Raw string: the JavaScript below is full of backslash sequences that
        # are not Python escapes, so the value is unchanged but no
        # invalid-escape warnings are triggered.
        (r"""
            rexl.re = {
            NAME: /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/,
            UNQUOTED_LITERAL: /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/,
            QUOTED_LITERAL: /^'(?:[^']|'')*'/,
            NUMERIC_LITERAL: /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/,
            SYMBOL: /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/
            };
        """,
            ["id rexl", "punct .", "id re", "punct =", "punct {",
             "id NAME", "punct :", r"""regex /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/""", "punct ,",
             "id UNQUOTED_LITERAL", "punct :", r"""regex /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/""", "punct ,",
             "id QUOTED_LITERAL", "punct :", r"""regex /^'(?:[^']|'')*'/""", "punct ,",
             "id NUMERIC_LITERAL", "punct :", r"""regex /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/""", "punct ,",
             "id SYMBOL", "punct :", r"""regex /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/""",
             "punct }", "punct ;"
             ]),

        # Same fixture as above, followed by a string literal holding a lone
        # double quote.
        (r"""
            rexl.re = {
            NAME: /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/,
            UNQUOTED_LITERAL: /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/,
            QUOTED_LITERAL: /^'(?:[^']|'')*'/,
            NUMERIC_LITERAL: /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/,
            SYMBOL: /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/
            };
            str = '"';
        """,
            ["id rexl", "punct .", "id re", "punct =", "punct {",
             "id NAME", "punct :", r"""regex /^(?!\d)(?:\w)+|^"(?:[^"]|"")+"/""", "punct ,",
             "id UNQUOTED_LITERAL", "punct :", r"""regex /^@(?:(?!\d)(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/""", "punct ,",
             "id QUOTED_LITERAL", "punct :", r"""regex /^'(?:[^']|'')*'/""", "punct ,",
             "id NUMERIC_LITERAL", "punct :", r"""regex /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/""", "punct ,",
             "id SYMBOL", "punct :", r"""regex /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/""",
             "punct }", "punct ;",
             "id str", "punct =", """string '"'""", "punct ;",
             ]),

        (r""" this._js = "e.str(\"" + this.value.replace(/\\/g, "\\\\").replace(/"/g, "\\\"") + "\")"; """,
            ["keyword this", "punct .", "id _js", "punct =", r'''string "e.str(\""''', "punct +", "keyword this", "punct .",
             "id value", "punct .", "id replace", "punct (", r"regex /\\/g", "punct ,", r'string "\\\\"', "punct )",
             "punct .", "id replace", "punct (", r'regex /"/g', "punct ,", r'string "\\\""', "punct )", "punct +",
             r'string "\")"', "punct ;"]),
    ]
| 127 | |
def make_function(input, toks):
    """Return a test method that lexes ``input`` and compares with ``toks``.

    The returned function takes the test case instance as ``self``. It runs
    a fresh ``JsLexer`` over ``input``, formats every token whose name is not
    ``'ws'`` as ``"<name> <text>"``, and hands the resulting list together
    with ``toks`` to ``self.assertListsEqual``.
    """
    def test_func(self):
        observed = []
        for kind, text in JsLexer().lex(input):
            # Whitespace tokens are not part of the expected token lists.
            if kind != 'ws':
                observed.append("%s %s" % (kind, text))
        self.assertListsEqual(observed, toks)

    return test_func
| 134 | |
# Attach one generated test method per LEX_CASES entry, named after the
# entry's position in the table. The loop variables deliberately avoid the
# name ``input`` so the builtin is not rebound at module level.
for case_index, (js_source, expected_tokens) in enumerate(JsTokensTest.LEX_CASES):
    setattr(JsTokensTest, "test_case_%d" % case_index,
            make_function(js_source, expected_tokens))
| 137 | |
| 138 | |
# Fixtures for js_to_c_for_gettext, consumed by the loop near the end of this
# module. Cases are separated by a line of 40 "=" characters; within a case,
# a line of 20 "-" characters separates the JavaScript input (above) from the
# expected output (below). The literal is a raw string, so backslashes in the
# fixtures are kept verbatim.
GETTEXT_CASES = r"""
========================================
a = 1; /* /[0-9]+/ */
b = 0x2a0b / 1; // /[0-9]+/
c = 3;
--------------------
a = 1; /* /[0-9]+/ */
b = 0x2a0b / 1; // /[0-9]+/
c = 3;
========================================
a = 1.234e-5;
/*
* /[0-9+/
*/
b = .0123;
--------------------
a = 1.234e-5;
/*
* /[0-9+/
*/
b = .0123;
========================================
x = y / z;
alert(gettext("hello"));
x /= 3;
--------------------
x = y / z;
alert(gettext("hello"));
x /= 3;
========================================
s = "Hello \"th/foo/ere\"";
s = 'He\x23llo \'th/foo/ere\'';
s = 'slash quote \", just quote "';
--------------------
s = "Hello \"th/foo/ere\"";
s = "He\x23llo \'th/foo/ere\'";
s = "slash quote \", just quote \"";
========================================
s = "Line continuation\
continued /hello/ still the string";/hello/;
--------------------
s = "Line continuation\
continued /hello/ still the string";"REGEX";
========================================
var regex = /pattern/;
var regex2 = /matter/gm;
var regex3 = /[*/]+/gm.foo("hey");
--------------------
var regex = "REGEX";
var regex2 = "REGEX";
var regex3 = "REGEX".foo("hey");
========================================
for (var x = a in foo && "</x>" || mot ? z:/x:3;x<5;y</g/i) {xyz(x++);}
for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y</g/i) {xyz(x++);}
--------------------
for (var x = a in foo && "</x>" || mot ? z:"REGEX"/i) {xyz(x++);}
for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y<"REGEX") {xyz(x++);}
========================================
\u1234xyz = gettext('Hello there');
--------------------
Uu1234xyz = gettext("Hello there");
========================================
"""
| 202 | |
| 203 | |
class JsToCForGettextTest(JsLexTestCase):
    """Holder for the js_to_c_for_gettext tests.

    The class body defines no tests itself; test methods are attached with
    ``setattr`` by the module-level loop over GETTEXT_CASES.
    """
| 206 | |
def make_function(js, c):
    """Return a test method checking the gettext conversion of ``js``.

    The returned function takes the test case instance as ``self``, passes
    ``js`` through ``js_to_c_for_gettext`` and compares the result with ``c``
    using ``self.assertMultiLineEqual``.
    """
    def test_func(self):
        converted = js_to_c_for_gettext(js)
        self.assertMultiLineEqual(converted, c)

    return test_func
| 211 | |
# Cut GETTEXT_CASES at each line of 40 "=" characters and turn every
# non-blank chunk into a test method on JsToCForGettextTest. Inside a chunk,
# the line of 20 "-" characters separates the input from the expected output.
for i, pair in enumerate(GETTEXT_CASES.split('=' * 40 + '\n')):
    if pair.strip():
        js, c = pair.split('-' * 20 + '\n')
        setattr(JsToCForGettextTest, "test_case_%d" % i, make_function(js, c))
| 217 | |
# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
| 220 | |