]>
Commit | Line | Data |
---|---|---|
19a03940 | 1 | import collections |
2 | import contextlib | |
8f53dc44 | 3 | import itertools |
825abb81 | 4 | import json |
8f53dc44 | 5 | import math |
9e3f1991 | 6 | import operator |
2b25cb5d PH |
7 | import re |
8 | ||
8f53dc44 | 9 | from .utils import ( |
10 | NO_DEFAULT, | |
11 | ExtractorError, | |
12 | js_to_json, | |
13 | remove_quotes, | |
14 | truncate_string, | |
15 | unified_timestamp, | |
16 | write_string, | |
17 | ) | |
2b25cb5d | 18 | |
be13a6e5 | 19 | |
def _js_bit_op(op):
    """Wrap a binary bitwise operator so it behaves like its JS counterpart.

    `None` (JS null) and `JS_Undefined` operands are treated as 0, and the
    result is masked to its low 32 bits.
    """

    def _as_int(value):
        if value in (None, JS_Undefined):
            return 0
        return value

    def wrapped(a, b):
        result = op(_as_int(a), _as_int(b))
        return result & 0xffffffff

    return wrapped
28 | ||
29 | ||
def _js_arith_op(op):
    """Wrap a binary arithmetic operator with JS-like operand coercion.

    Any `JS_Undefined` operand makes the result NaN; otherwise falsy
    operands (e.g. `None`) are replaced by 0 before `op` is applied.
    """

    def wrapped(a, b):
        return float('nan') if JS_Undefined in (a, b) else op(a or 0, b or 0)

    return wrapped
38 | ||
39 | ||
def _js_div(a, b):
    """Emulate the JS `/` operator.

    Returns NaN when either operand is `JS_Undefined` or when both operands
    are falsy (e.g. ``0 / 0``), infinity when only the divisor is falsy, and
    the ordinary quotient otherwise (a falsy dividend counts as 0).
    """
    # NaN only when *both* sides are zero-like; the previous `a and b` test
    # also turned `0 / x` and `x / 0` into NaN, making the inf branch dead.
    if JS_Undefined in (a, b) or not (a or b):
        return float('nan')
    return (a or 0) / b if b else float('inf')
44 | ||
45 | ||
def _js_mod(a, b):
    """Emulate the JS `%` operator.

    NaN is returned when either operand is `JS_Undefined` or the divisor is
    falsy; a falsy dividend is treated as 0.
    """
    if JS_Undefined in (a, b):
        return float('nan')
    if not b:
        return float('nan')
    dividend = a or 0
    return dividend % b
50 | ||
51 | ||
def _js_exp(a, b):
    """Emulate the JS `**` operator.

    A falsy exponent always yields 1 (this check comes first, so it also
    covers ``0 ** 0``); otherwise a `JS_Undefined` operand yields NaN and a
    falsy base is treated as 0.
    """
    if not b:
        return 1  # even 0 ** 0 !!
    if JS_Undefined in (a, b):
        return float('nan')
    base = a or 0
    return base ** b
58 | ||
59 | ||
def _js_eq_op(op):
    """Wrap an (in)equality operator for JS loose comparison of nullish values.

    When both operands are drawn from `None`/`JS_Undefined` they are treated
    as equal to each other (`op` is applied to `a` and itself); any other
    pair is passed to `op` unchanged.
    """

    def wrapped(a, b):
        both_nullish = {a, b} <= {None, JS_Undefined}
        return op(a, a if both_nullish else b)

    return wrapped
68 | ||
69 | ||
def _js_comp_op(op):
    """Wrap a relational operator (<, <=, >, >=) with JS-like coercion.

    Comparisons involving `JS_Undefined` are always False. Falsy operands
    are replaced by 0; if either original operand is a string, both sides
    are compared as strings.
    """

    def wrapped(a, b):
        if JS_Undefined in (a, b):
            return False
        lhs, rhs = a or 0, b or 0
        if any(isinstance(operand, str) for operand in (a, b)):
            return op(str(lhs), str(rhs))
        return op(lhs, rhs)

    return wrapped
80 | ||
81 | ||
def _js_ternary(cndn, if_true=True, if_false=False):
    """Simulate JS's ternary operator (cndn?if_true:if_false)"""
    falsy = cndn in (False, None, 0, '', JS_Undefined)
    if not falsy:
        try:
            # NB: NaN cannot be checked by membership
            falsy = math.isnan(cndn)
        except TypeError:
            # non-numeric values cannot be NaN
            falsy = False
    return if_false if falsy else if_true
90 | ||
49b4ceae | 91 | |
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
# NOTE: insertion order is significant. JSInterpreter.interpret_statement iterates
# this dict in order and evaluates the first operator on which the expression can
# be split, so the order of the entries decides which operator is tried first.
_OPERATORS = {  # None => Defined in JSInterpreter._operator
    '?': None,
    '??': None,
    '||': None,
    '&&': None,

    '|': _js_bit_op(operator.or_),
    '^': _js_bit_op(operator.xor),
    '&': _js_bit_op(operator.and_),

    '===': operator.is_,
    '!==': operator.is_not,
    '==': _js_eq_op(operator.eq),
    '!=': _js_eq_op(operator.ne),

    '<=': _js_comp_op(operator.le),
    '>=': _js_comp_op(operator.ge),
    '<': _js_comp_op(operator.lt),
    '>': _js_comp_op(operator.gt),

    '>>': _js_bit_op(operator.rshift),
    '<<': _js_bit_op(operator.lshift),

    '+': _js_arith_op(operator.add),
    '-': _js_arith_op(operator.sub),

    '*': _js_arith_op(operator.mul),
    '%': _js_mod,
    '/': _js_div,
    '**': _js_exp,
}

# Comparison operators; excluded from the compound-assignment (`<op>=`) pattern
_COMP_OPERATORS = {'===', '!==', '==', '!=', '<=', '>=', '<', '>'}

# Pattern for a JS identifier
_NAME_RE = r'[a-zA-Z_$][\w$]*'
# Maps each opening bracket to its closing bracket: {'(': ')', '{': '}', '[': ']'}
_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
# Characters that open a quoted span: string quotes and the regex-literal delimiter
_QUOTES = '\'"/'
06dfe0a0 | 130 | |
2b25cb5d | 131 | |
class JS_Undefined:
    """Sentinel standing in for JS `undefined`; the class object itself is the value."""
8f53dc44 | 134 | |
135 | ||
class JS_Break(ExtractorError):
    """Raised for a JS `break`; caught by the enclosing `for`/`switch` handling."""

    def __init__(self):
        super().__init__('Invalid break')
139 | ||
140 | ||
class JS_Continue(ExtractorError):
    """Raised for a JS `continue`; caught by the enclosing `for` loop handling."""

    def __init__(self):
        super().__init__('Invalid continue')
144 | ||
145 | ||
class JS_Throw(ExtractorError):
    """Raised for a JS `throw`; the thrown value is kept in `self.error`."""

    def __init__(self, e):
        self.error = e
        super().__init__(f'Uncaught exception {e}')
150 | ||
151 | ||
class LocalNameSpace(collections.ChainMap):
    """Scope chain for interpreted code.

    Assignment updates the first scope that already defines the key and only
    creates a new entry (in the first map) when no scope has it.
    """

    def __setitem__(self, key, value):
        owner = next((scope for scope in self.maps if key in scope), self.maps[0])
        owner[key] = value

    def __delitem__(self, key):
        raise NotImplementedError('Deleting is not supported')
162 | ||
404f611f | 163 | |
class Debugger:
    """Optional tracing of interpreted statements; inactive unless ENABLED is truthy."""
    import sys
    ENABLED = False and 'pytest' in sys.modules

    @staticmethod
    def write(*args, level=100):
        """Write one debug line, indented by ``100 - level`` spaces."""
        indent = " " * (100 - level)
        text = " ".join(truncate_string(str(arg), 50, 50) for arg in args)
        write_string(f'[debug] JS: {indent}{text}\n')

    @classmethod
    def wrap_interpreter(cls, f):
        """Decorate an `interpret_statement`-style function with tracing.

        The wrapper logs the statement before evaluation, the result (or the
        raised error) afterwards, and otherwise passes the call through.
        """
        def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
            if cls.ENABLED and stmt.strip():
                cls.write(stmt, level=allow_recursion)
            try:
                ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
            except Exception as e:
                if cls.ENABLED:
                    if isinstance(e, ExtractorError):
                        e = e.orig_msg
                    cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
                raise
            else:
                if cls.ENABLED and stmt.strip():
                    # '=>' marks a value being returned, '->' an intermediate result
                    cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
                return ret, should_ret
        return interpret_statement
190 | ||
191 | ||
86e5f3ed | 192 | class JSInterpreter: |
# Running count of temporary objects; used by _named_object to build unique names
__named_object_counter = 0

# Maps each JS regex flag character to a bitmask value (see _regex_flags)
_RE_FLAGS = {
    # special knowledge: Python's re flags are bitmask values, current max 128
    # invent new bitmask values well above that for literal parsing
    # TODO: new pattern class to execute matches with these flags
    'd': 1024,  # Generate indices for substring matches
    'g': 2048,  # Global search
    'i': re.I,  # Case-insensitive search
    'm': re.M,  # Multi-line search
    's': re.S,  # Allows . to match newline characters
    'u': re.U,  # Treat a pattern as a sequence of unicode code points
    'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
}

# NOTE(review): not referenced in the code visible here - confirm usage before removing
_EXC_NAME = '__yt_dlp_exception__'
209 | ||
def __init__(self, code, objects=None):
    """Store the JS source and set up the function/object caches.

    @param code     JS source text searched by the extract_* methods
    @param objects  optional dict of pre-extracted objects, used as the cache as-is
    """
    self.code = code
    self._functions = {}
    self._objects = objects if objects is not None else {}
404f611f | 213 | |
class Exception(ExtractorError):
    """Interpreter error; appends a truncated copy of the offending expression, if given."""

    def __init__(self, msg, expr=None, *args, **kwargs):
        if expr is None:
            full_msg = msg
        else:
            full_msg = f'{msg.rstrip()} in: {truncate_string(expr, 50, 50)}'
        super().__init__(full_msg, *args, **kwargs)
219 | ||
def _named_object(self, namespace, obj):
    """Store `obj` in `namespace` under a freshly generated name and return that name."""
    counter = self.__named_object_counter + 1
    self.__named_object_counter = counter
    generated = f'__yt_dlp_jsinterp_obj{counter}'
    namespace[generated] = obj
    return generated
225 | ||
@classmethod
def _regex_flags(cls, expr):
    """Parse the flag characters that follow a JS regex literal.

    @param expr  text immediately after the closing `/` of the literal
    @returns     (flags, remainder): the OR of the `_RE_FLAGS` values of the
                 leading flag characters, and the text after those characters
    """
    flags = 0
    if not expr:
        return flags, expr
    for idx, ch in enumerate(expr):
        if ch not in cls._RE_FLAGS:
            # first non-flag character: it belongs to the remainder and must
            # be kept (e.g. the `.` of `/re/g.test(x)`)
            break
        flags |= cls._RE_FLAGS[ch]
    else:
        # every character was a flag, so nothing is left over
        idx = len(expr)
    return flags, expr[idx:]
236 | ||
@staticmethod
def _separate(expr, delim=',', max_split=None):
    """Lazily split `expr` on `delim`, skipping delimiters inside brackets or quotes.

    Generator. Yields nothing for an empty `expr`. A delimiter is only
    recognised while no closing bracket is pending and while not inside a
    quoted span (`'`, `"` or a `/` regex literal). When `max_split` is
    truthy, scanning stops after that many splits and the unsplit rest is
    yielded as the last piece.
    """
    # A `/` seen right after one of these (or at the very start) opens a regex literal
    OP_CHARS = '+-*/%&|^=<>!,;{}:['
    if not expr:
        return
    # Number of still-unclosed brackets, keyed by the closing character
    counters = {k: 0 for k in _MATCHING_PARENS.values()}
    # pos: index of the next delimiter character to match; delim_len: index of its last character
    start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
    in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
    for idx, char in enumerate(expr):
        if not in_quote and char in _MATCHING_PARENS:
            counters[_MATCHING_PARENS[char]] += 1
        elif not in_quote and char in counters:
            # Something's wrong if we get negative, but ignore it anyway
            if counters[char]:
                counters[char] -= 1
        elif not escaping:
            if char in _QUOTES and in_quote in (char, None):
                if in_quote or after_op or char != '/':
                    # Close the current quote (unless inside a regex `[...]` group), or open a new one
                    in_quote = None if in_quote and not in_regex_char_group else char
            elif in_quote == '/' and char in '[]':
                in_regex_char_group = char == '['
        escaping = not escaping and in_quote and char == '\\'
        # Whitespace keeps the previous "after operator" state
        after_op = not in_quote and char in OP_CHARS or (char.isspace() and after_op)

        # The counters were already updated above, so a closing bracket that
        # balances its opener can itself be matched as the delimiter
        if char != delim[pos] or any(counters.values()) or in_quote:
            pos = 0
            continue
        elif pos != delim_len:
            pos += 1
            continue
        yield expr[start: idx - delim_len]
        start, pos = idx + 1, 0
        splits += 1
        if max_split and splits >= max_split:
            break
    yield expr[start:]
273 | ||
@classmethod
def _separate_at_paren(cls, expr, delim=None):
    """Split `expr` at the bracket closing the one it starts with.

    @param delim  closing character to split at; defaults to the partner of
                  the first character of `expr`
    @returns      (inside, rest): `expr` before the split with its first
                  character dropped, and the text after the split, both stripped
    @raises       cls.Exception if the closing character is not found
    """
    if delim is None:
        delim = expr and _MATCHING_PARENS[expr[0]]
    pieces = list(cls._separate(expr, delim, 1))
    if len(pieces) < 2:
        raise cls.Exception(f'No terminating paren {delim}', expr)
    enclosed, rest = pieces[0], pieces[1]
    return enclosed[1:].strip(), rest.strip()
9e3f1991 | 282 | |
def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
    """Apply binary operator `op` to an evaluated left value and an unevaluated right expression.

    The short-circuiting operators (`||`, `&&`, `??`) may return `left_val`
    without evaluating `right_expr`; for `?` the branch selected by
    `left_val` is evaluated. Operators without a function in `_OPERATORS`
    (including no operator at all) yield the right-hand value.

    @raises self.Exception if the operator function fails
    """
    if op in ('||', '&&'):
        truthy = _js_ternary(left_val)
        if (op == '&&') ^ truthy:
            return left_val  # short circuiting
    elif op == '??':
        if left_val not in (None, JS_Undefined):
            return left_val
    elif op == '?':
        branches = self._separate(right_expr, ':', 1)
        right_expr = _js_ternary(left_val, *branches)

    right_val = self.interpret_expression(right_expr, local_vars, allow_recursion)
    handler = _OPERATORS.get(op)
    if not handler:
        return right_val

    try:
        return handler(left_val, right_val)
    except Exception as e:
        raise self.Exception(f'Failed to evaluate {left_val!r} {op} {right_val!r}', expr, cause=e)
301 | ||
def _index(self, obj, idx, allow_undefined=False):
    """Look up `idx` in `obj` (`'length'` gives `len(obj)`).

    List indices are converted with `int()`. On any lookup failure either
    `JS_Undefined` is returned (when `allow_undefined` is truthy) or
    self.Exception is raised.
    """
    if idx == 'length':
        return len(obj)
    try:
        key = int(idx) if isinstance(obj, list) else idx
        return obj[key]
    except Exception as e:
        if not allow_undefined:
            raise self.Exception(f'Cannot get index {idx}', repr(obj), cause=e)
        return JS_Undefined
311 | ||
def _dump(self, obj, namespace):
    """Return source text standing for `obj`.

    That is its JSON encoding when `json.dumps` accepts it, otherwise the
    name of a temporary variable holding it in `namespace`.
    """
    try:
        text = json.dumps(obj)
    except TypeError:
        text = self._named_object(namespace, obj)
    return text
317 | ||
@Debugger.wrap_interpreter
def interpret_statement(self, stmt, local_vars, allow_recursion=100):
    """Evaluate one JS statement (or several, separated by `;`).

    @param stmt             JS source text
    @param local_vars       namespace used for variable lookup and assignment
    @param allow_recursion  remaining nesting budget, decremented per call
    @returns                (value, should_return); should_return is truthy
                            when a `return` was executed and the caller must
                            stop evaluating and propagate `value`
    @raises self.Exception  when the budget is exhausted or the code is unsupported
    @raises JS_Throw        for a JS `throw`
    @raises JS_Break, JS_Continue  for `break`/`continue` (caught by loop/switch handling)
    """
    if allow_recursion < 0:
        raise self.Exception('Recursion limit reached')
    allow_recursion -= 1

    should_return = False
    # Run all but the last `;`-separated statement first; the last one is handled below
    sub_statements = list(self._separate(stmt, ';')) or ['']
    expr = stmt = sub_statements.pop().strip()

    for sub_stmt in sub_statements:
        ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion)
        if should_return:
            return ret, should_return

    # Strip a leading declaration keyword / `return` / `throw`
    m = re.match(r'(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["\'])|$)|(?P<throw>throw\s+)', stmt)
    if m:
        expr = stmt[len(m.group(0)):].strip()
        if m.group('throw'):
            raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion))
        should_return = not m.group('var')
    if not expr:
        return None, should_return

    # String or regex literal at the start of the expression
    if expr[0] in _QUOTES:
        inner, outer = self._separate(expr, expr[0], 1)
        if expr[0] == '/':
            flags, outer = self._regex_flags(outer)
            inner = re.compile(inner[1:], flags=flags)
        else:
            inner = json.loads(js_to_json(f'{inner}{expr[0]}', strict=True))
        if not outer:
            return inner, should_return
        expr = self._named_object(local_vars, inner) + outer

    if expr.startswith('new '):
        obj = expr[4:]
        if obj.startswith('Date('):
            left, right = self._separate_at_paren(obj[4:])
            expr = unified_timestamp(
                self.interpret_expression(left, local_vars, allow_recursion), False)
            if not expr:
                raise self.Exception(f'Failed to parse date {left!r}', expr)
            # the timestamp is multiplied by 1000 before being substituted back
            expr = self._dump(int(expr * 1000), local_vars) + right
        else:
            raise self.Exception(f'Unsupported object {obj}', expr)

    if expr.startswith('void '):
        # evaluated only for its side effects; the result is discarded
        self.interpret_expression(expr[5:], local_vars, allow_recursion)
        return None, should_return

    if expr.startswith('{'):
        inner, outer = self._separate_at_paren(expr)
        # try for object expression (Map)
        sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)]
        if all(len(sub_expr) == 2 for sub_expr in sub_expressions):
            def dict_item(key, val):
                val = self.interpret_expression(val, local_vars, allow_recursion)
                if re.match(_NAME_RE, key):
                    return key, val
                return self.interpret_expression(key, local_vars, allow_recursion), val

            return dict(dict_item(k, v) for k, v in sub_expressions), should_return

        # otherwise treat it as a block of statements
        inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
        if not outer or should_abort:
            return inner, should_abort or should_return
        else:
            expr = self._dump(inner, local_vars) + outer

    if expr.startswith('('):
        inner, outer = self._separate_at_paren(expr)
        inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
        if not outer or should_abort:
            return inner, should_abort or should_return
        else:
            expr = self._dump(inner, local_vars) + outer

    if expr.startswith('['):
        inner, outer = self._separate_at_paren(expr)
        name = self._named_object(local_vars, [
            self.interpret_expression(item, local_vars, allow_recursion)
            for item in self._separate(inner)])
        expr = name + outer

    m = re.match(r'''(?x)
        (?P<try>try)\s*\{|
        (?P<switch>switch)\s*\(|
        (?P<for>for)\s*\(
        ''', expr)
    md = m.groupdict() if m else {}
    if md.get('try'):
        try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
        err = None
        try:
            ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion)
            if should_abort:
                return ret, True
        except Exception as e:
            # XXX: This works for now, but makes debugging future issues very hard
            err = e

        pending = (None, False)
        # Capture only the identifier, not the surrounding parentheses, so that
        # the caught value is bound under the name the catch body refers to
        m = re.match(r'catch\s*(?:\(\s*(?P<err>{_NAME_RE})\s*\))?\{{'.format(**globals()), expr)
        if m:
            sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
            if err:
                catch_vars = {}
                if m.group('err'):
                    catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
                catch_vars = local_vars.new_child(catch_vars)
                err, pending = None, self.interpret_statement(sub_expr, catch_vars, allow_recursion)

        m = re.match(r'finally\s*\{', expr)
        if m:
            sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
            ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
            if should_abort:
                return ret, True

        # a `return` executed in the catch block takes effect after `finally`
        ret, should_abort = pending
        if should_abort:
            return ret, True

        if err:
            raise err

    elif md.get('for'):
        constructor, remaining = self._separate_at_paren(expr[m.end() - 1:])
        if remaining.startswith('{'):
            body, expr = self._separate_at_paren(remaining)
        else:
            switch_m = re.match(r'switch\s*\(', remaining)  # FIXME
            if switch_m:
                switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:])
                body, expr = self._separate_at_paren(remaining, '}')
                body = 'switch(%s){%s}' % (switch_val, body)
            else:
                body, expr = remaining, ''
        start, cndn, increment = self._separate(constructor, ';')
        self.interpret_expression(start, local_vars, allow_recursion)
        while True:
            if not _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
                break
            try:
                ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion)
                if should_abort:
                    return ret, True
            except JS_Break:
                break
            except JS_Continue:
                pass
            self.interpret_expression(increment, local_vars, allow_recursion)

    elif md.get('switch'):
        switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:])
        switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion)
        body, expr = self._separate_at_paren(remaining, '}')
        items = body.replace('default:', 'case default:').split('case ')[1:]
        # First pass looks for a matching `case`; the second pass falls back to `default`
        for default in (False, True):
            matched = False
            for item in items:
                case, stmt = (i.strip() for i in self._separate(item, ':', 1))
                if default:
                    matched = matched or case == 'default'
                elif not matched:
                    matched = (case != 'default'
                               and switch_val == self.interpret_expression(case, local_vars, allow_recursion))
                if not matched:
                    continue
                try:
                    ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion)
                    if should_abort:
                        # callers unpack a (value, should_return) pair
                        return ret, True
                except JS_Break:
                    break
            if matched:
                break

    if md:
        # continue with whatever follows the try/for/switch construct
        ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
        return ret, should_abort or should_return

    # Comma separated statements
    sub_expressions = list(self._separate(expr))
    if len(sub_expressions) > 1:
        for sub_expr in sub_expressions:
            ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
            if should_abort:
                return ret, True
        return ret, False

    # Apply ++/-- and substitute the resulting (pre) or previous (post) value
    for m in re.finditer(rf'''(?x)
            (?P<pre_sign>\+\+|--)(?P<var1>{_NAME_RE})|
            (?P<var2>{_NAME_RE})(?P<post_sign>\+\+|--)''', expr):
        var = m.group('var1') or m.group('var2')
        start, end = m.span()
        sign = m.group('pre_sign') or m.group('post_sign')
        ret = local_vars[var]
        local_vars[var] += 1 if sign[0] == '+' else -1
        if m.group('pre_sign'):
            ret = local_vars[var]
        expr = expr[:start] + self._dump(ret, local_vars) + expr[end:]

    if not expr:
        return None, should_return

    m = re.match(fr'''(?x)
        (?P<assign>
            (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
            (?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})?
            =(?!=)(?P<expr>.*)$
        )|(?P<return>
            (?!if|return|true|false|null|undefined|NaN)(?P<name>{_NAME_RE})$
        )|(?P<indexing>
            (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
        )|(?P<attribute>
            (?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
        )|(?P<function>
            (?P<fname>{_NAME_RE})\((?P<args>.*)\)$
        )''', expr)
    if m and m.group('assign'):
        left_val = local_vars.get(m.group('out'))

        if not m.group('index'):
            local_vars[m.group('out')] = self._operator(
                m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
            return local_vars[m.group('out')], should_return
        elif left_val in (None, JS_Undefined):
            raise self.Exception(f'Cannot index undefined variable {m.group("out")}', expr)

        idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
        if not isinstance(idx, (int, float)):
            raise self.Exception(f'List index {idx} must be integer', expr)
        idx = int(idx)
        left_val[idx] = self._operator(
            m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion)
        return left_val[idx], should_return

    elif expr.isdigit():
        return int(expr), should_return

    elif expr == 'break':
        raise JS_Break()
    elif expr == 'continue':
        raise JS_Continue()
    elif expr == 'undefined':
        return JS_Undefined, should_return
    elif expr == 'NaN':
        return float('NaN'), should_return

    elif m and m.group('return'):
        # bare variable name
        return local_vars.get(m.group('name'), JS_Undefined), should_return

    # Any other literal that js_to_json can translate
    with contextlib.suppress(ValueError):
        return json.loads(js_to_json(expr, strict=True)), should_return

    if m and m.group('indexing'):
        val = local_vars[m.group('in')]
        idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
        return self._index(val, idx), should_return

    for op in _OPERATORS:
        separated = list(self._separate(expr, op))
        right_expr = separated.pop()
        while True:
            # An empty piece before the operator means the operator character
            # belongs to the right-hand side; glue it back on
            if op in '?<>*-' and len(separated) > 1 and not separated[-1].strip():
                separated.pop()
            elif not (separated and op == '?' and right_expr.startswith('.')):
                break
            right_expr = f'{op}{right_expr}'
            if op != '-':
                right_expr = f'{separated.pop()}{op}{right_expr}'
        if not separated:
            continue
        left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
        return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return

    if m and m.group('attribute'):
        variable, member, nullish = m.group('var', 'member', 'nullish')
        if not member:
            member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion)
        arg_str = expr[m.end():]
        if arg_str.startswith('('):
            arg_str, remaining = self._separate_at_paren(arg_str)
        else:
            arg_str, remaining = None, arg_str

        def assertion(cndn, msg):
            """ assert, but without risk of getting optimized out """
            if not cndn:
                raise self.Exception(f'{member} {msg}', expr)

        def eval_method():
            if (variable, member) == ('console', 'debug'):
                if Debugger.ENABLED:
                    Debugger.write(self.interpret_expression(f'[{arg_str}]', local_vars, allow_recursion))
                return

            types = {
                'String': str,
                'Math': float,
            }
            obj = local_vars.get(variable, types.get(variable, NO_DEFAULT))
            if obj is NO_DEFAULT:
                if variable not in self._objects:
                    try:
                        self._objects[variable] = self.extract_object(variable)
                    except self.Exception:
                        if not nullish:
                            raise
                obj = self._objects.get(variable, JS_Undefined)

            if nullish and obj is JS_Undefined:
                return JS_Undefined

            # Member access
            if arg_str is None:
                return self._index(obj, member, nullish)

            # Function call
            argvals = [
                self.interpret_expression(v, local_vars, allow_recursion)
                for v in self._separate(arg_str)]

            if obj == str:
                if member == 'fromCharCode':
                    assertion(argvals, 'takes one or more arguments')
                    return ''.join(map(chr, argvals))
                raise self.Exception(f'Unsupported String method {member}', expr)
            elif obj == float:
                if member == 'pow':
                    assertion(len(argvals) == 2, 'takes two arguments')
                    return argvals[0] ** argvals[1]
                raise self.Exception(f'Unsupported Math method {member}', expr)

            if member == 'split':
                assertion(argvals, 'takes one or more arguments')
                assertion(len(argvals) == 1, 'with limit argument is not implemented')
                return obj.split(argvals[0]) if argvals[0] else list(obj)
            elif member == 'join':
                assertion(isinstance(obj, list), 'must be applied on a list')
                assertion(len(argvals) == 1, 'takes exactly one argument')
                return argvals[0].join(obj)
            elif member == 'reverse':
                assertion(not argvals, 'does not take any arguments')
                obj.reverse()
                return obj
            elif member == 'slice':
                assertion(isinstance(obj, list), 'must be applied on a list')
                assertion(len(argvals) == 1, 'takes exactly one argument')
                return obj[argvals[0]:]
            elif member == 'splice':
                assertion(isinstance(obj, list), 'must be applied on a list')
                assertion(argvals, 'takes one or more arguments')
                index, howMany = map(int, (argvals + [len(obj)])[:2])
                if index < 0:
                    index += len(obj)
                add_items = argvals[2:]
                res = []
                for i in range(index, min(index + howMany, len(obj))):
                    res.append(obj.pop(index))
                for i, item in enumerate(add_items):
                    obj.insert(index + i, item)
                return res
            elif member == 'unshift':
                assertion(isinstance(obj, list), 'must be applied on a list')
                assertion(argvals, 'takes one or more arguments')
                for item in reversed(argvals):
                    obj.insert(0, item)
                return obj
            elif member == 'pop':
                assertion(isinstance(obj, list), 'must be applied on a list')
                assertion(not argvals, 'does not take any arguments')
                if not obj:
                    return
                return obj.pop()
            elif member == 'push':
                assertion(argvals, 'takes one or more arguments')
                obj.extend(argvals)
                return obj
            elif member == 'forEach':
                assertion(argvals, 'takes one or more arguments')
                assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
                f, this = (argvals + [''])[:2]
                return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)]
            elif member == 'indexOf':
                assertion(argvals, 'takes one or more arguments')
                assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
                idx, start = (argvals + [0])[:2]
                try:
                    return obj.index(idx, start)
                except ValueError:
                    return -1
            elif member == 'charCodeAt':
                assertion(isinstance(obj, str), 'must be applied on a string')
                assertion(len(argvals) == 1, 'takes exactly one argument')
                idx = argvals[0] if isinstance(argvals[0], int) else 0
                if idx >= len(obj):
                    return None
                return ord(obj[idx])

            # Anything else: call the member as a function built by this interpreter
            idx = int(member) if isinstance(obj, list) else member
            return obj[idx](argvals, allow_recursion=allow_recursion)

        if remaining:
            ret, should_abort = self.interpret_statement(
                self._named_object(local_vars, eval_method()) + remaining,
                local_vars, allow_recursion)
            return ret, should_return or should_abort
        else:
            return eval_method(), should_return

    elif m and m.group('function'):
        fname = m.group('fname')
        argvals = [self.interpret_expression(v, local_vars, allow_recursion)
                   for v in self._separate(m.group('args'))]
        if fname in local_vars:
            return local_vars[fname](argvals, allow_recursion=allow_recursion), should_return
        elif fname not in self._functions:
            self._functions[fname] = self.extract_function(fname)
        return self._functions[fname](argvals, allow_recursion=allow_recursion), should_return

    raise self.Exception(
        f'Unsupported JS expression {truncate_string(expr, 20, 20) if expr != stmt else ""}', stmt)
9e3f1991 | 743 | |
def interpret_expression(self, expr, local_vars, allow_recursion):
    """Evaluate `expr` and return its value.

    @raises self.Exception if evaluating `expr` executed a `return`
    """
    value, returned = self.interpret_statement(expr, local_vars, allow_recursion)
    if not returned:
        return value
    raise self.Exception('Cannot return from an expression', expr)
2b25cb5d | 749 | |
def extract_object(self, objname):
    """Find the object literal assigned to `objname` in the JS code.

    @returns  dict mapping each member name (quotes removed) to a callable
              built from that member's function definition
    @raises   self.Exception if no such object is found
    """
    _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
    obj_m = re.search(
        r'''(?x)
            (?<!this\.)%s\s*=\s*{\s*
                (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
            }\s*;
        ''' % (re.escape(objname), _FUNC_NAME_RE),
        self.code)
    if not obj_m:
        raise self.Exception(f'Could not find object {objname}')

    # Currently, it only supports function definitions
    field_re = r'''(?x)
        (?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
    ''' % (_FUNC_NAME_RE, _NAME_RE)
    members = {}
    for field in re.finditer(field_re, obj_m.group('fields')):
        member_func = self.build_function(field.group('args').split(','), field.group('code'))
        members[remove_quotes(field.group('key'))] = member_func
    return members
774 | ||
def extract_function_code(self, funcname):
    """Locate the definition of JS function `funcname` in the code.

    @returns  argnames, code - the stripped parameter names and the function
              body without its enclosing braces
    @raises   self.Exception if the function is not found
    """
    func_m = re.search(
        r'''(?xs)
            (?:
                function\s+%(name)s|
                [{;,]\s*%(name)s\s*=\s*function|
                (?:var|const|let)\s+%(name)s\s*=\s*function
            )\s*
            \((?P<args>[^)]*)\)\s*
            (?P<code>{.+})''' % {'name': re.escape(funcname)},
        self.code)
    # Check for a match before touching it, so that a missing function gives
    # the intended error instead of an AttributeError on None
    if func_m is None:
        raise self.Exception(f'Could not find JS function "{funcname}"')
    # The `code` group runs to the last `}` of the source; cut it at the
    # brace that closes the function body
    code, _ = self._separate_at_paren(func_m.group('code'))
    return [x.strip() for x in func_m.group('args').split(',')], code
2b25cb5d | 791 | |
def extract_function(self, funcname):
    """Return a callable for the JS function `funcname` found in the code."""
    argnames, code = self.extract_function_code(funcname)
    return self.extract_function_from_code(argnames, code)
794 | ||
def extract_function_from_code(self, argnames, code, *global_stack):
    """Build a callable from a function body, handling inline `function(...) {...}` expressions.

    Each anonymous function found in `code` is built recursively, stored in
    a local namespace under a generated name, and replaced in the code by
    that name before the outer function is built.
    """
    local_vars = {}
    inline_func_re = r'function\((?P<args>[^)]*)\)\s*{'
    mobj = re.search(inline_func_re, code)
    while mobj is not None:
        start, body_start = mobj.span()
        # body_start - 1 is the opening brace of the inline function body
        body, remaining = self._separate_at_paren(code[body_start - 1:])
        inner_argnames = [x.strip() for x in mobj.group('args').split(',')]
        inner_func = self.extract_function_from_code(inner_argnames, body, local_vars, *global_stack)
        name = self._named_object(local_vars, inner_func)
        code = code[:start] + name + remaining
        mobj = re.search(inline_func_re, code)
    return self.build_function(argnames, code, local_vars, *global_stack)
ad25aee2 | 808 | |
def call_function(self, funcname, *args):
    """Extract the JS function `funcname` and call it with `args`."""
    func = self.extract_function(funcname)
    return func(args)
811 | ||
def build_function(self, argnames, code, *global_stack):
    """Create a Python callable that interprets `code` with the given parameter names.

    The returned function takes a sequence of argument values (plus optional
    extra variables and a recursion budget), binds them in the first scope of
    `global_stack` (a fresh dict when none is given) and returns the value of
    the executed `return`, or None if the code never returns.
    """
    scopes = list(global_stack) or [{}]
    names = tuple(argnames)

    def resf(args, kwargs={}, allow_recursion=100):
        innermost = scopes[0]
        # missing arguments are bound to None
        innermost.update(itertools.zip_longest(names, args, fillvalue=None))
        innermost.update(kwargs)
        namespace = LocalNameSpace(*scopes)
        ret, should_abort = self.interpret_statement(code.replace('\n', ' '), namespace, allow_recursion - 1)
        return ret if should_abort else None
    return resf