]>
Commit | Line | Data |
---|---|---|
1 | import collections | |
2 | import contextlib | |
3 | import itertools | |
4 | import json | |
5 | import math | |
6 | import operator | |
7 | import re | |
8 | ||
9 | from .utils import ( | |
10 | NO_DEFAULT, | |
11 | ExtractorError, | |
12 | function_with_repr, | |
13 | js_to_json, | |
14 | remove_quotes, | |
15 | truncate_string, | |
16 | unified_timestamp, | |
17 | write_string, | |
18 | ) | |
19 | ||
20 | ||
def _js_bit_op(op):
    """Wrap a binary bitwise operator with JS-like operand handling.

    The returned callable replaces ``None``, ``JS_Undefined`` and NaN
    operands with 0, applies *op* and masks the result to 32 bits.
    """

    def _coerce(value):
        if value in (None, JS_Undefined):
            return 0
        try:
            is_nan = math.isnan(value)  # NB: NaN cannot be checked by membership
        except TypeError:
            # Not a number at all: hand the value to the operator untouched
            is_nan = False
        return 0 if is_nan else value

    def wrapped(a, b):
        lhs = _coerce(a)
        rhs = _coerce(b)
        return op(lhs, rhs) & 0xffffffff

    return wrapped
34 | ||
35 | ||
def _js_arith_op(op):
    """Wrap a binary arithmetic operator with JS-like operand handling.

    The returned callable yields NaN when either operand is
    ``JS_Undefined``; otherwise falsy operands are replaced with 0 before
    *op* is applied.
    """

    def wrapped(a, b):
        operands = (a, b)
        if JS_Undefined in operands:
            return float('nan')
        lhs, rhs = (operand or 0 for operand in operands)
        return op(lhs, rhs)

    return wrapped
44 | ||
45 | ||
def _js_div(a, b):
    """Emulate the JS ``/`` operator on two already-evaluated operands.

    Returns NaN when either operand is ``JS_Undefined`` or when both
    operands are falsy (e.g. ``0 / 0``).  A truthy dividend over a falsy
    divisor yields an infinity carrying the sign of the dividend, so that
    ``-1 / 0`` is ``-inf``; a NaN dividend stays NaN.

    NOTE: a negative-zero divisor (``-0.0``) is not distinguished from
    ``0`` here.
    """
    if JS_Undefined in (a, b) or not (a or b):
        return float('nan')
    if b:
        return (a or 0) / b
    # The divisor is falsy and the dividend is truthy from here on.
    # Non-numeric or oversized dividends fall through to plain +inf.
    with contextlib.suppress(TypeError, OverflowError):
        if math.isnan(a):
            return float('nan')
        return math.copysign(float('inf'), a)
    return float('inf')
50 | ||
51 | ||
def _js_mod(a, b):
    """Emulate the JS ``%`` (remainder) operator.

    Returns NaN when either operand is ``JS_Undefined`` or the divisor is
    falsy.  Otherwise the result takes the sign of the *dividend*, as in
    JS (``-1 % 3 == -1``), rather than the sign of the divisor as Python's
    ``%`` does.  A falsy dividend is treated as 0.
    """
    if JS_Undefined in (a, b) or not b:
        return float('nan')
    dividend = a or 0
    if isinstance(dividend, int) and isinstance(b, int):
        # Stay in integer arithmetic so the result is still an int
        remainder = abs(dividend) % abs(b)
        return -remainder if dividend < 0 else remainder
    try:
        # math.fmod keeps the sign of the first argument
        return math.fmod(dividend, b)
    except ValueError:
        # math.fmod rejects an infinite dividend
        return float('nan')
56 | ||
57 | ||
def _js_exp(a, b):
    """Emulate the JS ``**`` operator.

    A falsy exponent always gives 1 (even for ``0 ** 0``); otherwise a
    ``JS_Undefined`` operand gives NaN and a falsy base is treated as 0.
    """
    if not b:
        return 1  # even 0 ** 0 !!
    if JS_Undefined in (a, b):
        return float('nan')
    base = a or 0
    return base ** b
64 | ||
65 | ||
def _js_eq_op(op):
    """Wrap an (in)equality operator so ``None`` and ``JS_Undefined`` compare equal.

    When both operands are drawn from ``{None, JS_Undefined}`` the first
    operand is compared with itself; otherwise *op* receives both operands
    unchanged.
    """

    def wrapped(a, b):
        both_nullish = {a, b}.issubset({None, JS_Undefined})
        return op(a, a if both_nullish else b)

    return wrapped
74 | ||
75 | ||
def _js_comp_op(op):
    """Wrap a relational operator with JS-like operand handling.

    The returned callable gives ``False`` if either operand is
    ``JS_Undefined``.  Falsy operands are replaced with 0, and if either
    original operand is a string both sides are compared as strings.
    """

    def wrapped(a, b):
        if JS_Undefined in (a, b):
            return False
        lhs, rhs = a or 0, b or 0
        if any(isinstance(operand, str) for operand in (a, b)):
            lhs, rhs = str(lhs), str(rhs)
        return op(lhs, rhs)

    return wrapped
86 | ||
87 | ||
def _js_ternary(cndn, if_true=True, if_false=False):
    """Simulate JS's ternary operator (cndn?if_true:if_false)

    ``False``, ``None``, ``0``, ``''``, ``JS_Undefined`` and NaN count as
    falsy conditions; anything else selects *if_true*.
    """
    falsy = cndn in (False, None, 0, '', JS_Undefined)
    if not falsy:
        try:
            falsy = math.isnan(cndn)  # NB: NaN cannot be checked by membership
        except TypeError:
            pass
    return if_false if falsy else if_true
96 | ||
97 | ||
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
# Binary operators understood by the interpreter, mapped to the callable that
# evaluates them on two already-evaluated operands.
# The insertion order matters: JSInterpreter.interpret_statement walks this
# dict from top to bottom and splits the expression on the first operator
# that produces a non-empty left-hand side.
_OPERATORS = {  # None => Defined in JSInterpreter._operator
    '?': None,
    '??': None,
    '||': None,
    '&&': None,

    '|': _js_bit_op(operator.or_),
    '^': _js_bit_op(operator.xor),
    '&': _js_bit_op(operator.and_),

    '===': operator.is_,
    '!==': operator.is_not,
    '==': _js_eq_op(operator.eq),
    '!=': _js_eq_op(operator.ne),

    '<=': _js_comp_op(operator.le),
    '>=': _js_comp_op(operator.ge),
    '<': _js_comp_op(operator.lt),
    '>': _js_comp_op(operator.gt),

    '>>': _js_bit_op(operator.rshift),
    '<<': _js_bit_op(operator.lshift),

    '+': _js_arith_op(operator.add),
    '-': _js_arith_op(operator.sub),

    '*': _js_arith_op(operator.mul),
    '%': _js_mod,
    '/': _js_div,
    '**': _js_exp,
}

# Operators that are left out when building the "<op>=" compound-assignment
# pattern in JSInterpreter.interpret_statement
_COMP_OPERATORS = {'===', '!==', '==', '!=', '<=', '>=', '<', '>'}

# Regex fragment used throughout this module to match a JS identifier
_NAME_RE = r'[a-zA-Z_$][\w$]*'
# Maps each opening bracket to its closing counterpart:
# {'(': ')', '{': '}', '[': ']'}
_MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]')))
# Characters that may delimit a quoted literal; '/' delimits a regex literal
_QUOTES = '\'"/'
136 | ||
137 | ||
class JS_Undefined:
    """Marker for the JS ``undefined`` value; the class itself is used, never an instance."""
140 | ||
141 | ||
class JS_Break(ExtractorError):
    """Raised when a ``break`` statement is interpreted."""

    def __init__(self):
        super().__init__('Invalid break')
145 | ||
146 | ||
class JS_Continue(ExtractorError):
    """Raised when a ``continue`` statement is interpreted."""

    def __init__(self):
        super().__init__('Invalid continue')
150 | ||
151 | ||
class JS_Throw(ExtractorError):
    """Raised when a ``throw`` statement is interpreted.

    The thrown JS value is kept on ``self.error``.
    """

    def __init__(self, e):
        self.error = e
        super().__init__(f'Uncaught exception {e}')
156 | ||
157 | ||
class LocalNameSpace(collections.ChainMap):
    """ChainMap whose assignments update the scope that already holds the key.

    A key that is not present in any scope is created in the innermost
    (first) map.  Deleting keys is not supported.
    """

    def __setitem__(self, key, value):
        # First scope that already defines the key, else the innermost one
        target = next((scope for scope in self.maps if key in scope), self.maps[0])
        target[key] = value

    def __delitem__(self, key):
        raise NotImplementedError('Deleting is not supported')
168 | ||
169 | ||
class Debugger:
    """Optional tracing of interpreted statements.

    Tracing is controlled by ``ENABLED``, which is hard-wired to ``False``
    here.
    """
    import sys
    ENABLED = False and 'pytest' in sys.modules

    @staticmethod
    def write(*args, level=100):
        """Write the truncated string forms of *args* as one debug line.

        The line is indented by ``100 - level`` spaces.
        """
        indent = ' ' * (100 - level)
        message = ' '.join(truncate_string(str(arg), 50, 50) for arg in args)
        write_string(f'[debug] JS: {indent}{message}\n')

    @classmethod
    def wrap_interpreter(cls, f):
        """Decorate an ``interpret_statement``-style function with tracing.

        The wrapper forwards all arguments to *f* and returns its
        ``(ret, should_ret)`` pair unchanged; exceptions are re-raised.
        """
        def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
            if cls.ENABLED and stmt.strip():
                cls.write(stmt, level=allow_recursion)
            try:
                ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
            except Exception as exc:
                if cls.ENABLED:
                    shown = exc.orig_msg if isinstance(exc, ExtractorError) else exc
                    cls.write('=> Raises:', shown, '<-|', stmt, level=allow_recursion)
                raise
            # Skip the trace line when the statement merely evaluated to itself
            if cls.ENABLED and stmt.strip() and (should_ret or repr(ret) != stmt):
                marker = ['->', '=>'][should_ret]
                cls.write(marker, repr(ret), '<-|', stmt, level=allow_recursion)
            return ret, should_ret
        return interpret_statement
197 | ||
198 | ||
class JSInterpreter:
    """Interpreter for a subset of JavaScript, operating directly on source text.

    ``code`` is the JS source that functions and objects are looked up in;
    ``objects`` optionally pre-seeds the cache of extracted objects.
    """
    __named_object_counter = 0

    _RE_FLAGS = {
        # special knowledge: Python's re flags are bitmask values, current max 128
        # invent new bitmask values well above that for literal parsing
        # TODO: new pattern class to execute matches with these flags
        'd': 1024,  # Generate indices for substring matches
        'g': 2048,  # Global search
        'i': re.I,  # Case-insensitive search
        'm': re.M,  # Multi-line search
        's': re.S,  # Allows . to match newline characters
        'u': re.U,  # Treat a pattern as a sequence of unicode code points
        'y': 4096,  # Perform a "sticky" search that matches starting at the current position in the target string
    }

    def __init__(self, code, objects=None):
        self.code, self._functions = code, {}
        self._objects = {} if objects is None else objects

    class Exception(ExtractorError):
        """Interpreter error; appends the (truncated) offending expression to the message."""

        def __init__(self, msg, expr=None, *args, **kwargs):
            if expr is not None:
                msg = f'{msg.rstrip()} in: {truncate_string(expr, 50, 50)}'
            super().__init__(msg, *args, **kwargs)

    def _named_object(self, namespace, obj):
        """Store *obj* in *namespace* under a freshly generated name and return the name.

        Callables that are not already ``function_with_repr`` are wrapped in one.
        """
        self.__named_object_counter += 1
        name = f'__yt_dlp_jsinterp_obj{self.__named_object_counter}'
        if callable(obj) and not isinstance(obj, function_with_repr):
            obj = function_with_repr(obj, f'F<{self.__named_object_counter}>')
        namespace[name] = obj
        return name

    @classmethod
    def _regex_flags(cls, expr):
        """Consume leading regex flag characters from *expr*.

        @returns (flags, remainder) where ``flags`` is the OR of the
                 ``_RE_FLAGS`` values of the consumed characters and
                 ``remainder`` is the rest of *expr*, starting at the
                 first non-flag character
        """
        flags = 0
        if not expr:
            return flags, expr
        for idx, ch in enumerate(expr):
            if ch not in cls._RE_FLAGS:
                break
            flags |= cls._RE_FLAGS[ch]
        else:
            # Every character was a flag, so nothing remains
            idx = len(expr)
        # Slice from idx (not idx + 1) so the first non-flag character is kept
        return flags, expr[idx:]

    @staticmethod
    def _separate(expr, delim=',', max_split=None):
        """Yield the pieces of *expr* split at top-level occurrences of *delim*.

        Delimiters inside brackets, quoted strings or regex literals are
        skipped, as is a ``+``/``-`` that directly follows another operator
        character (treated as a unary sign).  At most *max_split* splits are
        made when it is given.  Nothing is yielded for an empty *expr*.
        """
        OP_CHARS = '+-*/%&|^=<>!,;{}:['
        if not expr:
            return
        # Open-bracket depth, keyed by the closing bracket
        counters = {k: 0 for k in _MATCHING_PARENS.values()}
        start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
        in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
        for idx, char in enumerate(expr):
            if not in_quote and char in _MATCHING_PARENS:
                counters[_MATCHING_PARENS[char]] += 1
            elif not in_quote and char in counters:
                # Something's wrong if we get negative, but ignore it anyway
                if counters[char]:
                    counters[char] -= 1
            elif not escaping:
                if char in _QUOTES and in_quote in (char, None):
                    # A '/' only opens a regex literal when it follows an operator
                    if in_quote or after_op or char != '/':
                        in_quote = None if in_quote and not in_regex_char_group else char
                elif in_quote == '/' and char in '[]':
                    in_regex_char_group = char == '['
            escaping = not escaping and in_quote and char == '\\'
            in_unary_op = (not in_quote and not in_regex_char_group
                           and after_op not in (True, False) and char in '-+')
            after_op = char if (not in_quote and char in OP_CHARS) else (char.isspace() and after_op)

            if char != delim[pos] or any(counters.values()) or in_quote or in_unary_op:
                pos = 0
                continue
            elif pos != delim_len:
                # Part-way through a multi-character delimiter
                pos += 1
                continue
            yield expr[start: idx - delim_len]
            start, pos = idx + 1, 0
            splits += 1
            if max_split and splits >= max_split:
                break
        yield expr[start:]

    @classmethod
    def _separate_at_paren(cls, expr, delim=None):
        """Split *expr* at the bracket that closes its first character.

        @returns (inside, rest), both stripped; ``inside`` excludes the
                 opening bracket
        Raises ``cls.Exception`` if no terminating *delim* is found.
        """
        if delim is None:
            delim = expr and _MATCHING_PARENS[expr[0]]
        separated = list(cls._separate(expr, delim, 1))
        if len(separated) < 2:
            raise cls.Exception(f'No terminating paren {delim}', expr)
        return separated[0][1:].strip(), separated[1].strip()

    def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion):
        """Apply binary operator *op* to *left_val* and the not yet evaluated *right_expr*.

        ``||``, ``&&`` and ``??`` short-circuit; ``?`` picks the branch of
        the ternary to evaluate.  *expr* is only used for error messages.
        """
        if op in ('||', '&&'):
            if (op == '&&') ^ _js_ternary(left_val):
                return left_val  # short circuiting
        elif op == '??':
            if left_val not in (None, JS_Undefined):
                return left_val
        elif op == '?':
            right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1))

        right_val = self.interpret_expression(right_expr, local_vars, allow_recursion)
        if not _OPERATORS.get(op):
            return right_val

        try:
            return _OPERATORS[op](left_val, right_val)
        except Exception as e:
            raise self.Exception(f'Failed to evaluate {left_val!r} {op} {right_val!r}', expr, cause=e)

    def _index(self, obj, idx, allow_undefined=False):
        """Return ``obj[idx]`` (``len(obj)`` for ``'length'``).

        On failure, returns ``JS_Undefined`` if *allow_undefined* is set and
        raises ``self.Exception`` otherwise.
        """
        if idx == 'length':
            return len(obj)
        try:
            return obj[int(idx)] if isinstance(obj, list) else obj[idx]
        except Exception as e:
            if allow_undefined:
                return JS_Undefined
            raise self.Exception(f'Cannot get index {idx}', repr(obj), cause=e)

    def _dump(self, obj, namespace):
        """Return source text for *obj*: its JSON form, or a generated name bound in *namespace*."""
        try:
            return json.dumps(obj)
        except TypeError:
            return self._named_object(namespace, obj)

    @Debugger.wrap_interpreter
    def interpret_statement(self, stmt, local_vars, allow_recursion=100):
        """Interpret *stmt* (possibly several ``;``-separated statements).

        @returns (value, should_return) where ``should_return`` tells the
                 caller that a ``return`` was executed
        Raises ``self.Exception`` for unsupported constructs or when the
        recursion budget *allow_recursion* is used up.
        """
        if allow_recursion < 0:
            raise self.Exception('Recursion limit reached')
        allow_recursion -= 1

        should_return = False
        sub_statements = list(self._separate(stmt, ';')) or ['']
        expr = stmt = sub_statements.pop().strip()

        for sub_stmt in sub_statements:
            ret, should_return = self.interpret_statement(sub_stmt, local_vars, allow_recursion)
            if should_return:
                return ret, should_return

        m = re.match(r'(?P<var>(?:var|const|let)\s)|return(?:\s+|(?=["\'])|$)|(?P<throw>throw\s+)', stmt)
        if m:
            expr = stmt[len(m.group(0)):].strip()
            if m.group('throw'):
                raise JS_Throw(self.interpret_expression(expr, local_vars, allow_recursion))
            should_return = not m.group('var')
        if not expr:
            return None, should_return

        if expr[0] in _QUOTES:
            inner, outer = self._separate(expr, expr[0], 1)
            if expr[0] == '/':
                flags, outer = self._regex_flags(outer)
                # We don't support regex methods yet, so no point compiling it
                inner = f'{inner}/{flags}'
                # Avoid https://github.com/python/cpython/issues/74534
                # inner = re.compile(inner[1:].replace('[[', r'[\['), flags=flags)
            else:
                inner = json.loads(js_to_json(f'{inner}{expr[0]}', strict=True))
            if not outer:
                return inner, should_return
            expr = self._named_object(local_vars, inner) + outer

        if expr.startswith('new '):
            obj = expr[4:]
            if obj.startswith('Date('):
                left, right = self._separate_at_paren(obj[4:])
                date = unified_timestamp(
                    self.interpret_expression(left, local_vars, allow_recursion), False)
                if date is None:
                    raise self.Exception(f'Failed to parse date {left!r}', expr)
                expr = self._dump(int(date * 1000), local_vars) + right
            else:
                raise self.Exception(f'Unsupported object {obj}', expr)

        if expr.startswith('void '):
            # Evaluated for its side effects only; the value is discarded
            self.interpret_expression(expr[5:], local_vars, allow_recursion)
            return None, should_return

        if expr.startswith('{'):
            inner, outer = self._separate_at_paren(expr)
            # try for object expression (Map)
            sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)]
            if all(len(sub_expr) == 2 for sub_expr in sub_expressions):
                def dict_item(key, val):
                    val = self.interpret_expression(val, local_vars, allow_recursion)
                    if re.match(_NAME_RE, key):
                        return key, val
                    return self.interpret_expression(key, local_vars, allow_recursion), val

                return dict(dict_item(k, v) for k, v in sub_expressions), should_return

            inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
            if not outer or should_abort:
                return inner, should_abort or should_return
            else:
                expr = self._dump(inner, local_vars) + outer

        if expr.startswith('('):
            inner, outer = self._separate_at_paren(expr)
            inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
            if not outer or should_abort:
                return inner, should_abort or should_return
            else:
                expr = self._dump(inner, local_vars) + outer

        if expr.startswith('['):
            inner, outer = self._separate_at_paren(expr)
            name = self._named_object(local_vars, [
                self.interpret_expression(item, local_vars, allow_recursion)
                for item in self._separate(inner)])
            expr = name + outer

        m = re.match(r'''(?x)
            (?P<try>try)\s*\{|
            (?P<if>if)\s*\(|
            (?P<switch>switch)\s*\(|
            (?P<for>for)\s*\(
            ''', expr)
        md = m.groupdict() if m else {}
        if md.get('if'):
            cndn, expr = self._separate_at_paren(expr[m.end() - 1:])
            if_expr, expr = self._separate_at_paren(expr.lstrip())
            # TODO: "else if" is not handled
            else_expr = None
            m = re.match(r'else\s*{', expr)
            if m:
                else_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
            cndn = _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion))
            ret, should_abort = self.interpret_statement(
                if_expr if cndn else else_expr, local_vars, allow_recursion)
            if should_abort:
                return ret, True

        if md.get('try'):
            try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
            err = None
            try:
                ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion)
                if should_abort:
                    return ret, True
            except Exception as e:
                # XXX: This works for now, but makes debugging future issues very hard
                err = e

            pending = (None, False)
            # The "err" group captures the bare identifier (without the
            # parentheses) so the exception is bound to a name the catch
            # body can actually look up
            m = re.match(fr'catch\s*(?:\(\s*(?P<err>{_NAME_RE})\s*\))?\s*\{{', expr)
            if m:
                sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
                if err:
                    catch_vars = {}
                    if m.group('err'):
                        catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
                    catch_vars = local_vars.new_child(catch_vars)
                    err, pending = None, self.interpret_statement(sub_expr, catch_vars, allow_recursion)

            m = re.match(r'finally\s*\{', expr)
            if m:
                sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
                ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
                if should_abort:
                    return ret, True

            # A return from the catch block only takes effect after "finally" ran
            ret, should_abort = pending
            if should_abort:
                return ret, True

            if err:
                raise err

        elif md.get('for'):
            constructor, remaining = self._separate_at_paren(expr[m.end() - 1:])
            if remaining.startswith('{'):
                body, expr = self._separate_at_paren(remaining)
            else:
                switch_m = re.match(r'switch\s*\(', remaining)  # FIXME
                if switch_m:
                    switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:])
                    body, expr = self._separate_at_paren(remaining, '}')
                    body = 'switch(%s){%s}' % (switch_val, body)
                else:
                    body, expr = remaining, ''
            start, cndn, increment = self._separate(constructor, ';')
            self.interpret_expression(start, local_vars, allow_recursion)
            while True:
                if not _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)):
                    break
                try:
                    ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion)
                    if should_abort:
                        return ret, True
                except JS_Break:
                    break
                except JS_Continue:
                    pass
                self.interpret_expression(increment, local_vars, allow_recursion)

        elif md.get('switch'):
            switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:])
            switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion)
            body, expr = self._separate_at_paren(remaining, '}')
            items = body.replace('default:', 'case default:').split('case ')[1:]
            # First pass looks for a matching case; second pass runs from "default"
            for default in (False, True):
                matched = False
                for item in items:
                    case, stmt = (i.strip() for i in self._separate(item, ':', 1))
                    if default:
                        matched = matched or case == 'default'
                    elif not matched:
                        matched = (case != 'default'
                                   and switch_val == self.interpret_expression(case, local_vars, allow_recursion))
                    if not matched:
                        continue
                    try:
                        ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion)
                        if should_abort:
                            # Callers unpack a (value, should_return) pair
                            return ret, True
                    except JS_Break:
                        break
                if matched:
                    break

        if md:
            ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
            return ret, should_abort or should_return

        # Comma separated statements
        sub_expressions = list(self._separate(expr))
        if len(sub_expressions) > 1:
            for sub_expr in sub_expressions:
                ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
                if should_abort:
                    return ret, True
            return ret, False

        # Apply ++/-- and substitute the resulting value into the expression text
        for m in re.finditer(rf'''(?x)
                (?P<pre_sign>\+\+|--)(?P<var1>{_NAME_RE})|
                (?P<var2>{_NAME_RE})(?P<post_sign>\+\+|--)''', expr):
            var = m.group('var1') or m.group('var2')
            start, end = m.span()
            sign = m.group('pre_sign') or m.group('post_sign')
            ret = local_vars[var]
            local_vars[var] += 1 if sign[0] == '+' else -1
            if m.group('pre_sign'):
                ret = local_vars[var]
            expr = expr[:start] + self._dump(ret, local_vars) + expr[end:]

        if not expr:
            return None, should_return

        m = re.match(fr'''(?x)
            (?P<assign>
                (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
                (?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})?
                =(?!=)(?P<expr>.*)$
            )|(?P<return>
                (?!if|return|true|false|null|undefined|NaN)(?P<name>{_NAME_RE})$
            )|(?P<indexing>
                (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
            )|(?P<attribute>
                (?P<var>{_NAME_RE})(?:(?P<nullish>\?)?\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
            )|(?P<function>
                (?P<fname>{_NAME_RE})\((?P<args>.*)\)$
            )''', expr)
        if m and m.group('assign'):
            left_val = local_vars.get(m.group('out'))

            if not m.group('index'):
                local_vars[m.group('out')] = self._operator(
                    m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion)
                return local_vars[m.group('out')], should_return
            elif left_val in (None, JS_Undefined):
                raise self.Exception(f'Cannot index undefined variable {m.group("out")}', expr)

            idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
            if not isinstance(idx, (int, float)):
                raise self.Exception(f'List index {idx} must be integer', expr)
            idx = int(idx)
            left_val[idx] = self._operator(
                m.group('op'), self._index(left_val, idx), m.group('expr'), expr, local_vars, allow_recursion)
            return left_val[idx], should_return

        elif expr.isdigit():
            return int(expr), should_return

        elif expr == 'break':
            raise JS_Break()
        elif expr == 'continue':
            raise JS_Continue()
        elif expr == 'undefined':
            return JS_Undefined, should_return
        elif expr == 'NaN':
            return float('NaN'), should_return

        elif m and m.group('return'):
            return local_vars.get(m.group('name'), JS_Undefined), should_return

        with contextlib.suppress(ValueError):
            return json.loads(js_to_json(expr, strict=True)), should_return

        if m and m.group('indexing'):
            val = local_vars[m.group('in')]
            idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion)
            return self._index(val, idx), should_return

        # Split on the first operator (in _OPERATORS order) that leaves a left operand
        for op in _OPERATORS:
            separated = list(self._separate(expr, op))
            right_expr = separated.pop()
            while True:
                if op in '?<>*-' and len(separated) > 1 and not separated[-1].strip():
                    separated.pop()
                elif not (separated and op == '?' and right_expr.startswith('.')):
                    break
                right_expr = f'{op}{right_expr}'
                if op != '-':
                    right_expr = f'{separated.pop()}{op}{right_expr}'
            if not separated:
                continue
            left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
            return self._operator(op, left_val, right_expr, expr, local_vars, allow_recursion), should_return

        if m and m.group('attribute'):
            variable, member, nullish = m.group('var', 'member', 'nullish')
            if not member:
                member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion)
            arg_str = expr[m.end():]
            if arg_str.startswith('('):
                arg_str, remaining = self._separate_at_paren(arg_str)
            else:
                arg_str, remaining = None, arg_str

            def assertion(cndn, msg):
                """ assert, but without risk of getting optimized out """
                if not cndn:
                    raise self.Exception(f'{member} {msg}', expr)

            def eval_method():
                if (variable, member) == ('console', 'debug'):
                    if Debugger.ENABLED:
                        Debugger.write(self.interpret_expression(f'[{arg_str}]', local_vars, allow_recursion))
                    return

                types = {
                    'String': str,
                    'Math': float,
                }
                obj = local_vars.get(variable, types.get(variable, NO_DEFAULT))
                if obj is NO_DEFAULT:
                    if variable not in self._objects:
                        try:
                            self._objects[variable] = self.extract_object(variable)
                        except self.Exception:
                            if not nullish:
                                raise
                    obj = self._objects.get(variable, JS_Undefined)

                if nullish and obj is JS_Undefined:
                    return JS_Undefined

                # Member access
                if arg_str is None:
                    return self._index(obj, member, nullish)

                # Function call
                argvals = [
                    self.interpret_expression(v, local_vars, allow_recursion)
                    for v in self._separate(arg_str)]

                if obj == str:
                    if member == 'fromCharCode':
                        assertion(argvals, 'takes one or more arguments')
                        return ''.join(map(chr, argvals))
                    raise self.Exception(f'Unsupported String method {member}', expr)
                elif obj == float:
                    if member == 'pow':
                        assertion(len(argvals) == 2, 'takes two arguments')
                        return argvals[0] ** argvals[1]
                    raise self.Exception(f'Unsupported Math method {member}', expr)

                if member == 'split':
                    assertion(argvals, 'takes one or more arguments')
                    assertion(len(argvals) == 1, 'with limit argument is not implemented')
                    return obj.split(argvals[0]) if argvals[0] else list(obj)
                elif member == 'join':
                    assertion(isinstance(obj, list), 'must be applied on a list')
                    assertion(len(argvals) == 1, 'takes exactly one argument')
                    return argvals[0].join(obj)
                elif member == 'reverse':
                    assertion(not argvals, 'does not take any arguments')
                    obj.reverse()
                    return obj
                elif member == 'slice':
                    assertion(isinstance(obj, list), 'must be applied on a list')
                    assertion(len(argvals) == 1, 'takes exactly one argument')
                    return obj[argvals[0]:]
                elif member == 'splice':
                    assertion(isinstance(obj, list), 'must be applied on a list')
                    assertion(argvals, 'takes one or more arguments')
                    index, howMany = map(int, (argvals + [len(obj)])[:2])
                    if index < 0:
                        index += len(obj)
                    add_items = argvals[2:]
                    res = []
                    for i in range(index, min(index + howMany, len(obj))):
                        res.append(obj.pop(index))
                    for i, item in enumerate(add_items):
                        obj.insert(index + i, item)
                    return res
                elif member == 'unshift':
                    assertion(isinstance(obj, list), 'must be applied on a list')
                    assertion(argvals, 'takes one or more arguments')
                    for item in reversed(argvals):
                        obj.insert(0, item)
                    return obj
                elif member == 'pop':
                    assertion(isinstance(obj, list), 'must be applied on a list')
                    assertion(not argvals, 'does not take any arguments')
                    if not obj:
                        return
                    return obj.pop()
                elif member == 'push':
                    assertion(argvals, 'takes one or more arguments')
                    obj.extend(argvals)
                    return obj
                elif member == 'forEach':
                    assertion(argvals, 'takes one or more arguments')
                    assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
                    f, this = (argvals + [''])[:2]
                    return [f((item, idx, obj), {'this': this}, allow_recursion) for idx, item in enumerate(obj)]
                elif member == 'indexOf':
                    assertion(argvals, 'takes one or more arguments')
                    assertion(len(argvals) <= 2, 'takes at-most 2 arguments')
                    idx, start = (argvals + [0])[:2]
                    try:
                        return obj.index(idx, start)
                    except ValueError:
                        return -1
                elif member == 'charCodeAt':
                    assertion(isinstance(obj, str), 'must be applied on a string')
                    assertion(len(argvals) == 1, 'takes exactly one argument')
                    idx = argvals[0] if isinstance(argvals[0], int) else 0
                    if idx >= len(obj):
                        return None
                    return ord(obj[idx])

                # Anything else: call the member as a function
                idx = int(member) if isinstance(obj, list) else member
                return obj[idx](argvals, allow_recursion=allow_recursion)

            if remaining:
                ret, should_abort = self.interpret_statement(
                    self._named_object(local_vars, eval_method()) + remaining,
                    local_vars, allow_recursion)
                return ret, should_return or should_abort
            else:
                return eval_method(), should_return

        elif m and m.group('function'):
            fname = m.group('fname')
            argvals = [self.interpret_expression(v, local_vars, allow_recursion)
                       for v in self._separate(m.group('args'))]
            if fname in local_vars:
                return local_vars[fname](argvals, allow_recursion=allow_recursion), should_return
            elif fname not in self._functions:
                self._functions[fname] = self.extract_function(fname)
            return self._functions[fname](argvals, allow_recursion=allow_recursion), should_return

        raise self.Exception(
            f'Unsupported JS expression {truncate_string(expr, 20, 20) if expr != stmt else ""}', stmt)

    def interpret_expression(self, expr, local_vars, allow_recursion):
        """Interpret *expr* and return its value; a ``return`` inside it is an error."""
        ret, should_return = self.interpret_statement(expr, local_vars, allow_recursion)
        if should_return:
            raise self.Exception('Cannot return from an expression', expr)
        return ret

    def extract_object(self, objname):
        """Find the object literal assigned to *objname* in ``self.code``.

        @returns dict mapping each member name to a callable built from its
                 function body
        Raises ``self.Exception`` if no such object is found.
        """
        _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
        obj = {}
        obj_m = re.search(
            r'''(?x)
                (?<!\.)%s\s*=\s*{\s*
                    (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
                }\s*;
            ''' % (re.escape(objname), _FUNC_NAME_RE),
            self.code)
        if not obj_m:
            raise self.Exception(f'Could not find object {objname}')
        fields = obj_m.group('fields')
        # Currently, it only supports function definitions
        fields_m = re.finditer(
            r'''(?x)
                (?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
            ''' % (_FUNC_NAME_RE, _NAME_RE),
            fields)
        for f in fields_m:
            argnames = f.group('args').split(',')
            name = remove_quotes(f.group('key'))
            obj[name] = function_with_repr(self.build_function(argnames, f.group('code')), f'F<{name}>')

        return obj

    def extract_function_code(self, funcname):
        """ @returns argnames, code """
        func_m = re.search(
            r'''(?xs)
                (?:
                    function\s+%(name)s|
                    [{;,]\s*%(name)s\s*=\s*function|
                    (?:var|const|let)\s+%(name)s\s*=\s*function
                )\s*
                \((?P<args>[^)]*)\)\s*
                (?P<code>{.+})''' % {'name': re.escape(funcname)},
            self.code)
        if func_m is None:
            raise self.Exception(f'Could not find JS function "{funcname}"')
        # The regex grabs everything after the opening brace; cut at its match
        code, _ = self._separate_at_paren(func_m.group('code'))
        return [x.strip() for x in func_m.group('args').split(',')], code

    def extract_function(self, funcname):
        """Return a callable for the JS function *funcname* found in ``self.code``."""
        return function_with_repr(
            self.extract_function_from_code(*self.extract_function_code(funcname)),
            f'F<{funcname}>')

    def extract_function_from_code(self, argnames, code, *global_stack):
        """Build a callable from *code*, first replacing each anonymous function with a named object."""
        local_vars = {}
        while True:
            mobj = re.search(r'function\((?P<args>[^)]*)\)\s*{', code)
            if mobj is None:
                break
            start, body_start = mobj.span()
            body, remaining = self._separate_at_paren(code[body_start - 1:])
            name = self._named_object(local_vars, self.extract_function_from_code(
                [x.strip() for x in mobj.group('args').split(',')],
                body, local_vars, *global_stack))
            code = code[:start] + name + remaining
        return self.build_function(argnames, code, local_vars, *global_stack)

    def call_function(self, funcname, *args):
        """Extract the JS function *funcname* and call it with *args*."""
        return self.extract_function(funcname)(args)

    def build_function(self, argnames, code, *global_stack):
        """Return a callable ``resf(args, kwargs=None, allow_recursion=100)`` that runs *code*.

        Arguments are bound to *argnames* in the first map of *global_stack*
        (a fresh dict if none is given); missing arguments become ``None``.
        The callable returns the value of an executed ``return``, or
        ``None`` if the code finishes without one.
        """
        global_stack = list(global_stack) or [{}]
        argnames = tuple(argnames)

        def resf(args, kwargs=None, allow_recursion=100):
            global_stack[0].update(itertools.zip_longest(argnames, args, fillvalue=None))
            if kwargs:
                global_stack[0].update(kwargs)
            var_stack = LocalNameSpace(*global_stack)
            ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
            if should_abort:
                return ret
        return resf