# Regex fragment for an identifier: one letter, `_` or `$`, followed by any
# number of word characters or `$`.  It is interpolated into the larger
# expression-matching patterns used by the interpreter below.
19 _NAME_RE
= r
'[a-zA-Z_$][\w$]*'
21 # Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
22 _OPERATORS
= { # None => Defined in JSInterpreter._operator
32 '!==': operator
.is_not
,
36 '<=': lambda a
, b
: (a
or 0) <= (b
or 0),
37 '>=': lambda a
, b
: (a
or 0) >= (b
or 0),
38 '<': lambda a
, b
: (a
or 0) < (b
or 0),
39 '>': lambda a
, b
: (a
or 0) > (b
or 0),
41 '>>': operator
.rshift
,
42 '<<': operator
.lshift
,
44 '+': lambda a
, b
: (a
or 0) + (b
or 0),
45 '-': lambda a
, b
: (a
or 0) - (b
or 0),
47 '*': lambda a
, b
: (a
or 0) * (b
or 0),
48 '/': lambda a
, b
: (a
or 0) / b
,
# Comparison operators.  They are subtracted from the operator set when the
# assignment-operator alternation of the expression regex is built.
_COMP_OPERATORS = {
    '===', '!==',
    '==', '!=',
    '<=', '>=',
    '<', '>',
}

# Opening bracket -> matching closing bracket.
_MATCHING_PARENS = {
    '(': ')',
    '{': '}',
    '[': ']',
}
60 def _ternary(cndn
, if_true
=True, if_false
=False):
61 """Simulate JS's ternary operator (cndn?if_true:if_false)"""
62 if cndn
in (False, None, 0, ''):
64 with contextlib
.suppress(TypeError):
65 if math
.isnan(cndn
): # NB: NaN cannot be checked by membership
70 class JS_Break(ExtractorError
):
72 ExtractorError
.__init
__(self
, 'Invalid break')
75 class JS_Continue(ExtractorError
):
77 ExtractorError
.__init
__(self
, 'Invalid continue')
80 class LocalNameSpace(collections
.ChainMap
):
81 def __setitem__(self
, key
, value
):
82 for scope
in self
.maps
:
86 self
.maps
[0][key
] = value
88 def __delitem__(self
, key
):
89 raise NotImplementedError('Deleting is not supported')
# Switch consulted by wrap_interpreter before it traces a statement.
# The leading `False and` short-circuits, so as written the flag is always
# False, whether or not pytest has been imported.
94 ENABLED
= False and 'pytest' in sys
.modules
def write(*args, level=100):
    """Emit one debug line through ``write_string``.

    Each positional argument is converted with ``str`` and shortened with
    ``truncate_string(..., 50, 50)``; the pieces are joined by single spaces.
    The line starts with ``[debug] JS: `` followed by padding that grows as
    *level* drops below 100, and ends with a newline.
    """
    padding = " " * (100 - level)
    pieces = [truncate_string(str(arg), 50, 50) for arg in args]
    message = " ".join(pieces)
    write_string(f'[debug] JS: {padding}{message}\n')
def wrap_interpreter(cls, f):
    """Decorate a statement interpreter so that its calls can be traced.

    @param cls  object providing the ``ENABLED`` flag and the ``write`` logger
    @param f    callable ``f(self, stmt, local_vars, allow_recursion, ...)``
                returning a ``(value, should_return)`` pair
    @returns    a wrapper with the same call convention and result as *f*

    When ``cls.ENABLED`` is truthy and the statement is not blank, the
    statement is logged before the call and the result after it, using
    ``allow_recursion`` as the log level.
    """
    # Local import so the block does not depend on the file-level import list.
    import functools

    # NOTE(review): the wrapper requires ``allow_recursion`` positionally even
    # when *f* declares a default for it; callers in this file always pass it.
    @functools.wraps(f)  # keep the wrapped function's __name__/__doc__
    def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
        if cls.ENABLED and stmt.strip():
            cls.write(stmt, level=allow_recursion)
        ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
        if cls.ENABLED and stmt.strip():
            # '->' marks a plain value, '=>' a value that is being returned
            cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
        return ret, should_ret
    return interpret_statement
# Running count of objects registered through _named_object; that method
# increments it and embeds the new value in the name it generates.
114 __named_object_counter
= 0
def __init__(self, code, objects=None):
    """Store the source text and set up the lookup caches.

    @param code     source text, kept as ``self.code``
    @param objects  optional mapping used as ``self._objects``; a fresh empty
                    dict is created when it is omitted or None
    """
    if objects is None:
        objects = {}
    self.code = code
    self._functions = {}
    self._objects = objects
120 class Exception(ExtractorError
):
121 def __init__(self
, msg
, expr
=None, *args
, **kwargs
):
123 msg
= f
'{msg.rstrip()} in: {truncate_string(expr, 50, 50)}'
124 super().__init
__(msg
, *args
, **kwargs
)
126 def _named_object(self
, namespace
, obj
):
127 self
.__named
_object
_counter
+= 1
128 name
= f
'__yt_dlp_jsinterp_obj{self.__named_object_counter}'
129 namespace
[name
] = obj
133 def _separate(expr
, delim
=',', max_split
=None):
136 counters
= {k: 0 for k in _MATCHING_PARENS.values()}
137 start
, splits
, pos
, delim_len
= 0, 0, 0, len(delim
) - 1
138 in_quote
, escaping
= None, False
139 for idx
, char
in enumerate(expr
):
140 if not in_quote
and char
in _MATCHING_PARENS
:
141 counters
[_MATCHING_PARENS
[char
]] += 1
142 elif not in_quote
and char
in counters
:
144 elif not escaping
and char
in _QUOTES
and in_quote
in (char
, None):
145 in_quote
= None if in_quote
else char
146 escaping
= not escaping
and in_quote
and char
== '\\'
148 if char
!= delim
[pos
] or any(counters
.values()) or in_quote
:
151 elif pos
!= delim_len
:
154 yield expr
[start
: idx
- delim_len
]
155 start
, pos
= idx
+ 1, 0
157 if max_split
and splits
>= max_split
:
def _separate_at_paren(cls, expr, delim=None):
    """Split *expr* once on its closing bracket and drop the opening one.

    @param expr   expression that starts with an opening bracket
    @param delim  closing bracket to split on; when omitted it is looked up
                  in ``_MATCHING_PARENS`` from the first character of *expr*
    @returns      ``(inner, remainder)``: the text before the split without
                  its first character, and the text after it, both stripped
    @raises cls.Exception  if *delim* cannot be derived or ``cls._separate``
                           yields fewer than two parts
    """
    if delim is None:
        # expr[:1] is '' for an empty expression, so this lookup never raises
        delim = _MATCHING_PARENS.get(expr[:1])
        if delim is None:
            raise cls.Exception('Expression does not start with an opening bracket', expr)
    separated = list(cls._separate(expr, delim, 1))
    if len(separated) < 2:
        raise cls.Exception(f'No terminating paren {delim}', expr)
    # [1:] removes the opening bracket that the first part still carries
    return separated[0][1:].strip(), separated[1].strip()
168 def _operator(self
, op
, left_val
, right_expr
, expr
, local_vars
, allow_recursion
):
169 if op
in ('||', '&&'):
170 if (op
== '&&') ^
_ternary(left_val
):
171 return left_val
# short circuiting
173 right_expr
= _ternary(left_val
, *self
._separate
(right_expr
, ':', 1))
175 right_val
= self
.interpret_expression(right_expr
, local_vars
, allow_recursion
)
176 if not _OPERATORS
.get(op
):
180 return _OPERATORS
[op
](left_val
, right_val
)
181 except Exception as e
:
182 raise self
.Exception(f
'Failed to evaluate {left_val!r} {op} {right_val!r}', expr
, cause
=e
)
184 def _index(self
, obj
, idx
):
188 return obj
[int(idx
)] if isinstance(obj
, list) else obj
[idx
]
189 except Exception as e
:
190 raise self
.Exception(f
'Cannot get index {idx}', repr(obj
), cause
=e
)
192 def _dump(self
, obj
, namespace
):
194 return json
.dumps(obj
)
196 return self
._named
_object
(namespace
, obj
)
198 @Debugger.wrap_interpreter
199 def interpret_statement(self
, stmt
, local_vars
, allow_recursion
=100):
200 if allow_recursion
< 0:
201 raise self
.Exception('Recursion limit reached')
204 should_return
= False
205 sub_statements
= list(self
._separate
(stmt
, ';')) or ['']
206 expr
= stmt
= sub_statements
.pop().strip()
208 for sub_stmt
in sub_statements
:
209 ret
, should_return
= self
.interpret_statement(sub_stmt
, local_vars
, allow_recursion
)
211 return ret
, should_return
213 m
= re
.match(r
'(?P<var>(?:var|const|let)\s)|return(?:\s+|$)', stmt
)
215 expr
= stmt
[len(m
.group(0)):].strip()
216 should_return
= not m
.group('var')
218 return None, should_return
220 if expr
[0] in _QUOTES
:
221 inner
, outer
= self
._separate
(expr
, expr
[0], 1)
222 inner
= json
.loads(js_to_json(f
'{inner}{expr[0]}', strict
=True))
224 return inner
, should_return
225 expr
= self
._named
_object
(local_vars
, inner
) + outer
227 if expr
.startswith('new '):
229 if obj
.startswith('Date('):
230 left
, right
= self
._separate
_at
_paren
(obj
[4:], ')')
231 expr
= unified_timestamp(
232 self
.interpret_expression(left
, local_vars
, allow_recursion
), False)
234 raise self
.Exception(f
'Failed to parse date {left!r}', expr
)
235 expr
= self
._dump
(int(expr
* 1000), local_vars
) + right
237 raise self
.Exception(f
'Unsupported object {obj}', expr
)
239 if expr
.startswith('void '):
240 left
= self
.interpret_expression(expr
[5:], local_vars
, allow_recursion
)
241 return None, should_return
243 if expr
.startswith('{'):
244 inner
, outer
= self
._separate
_at
_paren
(expr
, '}')
245 inner
, should_abort
= self
.interpret_statement(inner
, local_vars
, allow_recursion
)
246 if not outer
or should_abort
:
247 return inner
, should_abort
or should_return
249 expr
= self
._dump
(inner
, local_vars
) + outer
251 if expr
.startswith('('):
252 inner
, outer
= self
._separate
_at
_paren
(expr
, ')')
253 inner
, should_abort
= self
.interpret_statement(inner
, local_vars
, allow_recursion
)
254 if not outer
or should_abort
:
255 return inner
, should_abort
or should_return
257 expr
= self
._dump
(inner
, local_vars
) + outer
259 if expr
.startswith('['):
260 inner
, outer
= self
._separate
_at
_paren
(expr
, ']')
261 name
= self
._named
_object
(local_vars
, [
262 self
.interpret_expression(item
, local_vars
, allow_recursion
)
263 for item
in self
._separate
(inner
)])
266 m
= re
.match(r
'(?P<try>try|finally)\s*|(?:(?P<catch>catch)|(?P<for>for)|(?P<switch>switch))\s*\(', expr
)
267 if m
and m
.group('try'):
268 if expr
[m
.end()] == '{':
269 try_expr
, expr
= self
._separate
_at
_paren
(expr
[m
.end():], '}')
271 try_expr
, expr
= expr
[m
.end() - 1:], ''
272 ret
, should_abort
= self
.interpret_statement(try_expr
, local_vars
, allow_recursion
)
275 ret
, should_abort
= self
.interpret_statement(expr
, local_vars
, allow_recursion
)
276 return ret
, should_abort
or should_return
278 elif m
and m
.group('catch'):
279 # We ignore the catch block
280 _
, expr
= self
._separate
_at
_paren
(expr
, '}')
281 ret
, should_abort
= self
.interpret_statement(expr
, local_vars
, allow_recursion
)
282 return ret
, should_abort
or should_return
284 elif m
and m
.group('for'):
285 constructor
, remaining
= self
._separate
_at
_paren
(expr
[m
.end() - 1:], ')')
286 if remaining
.startswith('{'):
287 body
, expr
= self
._separate
_at
_paren
(remaining
, '}')
289 switch_m
= re
.match(r
'switch\s*\(', remaining
) # FIXME
291 switch_val
, remaining
= self
._separate
_at
_paren
(remaining
[switch_m
.end() - 1:], ')')
292 body
, expr
= self
._separate
_at
_paren
(remaining
, '}')
293 body
= 'switch(%s){%s}' % (switch_val
, body
)
295 body
, expr
= remaining
, ''
296 start
, cndn
, increment
= self
._separate
(constructor
, ';')
297 self
.interpret_expression(start
, local_vars
, allow_recursion
)
299 if not _ternary(self
.interpret_expression(cndn
, local_vars
, allow_recursion
)):
302 ret
, should_abort
= self
.interpret_statement(body
, local_vars
, allow_recursion
)
309 self
.interpret_expression(increment
, local_vars
, allow_recursion
)
310 ret
, should_abort
= self
.interpret_statement(expr
, local_vars
, allow_recursion
)
311 return ret
, should_abort
or should_return
313 elif m
and m
.group('switch'):
314 switch_val
, remaining
= self
._separate
_at
_paren
(expr
[m
.end() - 1:], ')')
315 switch_val
= self
.interpret_expression(switch_val
, local_vars
, allow_recursion
)
316 body
, expr
= self
._separate
_at
_paren
(remaining
, '}')
317 items
= body
.replace('default:', 'case default:').split('case ')[1:]
318 for default
in (False, True):
321 case
, stmt
= (i
.strip() for i
in self
._separate
(item
, ':', 1))
323 matched
= matched
or case
== 'default'
325 matched
= (case
!= 'default'
326 and switch_val
== self
.interpret_expression(case
, local_vars
, allow_recursion
))
330 ret
, should_abort
= self
.interpret_statement(stmt
, local_vars
, allow_recursion
)
337 ret
, should_abort
= self
.interpret_statement(expr
, local_vars
, allow_recursion
)
338 return ret
, should_abort
or should_return
340 # Comma separated statements
341 sub_expressions
= list(self
._separate
(expr
))
342 if len(sub_expressions
) > 1:
343 for sub_expr
in sub_expressions
:
344 ret
, should_abort
= self
.interpret_statement(sub_expr
, local_vars
, allow_recursion
)
349 for m
in re
.finditer(rf
'''(?x)
350 (?P<pre_sign>\+\+|--)(?P<var1>{_NAME_RE})|
351 (?P<var2>{_NAME_RE})(?P<post_sign>\+\+|--)''', expr
):
352 var
= m
.group('var1') or m
.group('var2')
353 start
, end
= m
.span()
354 sign
= m
.group('pre_sign') or m
.group('post_sign')
355 ret
= local_vars
[var
]
356 local_vars
[var
] += 1 if sign
[0] == '+' else -1
357 if m
.group('pre_sign'):
358 ret
= local_vars
[var
]
359 expr
= expr
[:start
] + self
._dump
(ret
, local_vars
) + expr
[end
:]
362 return None, should_return
364 m
= re
.match(fr
'''(?x)
366 (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
367 (?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})?
370 (?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$
372 (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
374 (?P<var>{_NAME_RE})(?:\.(?P<member>[^(]+)|\[(?P<member2>[^\]]+)\])\s*
376 (?P<fname>{_NAME_RE})\((?P<args>.*)\)$
378 if m
and m
.group('assign'):
379 left_val
= local_vars
.get(m
.group('out'))
381 if not m
.group('index'):
382 local_vars
[m
.group('out')] = self
._operator
(
383 m
.group('op'), left_val
, m
.group('expr'), expr
, local_vars
, allow_recursion
)
384 return local_vars
[m
.group('out')], should_return
385 elif left_val
is None:
386 raise self
.Exception(f
'Cannot index undefined variable {m.group("out")}', expr
)
388 idx
= self
.interpret_expression(m
.group('index'), local_vars
, allow_recursion
)
389 if not isinstance(idx
, (int, float)):
390 raise self
.Exception(f
'List index {idx} must be integer', expr
)
392 left_val
[idx
] = self
._operator
(
393 m
.group('op'), left_val
[idx
], m
.group('expr'), expr
, local_vars
, allow_recursion
)
394 return left_val
[idx
], should_return
397 return int(expr
), should_return
399 elif expr
== 'break':
401 elif expr
== 'continue':
404 elif m
and m
.group('return'):
405 return local_vars
[m
.group('name')], should_return
407 with contextlib
.suppress(ValueError):
408 return json
.loads(js_to_json(expr
, strict
=True)), should_return
410 if m
and m
.group('indexing'):
411 val
= local_vars
[m
.group('in')]
412 idx
= self
.interpret_expression(m
.group('idx'), local_vars
, allow_recursion
)
413 return self
._index
(val
, idx
), should_return
415 for op
in _OPERATORS
:
416 separated
= list(self
._separate
(expr
, op
))
417 right_expr
= separated
.pop()
418 while op
in '<>*-' and len(separated
) > 1 and not separated
[-1].strip():
420 right_expr
= f
'{op}{right_expr}'
422 right_expr
= f
'{separated.pop()}{op}{right_expr}'
425 left_val
= self
.interpret_expression(op
.join(separated
), local_vars
, allow_recursion
)
426 return self
._operator
(op
, left_val
, right_expr
, expr
, local_vars
, allow_recursion
), should_return
428 if m
and m
.group('attribute'):
429 variable
= m
.group('var')
430 member
= m
.group('member')
432 member
= self
.interpret_expression(m
.group('member2'), local_vars
, allow_recursion
)
433 arg_str
= expr
[m
.end():]
434 if arg_str
.startswith('('):
435 arg_str
, remaining
= self
._separate
_at
_paren
(arg_str
, ')')
437 arg_str
, remaining
= None, arg_str
439 def assertion(cndn
, msg
):
440 """ assert, but without risk of getting optimized out """
442 raise self
.Exception(f
'{member} {msg}', expr
)
445 if (variable
, member
) == ('console', 'debug'):
447 Debugger
.write(self
.interpret_expression(f
'[{arg_str}]', local_vars
, allow_recursion
))
454 obj
= local_vars
.get(variable
, types
.get(variable
, NO_DEFAULT
))
455 if obj
is NO_DEFAULT
:
456 if variable
not in self
._objects
:
457 self
._objects
[variable
] = self
.extract_object(variable
)
458 obj
= self
._objects
[variable
]
462 return self
._index
(obj
, member
)
466 self
.interpret_expression(v
, local_vars
, allow_recursion
)
467 for v
in self
._separate
(arg_str
)]
470 if member
== 'fromCharCode':
471 assertion(argvals
, 'takes one or more arguments')
472 return ''.join(map(chr, argvals
))
473 raise self
.Exception(f
'Unsupported String method {member}', expr
)
476 assertion(len(argvals
) == 2, 'takes two arguments')
477 return argvals
[0] ** argvals
[1]
478 raise self
.Exception(f
'Unsupported Math method {member}', expr
)
480 if member
== 'split':
481 assertion(argvals
, 'takes one or more arguments')
482 assertion(len(argvals
) == 1, 'with limit argument is not implemented')
483 return obj
.split(argvals
[0]) if argvals
[0] else list(obj
)
484 elif member
== 'join':
485 assertion(isinstance(obj
, list), 'must be applied on a list')
486 assertion(len(argvals
) == 1, 'takes exactly one argument')
487 return argvals
[0].join(obj
)
488 elif member
== 'reverse':
489 assertion(not argvals
, 'does not take any arguments')
492 elif member
== 'slice':
493 assertion(isinstance(obj
, list), 'must be applied on a list')
494 assertion(len(argvals
) == 1, 'takes exactly one argument')
495 return obj
[argvals
[0]:]
496 elif member
== 'splice':
497 assertion(isinstance(obj
, list), 'must be applied on a list')
498 assertion(argvals
, 'takes one or more arguments')
499 index
, howMany
= map(int, (argvals
+ [len(obj
)])[:2])
502 add_items
= argvals
[2:]
504 for i
in range(index
, min(index
+ howMany
, len(obj
))):
505 res
.append(obj
.pop(index
))
506 for i
, item
in enumerate(add_items
):
507 obj
.insert(index
+ i
, item
)
509 elif member
== 'unshift':
510 assertion(isinstance(obj
, list), 'must be applied on a list')
511 assertion(argvals
, 'takes one or more arguments')
512 for item
in reversed(argvals
):
515 elif member
== 'pop':
516 assertion(isinstance(obj
, list), 'must be applied on a list')
517 assertion(not argvals
, 'does not take any arguments')
521 elif member
== 'push':
522 assertion(argvals
, 'takes one or more arguments')
525 elif member
== 'forEach':
526 assertion(argvals
, 'takes one or more arguments')
527 assertion(len(argvals
) <= 2, 'takes at-most 2 arguments')
528 f
, this
= (argvals
+ [''])[:2]
529 return [f((item
, idx
, obj
), {'this': this}
, allow_recursion
) for idx
, item
in enumerate(obj
)]
530 elif member
== 'indexOf':
531 assertion(argvals
, 'takes one or more arguments')
532 assertion(len(argvals
) <= 2, 'takes at-most 2 arguments')
533 idx
, start
= (argvals
+ [0])[:2]
535 return obj
.index(idx
, start
)
539 idx
= int(member
) if isinstance(obj
, list) else member
540 return obj
[idx
](argvals
, allow_recursion
=allow_recursion
)
543 ret
, should_abort
= self
.interpret_statement(
544 self
._named
_object
(local_vars
, eval_method()) + remaining
,
545 local_vars
, allow_recursion
)
546 return ret
, should_return
or should_abort
548 return eval_method(), should_return
550 elif m
and m
.group('function'):
551 fname
= m
.group('fname')
552 argvals
= [self
.interpret_expression(v
, local_vars
, allow_recursion
)
553 for v
in self
._separate
(m
.group('args'))]
554 if fname
in local_vars
:
555 return local_vars
[fname
](argvals
, allow_recursion
=allow_recursion
), should_return
556 elif fname
not in self
._functions
:
557 self
._functions
[fname
] = self
.extract_function(fname
)
558 return self
._functions
[fname
](argvals
, allow_recursion
=allow_recursion
), should_return
560 raise self
.Exception(
561 f
'Unsupported JS expression {truncate_string(expr, 20, 20) if expr != stmt else ""}', stmt
)
563 def interpret_expression(self
, expr
, local_vars
, allow_recursion
):
564 ret
, should_return
= self
.interpret_statement(expr
, local_vars
, allow_recursion
)
566 raise self
.Exception('Cannot return from an expression', expr
)
569 def extract_object(self
, objname
):
570 _FUNC_NAME_RE
= r
'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
574 (?<!this\.)%s\s*=\s*{\s*
575 (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
577 ''' % (re
.escape(objname
), _FUNC_NAME_RE
),
580 raise self
.Exception(f
'Could not find object {objname}')
581 fields
= obj_m
.group('fields')
582 # Currently, it only supports function definitions
583 fields_m
= re
.finditer(
585 (?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
586 ''' % (_FUNC_NAME_RE
, _NAME_RE
),
589 argnames
= f
.group('args').split(',')
590 obj
[remove_quotes(f
.group('key'))] = self
.build_function(argnames
, f
.group('code'))
594 def extract_function_code(self
, funcname
):
595 """ @returns argnames, code """
600 [{;,]\s*%(name)s\s*=\s*function|
601 (?:var|const|let)\s+%(name)s\s*=\s*function
603 \((?P<args>[^)]*)\)\s*
604 (?P<code>{.+})''' % {'name': re.escape(funcname)}
,
606 code
, _
= self
._separate
_at
_paren
(func_m
.group('code'), '}')
608 raise self
.Exception(f
'Could not find JS function "{funcname}"')
609 return [x
.strip() for x
in func_m
.group('args').split(',')], code
def extract_function(self, funcname):
    """Look up *funcname* and hand its pieces to extract_function_from_code.

    @returns whatever ``extract_function_from_code`` produces for the
             ``(argnames, code)`` pair found by ``extract_function_code``
    """
    argnames, code = self.extract_function_code(funcname)
    return self.extract_function_from_code(argnames, code)
614 def extract_function_from_code(self
, argnames
, code
, *global_stack
):
617 mobj
= re
.search(r
'function\((?P<args>[^)]*)\)\s*{', code
)
620 start
, body_start
= mobj
.span()
621 body
, remaining
= self
._separate
_at
_paren
(code
[body_start
- 1:], '}')
622 name
= self
._named
_object
(local_vars
, self
.extract_function_from_code(
623 [x
.strip() for x
in mobj
.group('args').split(',')],
624 body
, local_vars
, *global_stack
))
625 code
= code
[:start
] + name
+ remaining
626 return self
.build_function(argnames
, code
, local_vars
, *global_stack
)
def call_function(self, funcname, *args):
    """Extract the function named *funcname* and invoke it.

    The positional arguments are forwarded as a single tuple, which is the
    calling convention of the object returned by ``extract_function``.
    """
    func = self.extract_function(funcname)
    return func(args)
631 def build_function(self
, argnames
, code
, *global_stack
):
632 global_stack
= list(global_stack
) or [{}]
633 argnames
= tuple(argnames
)
635 def resf(args
, kwargs
={}, allow_recursion
=100):
636 global_stack
[0].update(itertools
.zip_longest(argnames
, args
, fillvalue
=None))
637 global_stack
[0].update(kwargs
)
638 var_stack
= LocalNameSpace(*global_stack
)
639 ret
, should_abort
= self
.interpret_statement(code
.replace('\n', ''), var_stack
, allow_recursion
- 1)