7 from .utils
import ExtractorError
, remove_quotes
13 ('>>', operator
.rshift
),
14 ('<<', operator
.lshift
),
18 ('/', operator
.truediv
),
# Assignment-operator table built from _OPERATORS: every (op, opfunc) pair
# gets a compound form whose symbol is `op` followed by '=' and which reuses
# the same opfunc.
_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS]
# Plain '=' is appended after the compound forms; it discards the current
# value and yields the right-hand side unchanged.
_ASSIGN_OPERATORS.append(('=', (lambda cur, right: right)))
24 _NAME_RE
= r
'[a-zA-Z_$][a-zA-Z_$0-9]*'
26 _MATCHING_PARENS
= dict(zip('({[', ')}]'))
29 class JS_Break(ExtractorError
):
31 ExtractorError
.__init
__(self
, 'Invalid break')
34 class JS_Continue(ExtractorError
):
36 ExtractorError
.__init
__(self
, 'Invalid continue')
39 class LocalNameSpace(collections
.ChainMap
):
40 def __setitem__(self
, key
, value
):
41 for scope
in self
.maps
:
45 self
.maps
[0][key
] = value
47 def __delitem__(self
, key
):
48 raise NotImplementedError('Deleting is not supported')
52 def __init__(self
, code
, objects
=None):
57 self
._objects
= objects
58 self
.__named
_object
_counter
= 0
60 def _named_object(self
, namespace
, obj
):
61 self
.__named
_object
_counter
+= 1
62 name
= f
'__yt_dlp_jsinterp_obj{self.__named_object_counter}'
67 def _separate(expr
, delim
=',', max_split
=None):
70 counters
= {k: 0 for k in _MATCHING_PARENS.values()}
71 start
, splits
, pos
, delim_len
= 0, 0, 0, len(delim
) - 1
72 for idx
, char
in enumerate(expr
):
73 if char
in _MATCHING_PARENS
:
74 counters
[_MATCHING_PARENS
[char
]] += 1
75 elif char
in counters
:
77 if char
!= delim
[pos
] or any(counters
.values()):
80 elif pos
!= delim_len
:
83 yield expr
[start
: idx
- delim_len
]
84 start
, pos
= idx
+ 1, 0
86 if max_split
and splits
>= max_split
:
def _separate_at_paren(expr, delim):
    """ Split expr once on delim using JSInterpreter._separate (max_split=1)

    @returns (inner, outer): the first piece with its leading character
             dropped (presumably the opening bracket - confirm against
             callers) and the second piece, both stripped of whitespace
    @raises  ExtractorError if the split yields fewer than two pieces
    """
    # NOTE(review): no `self` parameter; any decorator on this definition
    # lies outside the lines shown here.
    separated = list(JSInterpreter._separate(expr, delim, 1))
    if len(separated) < 2:
        raise ExtractorError(f'No terminating paren {delim} in {expr}')
    return separated[0][1:].strip(), separated[1].strip()
97 def interpret_statement(self
, stmt
, local_vars
, allow_recursion
=100):
98 if allow_recursion
< 0:
99 raise ExtractorError('Recursion limit reached')
101 sub_statements
= list(self
._separate
(stmt
, ';'))
102 stmt
= (sub_statements
or ['']).pop()
103 for sub_stmt
in sub_statements
:
104 ret
, should_abort
= self
.interpret_statement(sub_stmt
, local_vars
, allow_recursion
- 1)
110 stmt_m
= re
.match(r
'var\s', stmt
)
112 expr
= stmt
[len(stmt_m
.group(0)):]
114 return_m
= re
.match(r
'return(?:\s+|$)', stmt
)
116 expr
= stmt
[len(return_m
.group(0)):]
119 # Try interpreting it as an expression
122 v
= self
.interpret_expression(expr
, local_vars
, allow_recursion
)
123 return v
, should_abort
125 def interpret_expression(self
, expr
, local_vars
, allow_recursion
):
127 if expr
== '': # Empty expression
130 if expr
.startswith('{'):
131 inner
, outer
= self
._separate
_at
_paren
(expr
, '}')
132 inner
, should_abort
= self
.interpret_statement(inner
, local_vars
, allow_recursion
- 1)
133 if not outer
or should_abort
:
136 expr
= json
.dumps(inner
) + outer
138 if expr
.startswith('('):
139 inner
, outer
= self
._separate
_at
_paren
(expr
, ')')
140 inner
= self
.interpret_expression(inner
, local_vars
, allow_recursion
)
144 expr
= json
.dumps(inner
) + outer
146 if expr
.startswith('['):
147 inner
, outer
= self
._separate
_at
_paren
(expr
, ']')
148 name
= self
._named
_object
(local_vars
, [
149 self
.interpret_expression(item
, local_vars
, allow_recursion
)
150 for item
in self
._separate
(inner
)])
153 m
= re
.match(r
'try\s*', expr
)
155 if expr
[m
.end()] == '{':
156 try_expr
, expr
= self
._separate
_at
_paren
(expr
[m
.end():], '}')
158 try_expr
, expr
= expr
[m
.end() - 1:], ''
159 ret
, should_abort
= self
.interpret_statement(try_expr
, local_vars
, allow_recursion
- 1)
162 return self
.interpret_statement(expr
, local_vars
, allow_recursion
- 1)[0]
164 m
= re
.match(r
'catch\s*\(', expr
)
166 # We ignore the catch block
167 _
, expr
= self
._separate
_at
_paren
(expr
, '}')
168 return self
.interpret_statement(expr
, local_vars
, allow_recursion
- 1)[0]
170 m
= re
.match(r
'for\s*\(', expr
)
172 constructor
, remaining
= self
._separate
_at
_paren
(expr
[m
.end() - 1:], ')')
173 if remaining
.startswith('{'):
174 body
, expr
= self
._separate
_at
_paren
(remaining
, '}')
176 m
= re
.match(r
'switch\s*\(', remaining
) # FIXME
178 switch_val
, remaining
= self
._separate
_at
_paren
(remaining
[m
.end() - 1:], ')')
179 body
, expr
= self
._separate
_at
_paren
(remaining
, '}')
180 body
= 'switch(%s){%s}' % (switch_val
, body
)
182 body
, expr
= remaining
, ''
183 start
, cndn
, increment
= self
._separate
(constructor
, ';')
184 if self
.interpret_statement(start
, local_vars
, allow_recursion
- 1)[1]:
185 raise ExtractorError(
186 f
'Premature return in the initialization of a for loop in {constructor!r}')
188 if not self
.interpret_expression(cndn
, local_vars
, allow_recursion
):
191 ret
, should_abort
= self
.interpret_statement(body
, local_vars
, allow_recursion
- 1)
198 if self
.interpret_statement(increment
, local_vars
, allow_recursion
- 1)[1]:
199 raise ExtractorError(
200 f
'Premature return in the initialization of a for loop in {constructor!r}')
201 return self
.interpret_statement(expr
, local_vars
, allow_recursion
- 1)[0]
203 m
= re
.match(r
'switch\s*\(', expr
)
205 switch_val
, remaining
= self
._separate
_at
_paren
(expr
[m
.end() - 1:], ')')
206 switch_val
= self
.interpret_expression(switch_val
, local_vars
, allow_recursion
)
207 body
, expr
= self
._separate
_at
_paren
(remaining
, '}')
208 items
= body
.replace('default:', 'case default:').split('case ')[1:]
209 for default
in (False, True):
212 case
, stmt
= (i
.strip() for i
in self
._separate
(item
, ':', 1))
214 matched
= matched
or case
== 'default'
216 matched
= case
!= 'default' and switch_val
== self
.interpret_expression(case
, local_vars
, allow_recursion
)
220 ret
, should_abort
= self
.interpret_statement(stmt
, local_vars
, allow_recursion
- 1)
227 return self
.interpret_statement(expr
, local_vars
, allow_recursion
- 1)[0]
229 # Comma separated statements
230 sub_expressions
= list(self
._separate
(expr
))
231 expr
= sub_expressions
.pop().strip() if sub_expressions
else ''
232 for sub_expr
in sub_expressions
:
233 self
.interpret_expression(sub_expr
, local_vars
, allow_recursion
)
235 for m
in re
.finditer(rf
'''(?x)
236 (?P<pre_sign>\+\+|--)(?P<var1>{_NAME_RE})|
237 (?P<var2>{_NAME_RE})(?P<post_sign>\+\+|--)''', expr
):
238 var
= m
.group('var1') or m
.group('var2')
239 start
, end
= m
.span()
240 sign
= m
.group('pre_sign') or m
.group('post_sign')
241 ret
= local_vars
[var
]
242 local_vars
[var
] += 1 if sign
[0] == '+' else -1
243 if m
.group('pre_sign'):
244 ret
= local_vars
[var
]
245 expr
= expr
[:start
] + json
.dumps(ret
) + expr
[end
:]
247 for op
, opfunc
in _ASSIGN_OPERATORS
:
248 m
= re
.match(rf
'''(?x)
249 (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?
251 (?P<expr>.*)$''', expr
)
254 right_val
= self
.interpret_expression(m
.group('expr'), local_vars
, allow_recursion
)
256 if m
.groupdict().get('index'):
257 lvar
= local_vars
[m
.group('out')]
258 idx
= self
.interpret_expression(m
.group('index'), local_vars
, allow_recursion
)
259 if not isinstance(idx
, int):
260 raise ExtractorError(f
'List indices must be integers: {idx}')
262 val
= opfunc(cur
, right_val
)
266 cur
= local_vars
.get(m
.group('out'))
267 val
= opfunc(cur
, right_val
)
268 local_vars
[m
.group('out')] = val
276 elif expr
== 'continue':
280 r
'(?!if|return|true|false|null)(?P<name>%s)$' % _NAME_RE
,
283 return local_vars
[var_m
.group('name')]
285 with contextlib
.suppress(ValueError):
286 return json
.loads(expr
)
289 r
'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE
, expr
)
291 val
= local_vars
[m
.group('in')]
292 idx
= self
.interpret_expression(m
.group('idx'), local_vars
, allow_recursion
)
295 for op
, opfunc
in _OPERATORS
:
296 separated
= list(self
._separate
(expr
, op
))
297 if len(separated
) < 2:
299 right_val
= separated
.pop()
300 left_val
= op
.join(separated
)
301 left_val
, should_abort
= self
.interpret_statement(
302 left_val
, local_vars
, allow_recursion
- 1)
304 raise ExtractorError(f
'Premature left-side return of {op} in {expr!r}')
305 right_val
, should_abort
= self
.interpret_statement(
306 right_val
, local_vars
, allow_recursion
- 1)
308 raise ExtractorError(f
'Premature right-side return of {op} in {expr!r}')
309 return opfunc(left_val
or 0, right_val
)
312 r
'(?P<var>%s)(?:\.(?P<member>[^(]+)|\[(?P<member2>[^]]+)\])\s*' % _NAME_RE
,
315 variable
= m
.group('var')
316 member
= remove_quotes(m
.group('member') or m
.group('member2'))
317 arg_str
= expr
[m
.end():]
318 if arg_str
.startswith('('):
319 arg_str
, remaining
= self
._separate
_at
_paren
(arg_str
, ')')
321 arg_str
, remaining
= None, arg_str
323 def assertion(cndn
, msg
):
324 """ assert, but without risk of getting optimized out """
326 raise ExtractorError(f
'{member} {msg}: {expr}')
330 if variable
== 'String':
332 elif variable
in local_vars
:
333 obj
= local_vars
[variable
]
335 if variable
not in self
._objects
:
336 self
._objects
[variable
] = self
.extract_object(variable
)
337 obj
= self
._objects
[variable
]
341 if member
== 'length':
347 self
.interpret_expression(v
, local_vars
, allow_recursion
)
348 for v
in self
._separate
(arg_str
)]
351 if member
== 'fromCharCode':
352 assertion(argvals
, 'takes one or more arguments')
353 return ''.join(map(chr, argvals
))
354 raise ExtractorError(f
'Unsupported string method {member}')
356 if member
== 'split':
357 assertion(argvals
, 'takes one or more arguments')
358 assertion(argvals
== [''], 'with arguments is not implemented')
360 elif member
== 'join':
361 assertion(isinstance(obj
, list), 'must be applied on a list')
362 assertion(len(argvals
) == 1, 'takes exactly one argument')
363 return argvals
[0].join(obj
)
364 elif member
== 'reverse':
365 assertion(not argvals
, 'does not take any arguments')
368 elif member
== 'slice':
369 assertion(isinstance(obj
, list), 'must be applied on a list')
370 assertion(len(argvals
) == 1, 'takes exactly one argument')
371 return obj
[argvals
[0]:]
372 elif member
== 'splice':
373 assertion(isinstance(obj
, list), 'must be applied on a list')
374 assertion(argvals
, 'takes one or more arguments')
375 index
, howMany
= map(int, (argvals
+ [len(obj
)])[:2])
378 add_items
= argvals
[2:]
380 for i
in range(index
, min(index
+ howMany
, len(obj
))):
381 res
.append(obj
.pop(index
))
382 for i
, item
in enumerate(add_items
):
383 obj
.insert(index
+ i
, item
)
385 elif member
== 'unshift':
386 assertion(isinstance(obj
, list), 'must be applied on a list')
387 assertion(argvals
, 'takes one or more arguments')
388 for item
in reversed(argvals
):
391 elif member
== 'pop':
392 assertion(isinstance(obj
, list), 'must be applied on a list')
393 assertion(not argvals
, 'does not take any arguments')
397 elif member
== 'push':
398 assertion(argvals
, 'takes one or more arguments')
401 elif member
== 'forEach':
402 assertion(argvals
, 'takes one or more arguments')
403 assertion(len(argvals
) <= 2, 'takes at-most 2 arguments')
404 f
, this
= (argvals
+ [''])[:2]
405 return [f((item
, idx
, obj
), this
=this
) for idx
, item
in enumerate(obj
)]
406 elif member
== 'indexOf':
407 assertion(argvals
, 'takes one or more arguments')
408 assertion(len(argvals
) <= 2, 'takes at-most 2 arguments')
409 idx
, start
= (argvals
+ [0])[:2]
411 return obj
.index(idx
, start
)
415 if isinstance(obj
, list):
417 return obj
[member
](argvals
)
420 return self
.interpret_expression(
421 self
._named
_object
(local_vars
, eval_method()) + remaining
,
422 local_vars
, allow_recursion
)
426 m
= re
.match(r
'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE
, expr
)
428 fname
= m
.group('func')
430 int(v
) if v
.isdigit() else local_vars
[v
]
431 for v
in self
._separate
(m
.group('args')))
432 if fname
in local_vars
:
433 return local_vars
[fname
](argvals
)
434 elif fname
not in self
._functions
:
435 self
._functions
[fname
] = self
.extract_function(fname
)
436 return self
._functions
[fname
](argvals
)
439 raise ExtractorError('Unsupported JS expression %r' % expr
)
441 def extract_object(self
, objname
):
442 _FUNC_NAME_RE
= r
'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')'''
446 (?<!this\.)%s\s*=\s*{\s*
447 (?P<fields>(%s\s*:\s*function\s*\(.*?\)\s*{.*?}(?:,\s*)?)*)
449 ''' % (re
.escape(objname
), _FUNC_NAME_RE
),
451 fields
= obj_m
.group('fields')
452 # Currently, it only supports function definitions
453 fields_m
= re
.finditer(
455 (?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}
459 argnames
= f
.group('args').split(',')
460 obj
[remove_quotes(f
.group('key'))] = self
.build_function(argnames
, f
.group('code'))
464 def extract_function_code(self
, funcname
):
465 """ @returns argnames, code """
468 (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*
469 \((?P<args>[^)]*)\)\s*
470 (?P<code>\{(?:(?!};)[^"]|"([^"]|\\")*")+\})''' % (
471 re
.escape(funcname
), re
.escape(funcname
), re
.escape(funcname
)),
473 code
, _
= self
._separate
_at
_paren
(func_m
.group('code'), '}') # refine the match
475 raise ExtractorError('Could not find JS function %r' % funcname
)
476 return func_m
.group('args').split(','), code
def extract_function(self, funcname):
    """ @returns the result of extract_function_from_code applied to the
                 values produced by extract_function_code(funcname) """
    # The return value of extract_function_code is star-unpacked, so each of
    # its elements becomes one positional argument.
    return self.extract_function_from_code(*self.extract_function_code(funcname))
481 def extract_function_from_code(self
, argnames
, code
, *global_stack
):
484 mobj
= re
.search(r
'function\((?P<args>[^)]*)\)\s*{', code
)
487 start
, body_start
= mobj
.span()
488 body
, remaining
= self
._separate
_at
_paren
(code
[body_start
- 1:], '}')
489 name
= self
._named
_object
(
491 self
.extract_function_from_code(
492 [str.strip(x
) for x
in mobj
.group('args').split(',')],
493 body
, local_vars
, *global_stack
))
494 code
= code
[:start
] + name
+ remaining
495 return self
.build_function(argnames
, code
, local_vars
, *global_stack
)
def call_function(self, funcname, *args):
    """ Extract funcname via extract_function and call the result

    The positional args are forwarded as ONE tuple argument, not spread out.
    @returns whatever the extracted callable returns
    """
    return self.extract_function(funcname)(args)
500 def build_function(self
, argnames
, code
, *global_stack
):
501 global_stack
= list(global_stack
) or [{}]
503 def resf(args
, **kwargs
):
504 global_stack
[0].update({
505 **dict(zip(argnames
, args
)),
508 var_stack
= LocalNameSpace(*global_stack
)
509 for stmt
in self
._separate
(code
.replace('\n', ''), ';'):
510 ret
, should_abort
= self
.interpret_statement(stmt
, var_stack
)