jfr.im git - yt-dlp.git/blobdiff - yt_dlp/jsinterp.py
[jsinterp] Fix escape in regex
[yt-dlp.git] / yt_dlp / jsinterp.py
index cadb013a314e4b7e613612457ed5fb892a4f17fb..27d7f0dfa6289241c2cf3f542557b75e421c7f5f 100644 (file)
 
 
 def _js_bit_op(op):
+    def zeroise(x):
+        return 0 if x in (None, JS_Undefined) else x
+
     def wrapped(a, b):
-        def zeroise(x):
-            return 0 if x in (None, JS_Undefined) else x
-        return op(zeroise(a), zeroise(b))
+        return op(zeroise(a), zeroise(b)) & 0xffffffff
 
     return wrapped
 
@@ -172,7 +173,14 @@ def wrap_interpreter(cls, f):
         def interpret_statement(self, stmt, local_vars, allow_recursion, *args, **kwargs):
             if cls.ENABLED and stmt.strip():
                 cls.write(stmt, level=allow_recursion)
-            ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
+            try:
+                ret, should_ret = f(self, stmt, local_vars, allow_recursion, *args, **kwargs)
+            except Exception as e:
+                if cls.ENABLED:
+                    if isinstance(e, ExtractorError):
+                        e = e.orig_msg
+                    cls.write('=> Raises:', e, '<-|', stmt, level=allow_recursion)
+                raise
             if cls.ENABLED and stmt.strip():
                 cls.write(['->', '=>'][should_ret], repr(ret), '<-|', stmt, level=allow_recursion)
             return ret, should_ret
@@ -226,7 +234,7 @@ def _regex_flags(cls, expr):
 
     @staticmethod
     def _separate(expr, delim=',', max_split=None):
-        OP_CHARS = '+-*/%&|^=<>!,;{}()[]:'
+        OP_CHARS = '+-*/%&|^=<>!,;{}:'
         if not expr:
             return
         counters = {k: 0 for k in _MATCHING_PARENS.values()}
@@ -237,11 +245,12 @@ def _separate(expr, delim=',', max_split=None):
                 counters[_MATCHING_PARENS[char]] += 1
             elif not in_quote and char in counters:
                 counters[char] -= 1
-            elif not escaping and char in _QUOTES and in_quote in (char, None):
-                if in_quote or after_op or char != '/':
-                    in_quote = None if in_quote and not in_regex_char_group else char
-            elif in_quote == '/' and char in '[]':
-                in_regex_char_group = char == '['
+            elif not escaping:
+                if char in _QUOTES and in_quote in (char, None):
+                    if in_quote or after_op or char != '/':
+                        in_quote = None if in_quote and not in_regex_char_group else char
+                elif in_quote == '/' and char in '[]':
+                    in_regex_char_group = char == '['
             escaping = not escaping and in_quote and char == '\\'
             after_op = not in_quote and char in OP_CHARS or (char.isspace() and after_op)
 
@@ -504,7 +513,7 @@ def dict_item(key, val):
                 (?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})?
                 =(?!=)(?P<expr>.*)$
             )|(?P<return>
-                (?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$
+                (?!if|return|true|false|null|undefined|NaN)(?P<name>{_NAME_RE})$
             )|(?P<indexing>
                 (?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
             )|(?P<attribute>
@@ -539,6 +548,8 @@ def dict_item(key, val):
             raise JS_Continue()
         elif expr == 'undefined':
             return JS_Undefined, should_return
+        elif expr == 'NaN':
+            return float('NaN'), should_return
 
         elif m and m.group('return'):
             return local_vars.get(m.group('name'), JS_Undefined), should_return
@@ -683,6 +694,13 @@ def eval_method():
                         return obj.index(idx, start)
                     except ValueError:
                         return -1
+                elif member == 'charCodeAt':
+                    assertion(isinstance(obj, str), 'must be applied on a string')
+                    assertion(len(argvals) == 1, 'takes exactly one argument')
+                    idx = argvals[0] if isinstance(argvals[0], int) else 0
+                    if idx >= len(obj):
+                        return None
+                    return ord(obj[idx])
 
                 idx = int(member) if isinstance(obj, list) else member
                 return obj[idx](argvals, allow_recursion=allow_recursion)
@@ -784,7 +802,7 @@ def resf(args, kwargs={}, allow_recursion=100):
             global_stack[0].update(itertools.zip_longest(argnames, args, fillvalue=None))
             global_stack[0].update(kwargs)
             var_stack = LocalNameSpace(*global_stack)
-            ret, should_abort = self.interpret_statement(code.replace('\n', ''), var_stack, allow_recursion - 1)
+            ret, should_abort = self.interpret_statement(code.replace('\n', ' '), var_stack, allow_recursion - 1)
             if should_abort:
                 return ret
         return resf