jfr.im git - yt-dlp.git/commitdiff
[jsinterp] Bring on-par with youtube-dl
author pukkandan <redacted>
Sun, 14 Aug 2022 21:50:36 +0000 (03:20 +0530)
committer pukkandan <redacted>
Sun, 14 Aug 2022 22:01:49 +0000 (03:31 +0530)
Partially cherry-picked from: https://github.com/ytdl-org/youtube-dl/commit/d231b56717c73ee597d2e077d11b69ed48a1b02d

Authored by pukkandan, dirkf

README.md
test/test_jsinterp.py
test/test_youtube_signature.py
yt_dlp/jsinterp.py

index 9672a1771837801906ea7753f729d49d323a8c0b..42cbfcebac4d19f59d00b91c2fda94685119f2a6 100644 (file)
--- a/README.md
+++ b/README.md
@@ -71,7 +71,7 @@
 
 # NEW FEATURES
 
-* Merged with **youtube-dl v2021.12.17+ [commit/adb5294](https://github.com/ytdl-org/youtube-dl/commit/adb5294177265ba35b45746dbb600965076ed150)**<!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
+* Merged with **youtube-dl v2021.12.17+ [commit/d231b56](https://github.com/ytdl-org/youtube-dl/commit/d231b56717c73ee597d2e077d11b69ed48a1b02d)**<!--([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))--> and **youtube-dlc v2020.11.11-3+ [commit/f9401f2](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee)**: You get all the features and patches of [youtube-dlc](https://github.com/blackjack4494/yt-dlc) in addition to the latest [youtube-dl](https://github.com/ytdl-org/youtube-dl)
 
 * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API
 
index 48e2abcf6661ff45196b4b2dc21312221508af78..c97f6dcfb95198deb54d6d757431f7eab49e0ade 100644 (file)
@@ -48,6 +48,9 @@ def test_operators(self):
         jsi = JSInterpreter('function f(){return 1 << 5;}')
         self.assertEqual(jsi.call_function('f'), 32)
 
+        jsi = JSInterpreter('function f(){return 2 ** 5}')
+        self.assertEqual(jsi.call_function('f'), 32)
+
         jsi = JSInterpreter('function f(){return 19 & 21;}')
         self.assertEqual(jsi.call_function('f'), 17)
 
@@ -57,6 +60,12 @@ def test_operators(self):
         jsi = JSInterpreter('function f(){return []? 2+3: 4;}')
         self.assertEqual(jsi.call_function('f'), 5)
 
+        jsi = JSInterpreter('function f(){return 1 == 2}')
+        self.assertEqual(jsi.call_function('f'), False)
+
+        jsi = JSInterpreter('function f(){return 0 && 1 || 2;}')
+        self.assertEqual(jsi.call_function('f'), 2)
+
     def test_array_access(self):
         jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}')
         self.assertEqual(jsi.call_function('f'), [5, 2, 7])
@@ -114,6 +123,16 @@ def test_precedence(self):
         }''')
         self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50])
 
+    def test_builtins(self):
+        jsi = JSInterpreter('''
+        function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 86000)
+        jsi = JSInterpreter('''
+        function x(dt) { return new Date(dt) - 0; }
+        ''')
+        self.assertEqual(jsi.call_function('x', 'Wednesday 31 December 1969 18:01:26 MDT'), 86000)
+
     def test_call(self):
         jsi = JSInterpreter('''
         function x() { return 2; }
@@ -188,6 +207,17 @@ def test_comma(self):
         ''')
         self.assertEqual(jsi.call_function('x'), 7)
 
+        jsi = JSInterpreter('''
+        function x() { a=5; return (a -= 1, a+=3, a); }
+        ''')
+        self.assertEqual(jsi.call_function('x'), 7)
+
+    def test_void(self):
+        jsi = JSInterpreter('''
+        function x() { return void 42; }
+        ''')
+        self.assertEqual(jsi.call_function('x'), None)
+
     def test_return_function(self):
         jsi = JSInterpreter('''
         function x() { return [1, function(){return 1}][1] }
index 559bdfccff6731cd02890d20b791ac8f607e365c..79bbfc32371ddf282df5512ce76d516f9bf24c90 100644 (file)
 class TestPlayerInfo(unittest.TestCase):
     def test_youtube_extract_player_info(self):
         PLAYER_URLS = (
+            ('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'),
             ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
             ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'),
             ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'),
index 1af6ee0aa2b8b568e7ccde91088615cf9c851ef1..87f141476c8b92129d53909880e8e22ff86d4264 100644 (file)
@@ -17,6 +17,8 @@
 )
 
 _NAME_RE = r'[a-zA-Z_$][\w$]*'
+
+# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
 _OPERATORS = {  # None => Defined in JSInterpreter._operator
     '?': None,
 
     '|': operator.or_,
     '^': operator.xor,
 
-    # FIXME: This should actually be below comparision
-    '>>': operator.rshift,
-    '<<': operator.lshift,
+    '===': operator.is_,
+    '!==': operator.is_not,
+    '==': operator.eq,
+    '!=': operator.ne,
 
     '<=': operator.le,
     '>=': operator.ge,
     '<': operator.lt,
     '>': operator.gt,
 
+    '>>': operator.rshift,
+    '<<': operator.lshift,
+
     '+': operator.add,
     '-': operator.sub,
 
     '*': operator.mul,
     '/': operator.truediv,
     '%': operator.mod,
+
+    '**': operator.pow,
 }
 
+_COMP_OPERATORS = {'===', '!==', '==', '!=', '<=', '>=', '<', '>'}
+
 _MATCHING_PARENS = dict(zip('({[', ')}]'))
 _QUOTES = '\'"'
 
@@ -81,7 +91,7 @@ def __delitem__(self, key):
 
 class Debugger:
     import sys
-    ENABLED = 'pytest' in sys.modules
+    ENABLED = False and 'pytest' in sys.modules
 
     @staticmethod
     def write(*args, level=100):
@@ -200,7 +210,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
             if should_return:
                 return ret, should_return
 
-        m = re.match(r'(?P<var>var\s)|return(?:\s+|$)', stmt)
+        m = re.match(r'(?P<var>(?:var|const|let)\s)|return(?:\s+|$)', stmt)
         if m:
             expr = stmt[len(m.group(0)):].strip()
             should_return = not m.group('var')
@@ -218,13 +228,18 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
             obj = expr[4:]
             if obj.startswith('Date('):
                 left, right = self._separate_at_paren(obj[4:], ')')
-                expr = unified_timestamp(left[1:-1], False)
+                expr = unified_timestamp(
+                    self.interpret_expression(left, local_vars, allow_recursion), False)
                 if not expr:
                     raise self.Exception(f'Failed to parse date {left!r}', expr)
                 expr = self._dump(int(expr * 1000), local_vars) + right
             else:
                 raise self.Exception(f'Unsupported object {obj}', expr)
 
+        if expr.startswith('void '):
+            left = self.interpret_expression(expr[5:], local_vars, allow_recursion)
+            return None, should_return
+
         if expr.startswith('{'):
             inner, outer = self._separate_at_paren(expr, '}')
             inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
@@ -307,7 +322,8 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
                     if default:
                         matched = matched or case == 'default'
                     elif not matched:
-                        matched = case != 'default' and switch_val == self.interpret_expression(case, local_vars, allow_recursion)
+                        matched = (case != 'default'
+                                   and switch_val == self.interpret_expression(case, local_vars, allow_recursion))
                     if not matched:
                         continue
                     try:
@@ -347,7 +363,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
         m = re.match(fr'''(?x)
             (?P<assign>
                 (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])?\s*
-                (?P<op>{"|".join(map(re.escape, _OPERATORS))})?
+                (?P<op>{"|".join(map(re.escape, set(_OPERATORS) - _COMP_OPERATORS))})?
                 =(?P<expr>.*)$
             )|(?P<return>
                 (?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$
@@ -397,12 +413,14 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
 
         for op in _OPERATORS:
             separated = list(self._separate(expr, op))
-            if len(separated) < 2:
-                continue
             right_expr = separated.pop()
-            while op == '-' and len(separated) > 1 and not separated[-1].strip():
-                right_expr = f'-{right_expr}'
+            while op in '<>*-' and len(separated) > 1 and not separated[-1].strip():
                 separated.pop()
+                right_expr = f'{op}{right_expr}'
+                if op != '-':
+                    right_expr = f'{separated.pop()}{op}{right_expr}'
+            if not separated:
+                continue
             left_val = self.interpret_expression(op.join(separated), local_vars, allow_recursion)
             return self._operator(op, 0 if left_val is None else left_val,
                                   right_expr, expr, local_vars, allow_recursion), should_return
@@ -564,8 +582,8 @@ def extract_object(self, objname):
         # Currently, it only supports function definitions
         fields_m = re.finditer(
             r'''(?x)
-                (?P<key>%s)\s*:\s*function\s*\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}
-            ''' % _FUNC_NAME_RE,
+                (?P<key>%s)\s*:\s*function\s*\((?P<args>(?:%s|,)*)\){(?P<code>[^}]+)}
+            ''' % (_FUNC_NAME_RE, _NAME_RE),
             fields)
         for f in fields_m:
             argnames = f.group('args').split(',')
@@ -580,7 +598,7 @@ def extract_function_code(self, funcname):
                 (?:
                     function\s+%(name)s|
                     [{;,]\s*%(name)s\s*=\s*function|
-                    var\s+%(name)s\s*=\s*function
+                    (?:var|const|let)\s+%(name)s\s*=\s*function
                 )\s*
                 \((?P<args>[^)]*)\)\s*
                 (?P<code>{.+})''' % {'name': re.escape(funcname)},
@@ -615,10 +633,8 @@ def build_function(self, argnames, code, *global_stack):
         argnames = tuple(argnames)
 
         def resf(args, kwargs={}, allow_recursion=100):
-            global_stack[0].update({
-                **dict(itertools.zip_longest(argnames, args, fillvalue=None)),
-                **kwargs
-            })
+            global_stack[0].update(itertools.zip_longest(argnames, args, fillvalue=None))
+            global_stack[0].update(kwargs)
             var_stack = LocalNameSpace(*global_stack)
             ret, should_abort = self.interpret_statement(code.replace('\n', ''), var_stack, allow_recursion - 1)
             if should_abort: