jfr.im git - yt-dlp.git/commitdiff
[phantomjs] Add function to execute JS without a DOM
author pukkandan <redacted>
Thu, 18 Aug 2022 16:04:47 +0000 (21:34 +0530)
committer pukkandan <redacted>
Thu, 18 Aug 2022 16:04:47 +0000 (21:34 +0530)
Authored by: MinePlayersPE, pukkandan

yt_dlp/extractor/openload.py

index f12a0eff11dc283408bb5f9836271000f3fbc523..e66ed4831bc03220d2d8e2dbd7a9a19e07aa3c69 100644 (file)
@@ -1,3 +1,4 @@
+import collections
 import contextlib
 import json
 import os
@@ -9,8 +10,10 @@
     ExtractorError,
     Popen,
     check_executable,
+    format_field,
     get_exe_version,
     is_outdated_version,
+    shell_quote,
 )
 
 
@@ -49,7 +52,7 @@ class PhantomJSwrapper:
     This class is experimental.
     """
 
-    _TEMPLATE = r'''
+    _BASE_JS = R'''
         phantom.onError = function(msg, trace) {{
           var msgStack = ['PHANTOM ERROR: ' + msg];
           if(trace && trace.length) {{
@@ -62,6 +65,9 @@ class PhantomJSwrapper:
           console.error(msgStack.join('\n'));
           phantom.exit(1);
         }};
+    '''
+
+    _TEMPLATE = R'''
         var page = require('webpage').create();
         var fs = require('fs');
         var read = {{ mode: 'r', charset: 'utf-8' }};
@@ -116,14 +122,18 @@ def __init__(self, extractor, required_version=None, timeout=10000):
                     'Your copy of PhantomJS is outdated, update it to version '
                     '%s or newer if you encounter any errors.' % required_version)
 
-        self.options = {
-            'timeout': timeout,
-        }
         for name in self._TMP_FILE_NAMES:
             tmp = tempfile.NamedTemporaryFile(delete=False)
             tmp.close()
             self._TMP_FILES[name] = tmp
 
+        self.options = collections.ChainMap({
+            'timeout': timeout,
+        }, {
+            x: self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
+            for x in self._TMP_FILE_NAMES
+        })
+
     def __del__(self):
         for name in self._TMP_FILE_NAMES:
             with contextlib.suppress(OSError, KeyError):
@@ -194,31 +204,35 @@ def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on w
 
         self._save_cookies(url)
 
-        replaces = self.options
-        replaces['url'] = url
         user_agent = headers.get('User-Agent') or self.extractor.get_param('http_headers')['User-Agent']
-        replaces['ua'] = user_agent.replace('"', '\\"')
-        replaces['jscode'] = jscode
-
-        for x in self._TMP_FILE_NAMES:
-            replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
-
-        with open(self._TMP_FILES['script'].name, 'wb') as f:
-            f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
+        jscode = self._TEMPLATE.format_map(self.options.new_child({
+            'url': url,
+            'ua': user_agent.replace('"', '\\"'),
+            'jscode': jscode,
+        }))
 
-        if video_id is None:
-            self.extractor.to_screen(f'{note2}')
-        else:
-            self.extractor.to_screen(f'{video_id}: {note2}')
+        stdout = self.execute(jscode, video_id, note2)
 
-        stdout, stderr, returncode = Popen.run(
-            [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name],
-            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        if returncode:
-            raise ExtractorError(f'Executing JS failed:\n{stderr}')
         with open(self._TMP_FILES['html'].name, 'rb') as f:
             html = f.read().decode('utf-8')
-
         self._load_cookies()
 
         return html, stdout
+
+    def execute(self, jscode, video_id=None, note='Executing JS'):
+        """Execute JS and return stdout"""
+        if 'phantom.exit();' not in jscode:
+            jscode += ';\nphantom.exit();'
+        jscode = self._BASE_JS + jscode
+
+        with open(self._TMP_FILES['script'].name, 'w', encoding='utf-8') as f:
+            f.write(jscode)
+        self.extractor.to_screen(f'{format_field(video_id, None, "%s: ")}{note}')
+
+        cmd = [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name]
+        self.extractor.write_debug(f'PhantomJS command line: {shell_quote(cmd)}')
+        stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        if returncode:
+            raise ExtractorError(f'Executing JS failed:\n{stderr.strip()}')
+
+        return stdout