]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/downloader/external.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / downloader / external.py
index 007689a8c90eadc6ab3eb277601512987c5d52cf..8b45c671a0fb64cd31d7674b70a164670476937d 100644 (file)
@@ -1,14 +1,16 @@
 import enum
 import json
-import os.path
+import os
 import re
 import subprocess
 import sys
+import tempfile
 import time
 import uuid
 
 from .fragment import FragmentFD
 from ..compat import functools
+from ..networking import Request
 from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
 from ..utils import (
     Popen,
@@ -24,7 +26,6 @@
     encodeFilename,
     find_available_port,
     remove_end,
-    sanitized_Request,
     traverse_obj,
 )
 
@@ -42,6 +43,7 @@ class ExternalFD(FragmentFD):
     def real_download(self, filename, info_dict):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
+        self._cookies_tempfile = None
 
         try:
             started = time.time()
@@ -53,7 +55,10 @@ def real_download(self, filename, info_dict):
             # correct and expected termination thus all postprocessing
             # should take place
             retval = 0
-            self.to_screen('[%s] Interrupted by user' % self.get_basename())
+            self.to_screen(f'[{self.get_basename()}] Interrupted by user')
+        finally:
+            if self._cookies_tempfile:
+                self.try_remove(self._cookies_tempfile)
 
         if retval == 0:
             status = {
@@ -125,6 +130,16 @@ def _configuration_args(self, keys=None, *args, **kwargs):
             self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
             keys, *args, **kwargs)
 
+    def _write_cookies(self):
+        """Save the cookiejar to disk and return the path of the cookies file.
+
+        If the cookiejar has no filename of its own, an empty temporary file
+        (suffix ".cookies") is created, its path is stored in
+        self._cookies_tempfile and the cookies are saved there. real_download
+        removes that temporary file in its "finally" clause.
+
+        Returns cookiejar.filename if it is set, otherwise the temporary path.
+        """
+        if not self.ydl.cookiejar.filename:
+            tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False)
+            tmp_cookies.close()
+            self._cookies_tempfile = tmp_cookies.name
+            self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"')
+        # real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename
+        self.ydl.cookiejar.save(self._cookies_tempfile)
+        return self.ydl.cookiejar.filename or self._cookies_tempfile
+
     def _call_downloader(self, tmpfilename, info_dict):
         """ Either overwrite this or implement _make_cmd """
         cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
@@ -157,7 +172,7 @@ def _call_downloader(self, tmpfilename, info_dict):
         decrypt_fragment = self.decrypter(info_dict)
         dest, _ = self.sanitize_open(tmpfilename, 'wb')
         for frag_index, fragment in enumerate(info_dict['fragments']):
-            fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
+            fragment_filename = f'{tmpfilename}-Frag{frag_index}'
             try:
                 src, _ = self.sanitize_open(fragment_filename, 'rb')
             except OSError as err:
@@ -171,7 +186,7 @@ def _call_downloader(self, tmpfilename, info_dict):
             if not self.params.get('keep_fragments', False):
                 self.try_remove(encodeFilename(fragment_filename))
         dest.close()
-        self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
+        self.try_remove(encodeFilename(f'{tmpfilename}.frag.urls'))
         return 0
 
     def _call_process(self, cmd, info_dict):
@@ -184,6 +199,9 @@ class CurlFD(ExternalFD):
 
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
+        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+        if cookie_header:
+            cmd += ['--cookie', cookie_header]
         if info_dict.get('http_headers') is not None:
             for key, val in info_dict['http_headers'].items():
                 cmd += ['--header', f'{key}: {val}']
@@ -214,6 +232,9 @@ def _make_cmd(self, tmpfilename, info_dict):
         if info_dict.get('http_headers') is not None:
             for key, val in info_dict['http_headers'].items():
                 cmd += ['-H', f'{key}: {val}']
+        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+        if cookie_header:
+            cmd += ['-H', f'Cookie: {cookie_header}', '--max-redirect=0']
         cmd += self._configuration_args()
         cmd += ['--', info_dict['url']]
         return cmd
@@ -223,7 +244,9 @@ class WgetFD(ExternalFD):
     AVAILABLE_OPT = '--version'
 
     def _make_cmd(self, tmpfilename, info_dict):
-        cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
+        cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto']
+        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+            cmd += ['--load-cookies', self._write_cookies()]
         if info_dict.get('http_headers') is not None:
             for key, val in info_dict['http_headers'].items():
                 cmd += ['--header', f'{key}: {val}']
@@ -271,7 +294,7 @@ def _call_downloader(self, tmpfilename, info_dict):
         return super()._call_downloader(tmpfilename, info_dict)
 
     def _make_cmd(self, tmpfilename, info_dict):
-        cmd = [self.exe, '-c',
+        cmd = [self.exe, '-c', '--no-conf',
                '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
                '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
         if 'fragments' in info_dict:
@@ -279,6 +302,8 @@ def _make_cmd(self, tmpfilename, info_dict):
         else:
             cmd += ['--min-split-size', '1M']
 
+        if self.ydl.cookiejar.get_cookie_header(info_dict['url']):
+            cmd += [f'--load-cookies={self._write_cookies()}']
         if info_dict.get('http_headers') is not None:
             for key, val in info_dict['http_headers'].items():
                 cmd += ['--header', f'{key}: {val}']
@@ -310,12 +335,12 @@ def _make_cmd(self, tmpfilename, info_dict):
         cmd += ['--auto-file-renaming=false']
 
         if 'fragments' in info_dict:
-            cmd += ['--file-allocation=none', '--uri-selector=inorder']
-            url_list_file = '%s.frag.urls' % tmpfilename
+            cmd += ['--uri-selector=inorder']
+            url_list_file = f'{tmpfilename}.frag.urls'
             url_list = []
             for frag_index, fragment in enumerate(info_dict['fragments']):
-                fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
-                url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
+                fragment_filename = f'{os.path.basename(tmpfilename)}-Frag{frag_index}'
+                url_list.append('{}\n\tout={}'.format(fragment['url'], self._aria2c_filename(fragment_filename)))
             stream, _ = self.sanitize_open(url_list_file, 'wb')
             stream.write('\n'.join(url_list).encode())
             stream.close()
@@ -332,14 +357,13 @@ def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()):
             'id': sanitycheck,
             'method': method,
             'params': [f'token:{rpc_secret}', *params],
-        }).encode('utf-8')
-        request = sanitized_Request(
+        }).encode()
+        request = Request(
             f'http://localhost:{rpc_port}/jsonrpc',
             data=d, headers={
                 'Content-Type': 'application/json',
                 'Content-Length': f'{len(d)}',
-                'Ytdl-request-proxy': '__noproxy__',
-            })
+            }, proxies={'all': None})
         with self.ydl.urlopen(request) as r:
             resp = json.load(r)
         assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server'
@@ -392,7 +416,7 @@ def get_stat(key, *obj, average=False):
                     'total_bytes_estimate': total,
                     'eta': (total - downloaded) / (speed or 1),
                     'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None,
-                    'elapsed': time.time() - started
+                    'elapsed': time.time() - started,
                 })
                 self._hook_progress(status, info_dict)
 
@@ -417,6 +441,14 @@ def _make_cmd(self, tmpfilename, info_dict):
         if info_dict.get('http_headers') is not None:
             for key, val in info_dict['http_headers'].items():
                 cmd += [f'{key}:{val}']
+
+        # httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1]
+        # If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2]
+        # 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq
+        # 2: https://httpie.io/docs/cli/sessions
+        cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url'])
+        if cookie_header:
+            cmd += [f'Cookie:{cookie_header}']
         return cmd
 
 
@@ -459,7 +491,7 @@ def _call_downloader(self, tmpfilename, info_dict):
         if not self.params.get('verbose'):
             args += ['-hide_banner']
 
-        args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[])
+        args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args', ...))
 
         # These exist only for compatibility. Extractors should use
         # info_dict['downloader_options']['ffmpeg_args'] instead
@@ -477,12 +509,12 @@ def _call_downloader(self, tmpfilename, info_dict):
         proxy = self.params.get('proxy')
         if proxy:
             if not re.match(r'^[\da-zA-Z]+://', proxy):
-                proxy = 'http://%s' % proxy
+                proxy = f'http://{proxy}'
 
             if proxy.startswith('socks'):
                 self.report_warning(
-                    '%s does not support SOCKS proxies. Downloading is likely to fail. '
-                    'Consider adding --hls-prefer-native to your command.' % self.get_basename())
+                    f'{self.get_basename()} does not support SOCKS proxies. Downloading is likely to fail. '
+                    'Consider adding --hls-prefer-native to your command.')
 
             # Since December 2015 ffmpeg supports -http_proxy option (see
             # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
@@ -527,7 +559,13 @@ def _call_downloader(self, tmpfilename, info_dict):
 
         selected_formats = info_dict.get('requested_formats') or [info_dict]
         for i, fmt in enumerate(selected_formats):
-            if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']):
+            is_http = re.match(r'^https?://', fmt['url'])
+            cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else []
+            if cookies:
+                args.extend(['-cookies', ''.join(
+                    f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n'
+                    for cookie in cookies)])
+            if fmt.get('http_headers') and is_http:
                 # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
                 # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
                 args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())])
@@ -537,7 +575,7 @@ def _call_downloader(self, tmpfilename, info_dict):
             if end_time:
                 args += ['-t', str(end_time - start_time)]
 
-            args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', fmt['url']]
+            args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']]
 
         if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
             args += ['-c', 'copy']
@@ -577,6 +615,8 @@ def _call_downloader(self, tmpfilename, info_dict):
         else:
             args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
 
+        args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args_out', ...))
+
         args += self._configuration_args(('_o1', '_o', ''))
 
         args = [encodeArgument(opt) for opt in args]