X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/3cf50fa8e9e460fef35531df46b6e893924f1c96..62b5c94cadaa5f596dc1a7083db9db12efe357be:/yt_dlp/downloader/external.py diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 575138371..4ce8a3bf7 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -1,12 +1,16 @@ import enum -import os.path +import json +import os import re import subprocess import sys +import tempfile import time +import uuid from .fragment import FragmentFD from ..compat import functools +from ..networking import Request from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor from ..utils import ( Popen, @@ -20,7 +24,7 @@ determine_ext, encodeArgument, encodeFilename, - handle_youtubedl_headers, + find_available_port, remove_end, traverse_obj, ) @@ -39,6 +43,7 @@ class ExternalFD(FragmentFD): def real_download(self, filename, info_dict): self.report_destination(filename) tmpfilename = self.temp_name(filename) + self._cookies_tempfile = None try: started = time.time() @@ -51,6 +56,9 @@ def real_download(self, filename, info_dict): # should take place retval = 0 self.to_screen('[%s] Interrupted by user' % self.get_basename()) + finally: + if self._cookies_tempfile: + self.try_remove(self._cookies_tempfile) if retval == 0: status = { @@ -60,7 +68,6 @@ def real_download(self, filename, info_dict): } if filename != '-': fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen(f'\r[{self.get_basename()}] Downloaded {fsize} bytes') self.try_rename(tmpfilename, filename) status.update({ 'downloaded_bytes': fsize, @@ -101,6 +108,7 @@ def supports(cls, info_dict): return all(( not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES, '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES, + not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url'), all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')), )) @@ -122,6 +130,16 @@ def _configuration_args(self, keys=None, *args, **kwargs): self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME, keys, *args, **kwargs) + def _write_cookies(self): + if not self.ydl.cookiejar.filename: + tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False) + tmp_cookies.close() + self._cookies_tempfile = tmp_cookies.name + self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"') + # real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename + self.ydl.cookiejar.save(self._cookies_tempfile) + return self.ydl.cookiejar.filename or self._cookies_tempfile + def _call_downloader(self, tmpfilename, info_dict): """ Either overwrite this or implement _make_cmd """ cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] @@ -129,8 +147,7 @@ def _call_downloader(self, tmpfilename, info_dict): self._debug_cmd(cmd) if 'fragments' not in info_dict: - _, stderr, returncode = Popen.run( - cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None) + _, stderr, returncode = self._call_process(cmd, info_dict) if returncode and stderr: self.to_stderr(stderr) return returncode @@ -140,7 +157,7 @@ def _call_downloader(self, tmpfilename, info_dict): retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=None, fatal=not skip_unavailable_fragments) for retry in retry_manager: - _, stderr, returncode = Popen.run(cmd, text=True, stderr=subprocess.PIPE) + _, stderr, returncode = self._call_process(cmd, info_dict) if not returncode: break # TODO: Decide whether to retry based on error code @@ -172,6 +189,9 @@ def _call_downloader(self, tmpfilename, info_dict): self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename)) return 0 + def _call_process(self, cmd, info_dict): + return Popen.run(cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None) + class CurlFD(ExternalFD): AVAILABLE_OPT = '-V' @@ -179,6 +199,9 @@ class CurlFD(ExternalFD): def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed'] + cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url']) + if cookie_header: + cmd += ['--cookie', cookie_header] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', f'{key}: {val}'] @@ -209,6 +232,9 @@ def _make_cmd(self, tmpfilename, info_dict): if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['-H', f'{key}: {val}'] + cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url']) + if cookie_header: + cmd += ['-H', f'Cookie: {cookie_header}', '--max-redirect=0'] cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd @@ -218,7 +244,9 @@ class WgetFD(ExternalFD): AVAILABLE_OPT = '--version' def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto'] + cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto'] + if self.ydl.cookiejar.get_cookie_header(info_dict['url']): + cmd += ['--load-cookies', self._write_cookies()] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', f'{key}: {val}'] @@ -256,8 +284,17 @@ def supports_manifest(manifest): def _aria2c_filename(fn): return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}' + def _call_downloader(self, tmpfilename, info_dict): + # FIXME: Disabled due to https://github.com/yt-dlp/yt-dlp/issues/5931 + if False and 'no-external-downloader-progress' not in self.params.get('compat_opts', []): + info_dict['__rpc'] = { + 'port': find_available_port() or 19190, + 'secret': str(uuid.uuid4()), + } + return super()._call_downloader(tmpfilename, info_dict) + def _make_cmd(self, tmpfilename, info_dict): - cmd = [self.exe, '-c', + cmd = [self.exe, '-c', '--no-conf', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16'] if 'fragments' in info_dict: @@ -265,6 +302,8 @@ def _make_cmd(self, tmpfilename, info_dict): else: cmd += ['--min-split-size', '1M'] + if self.ydl.cookiejar.get_cookie_header(info_dict['url']): + cmd += [f'--load-cookies={self._write_cookies()}'] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', f'{key}: {val}'] @@ -276,6 +315,12 @@ def _make_cmd(self, tmpfilename, info_dict): cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=') cmd += self._configuration_args() + if '__rpc' in info_dict: + cmd += [ + '--enable-rpc', + f'--rpc-listen-port={info_dict["__rpc"]["port"]}', + f'--rpc-secret={info_dict["__rpc"]["secret"]}'] + # aria2c strips out spaces from the beginning/end of filenames and paths. # We work around this issue by adding a "./" to the beginning of the # filename and relative path, and adding a "/" at the end of the path. @@ -304,6 +349,87 @@ def _make_cmd(self, tmpfilename, info_dict): cmd += ['--', info_dict['url']] return cmd + def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()): + # Does not actually need to be UUID, just unique + sanitycheck = str(uuid.uuid4()) + d = json.dumps({ + 'jsonrpc': '2.0', + 'id': sanitycheck, + 'method': method, + 'params': [f'token:{rpc_secret}', *params], + }).encode('utf-8') + request = Request( + f'http://localhost:{rpc_port}/jsonrpc', + data=d, headers={ + 'Content-Type': 'application/json', + 'Content-Length': f'{len(d)}', + }, proxies={'all': None}) + with self.ydl.urlopen(request) as r: + resp = json.load(r) + assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server' + return resp['result'] + + def _call_process(self, cmd, info_dict): + if '__rpc' not in info_dict: + return super()._call_process(cmd, info_dict) + + send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret']) + started = time.time() + + fragmented = 'fragments' in info_dict + frag_count = len(info_dict['fragments']) if fragmented else 1 + status = { + 'filename': info_dict.get('_filename'), + 'status': 'downloading', + 'elapsed': 0, + 'downloaded_bytes': 0, + 'fragment_count': frag_count if fragmented else None, + 'fragment_index': 0 if fragmented else None, + } + self._hook_progress(status, info_dict) + + def get_stat(key, *obj, average=False): + val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0] + return sum(val) / (len(val) if average else 1) + + with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p: + # Add a small sleep so that RPC client can receive response, + # or the connection stalls infinitely + time.sleep(0.2) + retval = p.poll() + while retval is None: + # We don't use tellStatus as we won't know the GID without reading stdout + # Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive + active = send_rpc('aria2.tellActive') + completed = send_rpc('aria2.tellStopped', [0, frag_count]) + + downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active) + speed = get_stat('downloadSpeed', active) + total = frag_count * get_stat('totalLength', active, completed, average=True) + if total < downloaded: + total = None + + status.update({ + 'downloaded_bytes': int(downloaded), + 'speed': speed, + 'total_bytes': None if fragmented else total, + 'total_bytes_estimate': total, + 'eta': (total - downloaded) / (speed or 1), + 'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, + 'elapsed': time.time() - started + }) + self._hook_progress(status, info_dict) + + if not active and len(completed) >= frag_count: + send_rpc('aria2.shutdown') + retval = p.wait() + break + + time.sleep(0.1) + retval = p.poll() + + return '', p.stderr.read(), retval + class HttpieFD(ExternalFD): AVAILABLE_OPT = '--version' @@ -315,6 +441,14 @@ def _make_cmd(self, tmpfilename, info_dict): if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += [f'{key}:{val}'] + + # httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1] + # If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2] + # 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq + # 2: https://httpie.io/docs/cli/sessions + cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url']) + if cookie_header: + cmd += [f'Cookie:{cookie_header}'] return cmd @@ -425,11 +559,16 @@ def _call_downloader(self, tmpfilename, info_dict): selected_formats = info_dict.get('requested_formats') or [info_dict] for i, fmt in enumerate(selected_formats): - if fmt.get('http_headers') and re.match(r'^https?://', fmt['url']): - headers_dict = handle_youtubedl_headers(fmt['http_headers']) + is_http = re.match(r'^https?://', fmt['url']) + cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else [] + if cookies: + args.extend(['-cookies', ''.join( + f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n' + for cookie in cookies)]) + if fmt.get('http_headers') and is_http: # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. - args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in headers_dict.items())]) + args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())]) if start_time: args += ['-ss', str(start_time)]