]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/downloader/external.py
[docs,cleanup] Add deprecation warning in docs
[yt-dlp.git] / yt_dlp / downloader / external.py
index 0e2bbdb862bc32b26c985ee9b1d6c189a40f9138..9c1229cf6f318f6a10277aae4db260e534dcd227 100644 (file)
@@ -6,13 +6,7 @@
 import sys
 import time
 
-try:
-    from Crypto.Cipher import AES
-    can_decrypt_frag = True
-except ImportError:
-    can_decrypt_frag = False
-
-from .common import FileDownloader
+from .fragment import FragmentFD
 from ..compat import (
     compat_setenv,
     compat_str,
     cli_option,
     cli_valueless_option,
     cli_bool_option,
-    cli_configuration_args,
+    _configuration_args,
     encodeFilename,
-    error_to_compat_str,
     encodeArgument,
     handle_youtubedl_headers,
     check_executable,
     is_outdated_version,
     process_communicate_or_kill,
-    sanitized_Request,
     sanitize_open,
 )
 
 
-class ExternalFD(FileDownloader):
+class ExternalFD(FragmentFD):
     SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
+    can_download_to_stdout = False
 
     def real_download(self, filename, info_dict):
         self.report_destination(filename)
@@ -68,7 +61,7 @@ def real_download(self, filename, info_dict):
                     'downloaded_bytes': fsize,
                     'total_bytes': fsize,
                 })
-            self._hook_progress(status)
+            self._hook_progress(status, info_dict)
             return True
         else:
             self.to_stderr('\n')
@@ -82,15 +75,21 @@ def get_basename(cls):
 
     @property
     def exe(self):
-        return self.params.get('external_downloader')
+        return self.get_basename()
 
     @classmethod
     def available(cls, path=None):
-        return check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
+        path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
+        if path:
+            cls.exe = path
+            return path
+        return False
 
     @classmethod
     def supports(cls, info_dict):
-        return info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS
+        return (
+            (cls.can_download_to_stdout or not info_dict.get('to_stdout'))
+            and info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS)
 
     @classmethod
     def can_download(cls, info_dict, path=None):
@@ -105,10 +104,10 @@ def _bool_option(self, command_option, param, true_value='true', false_value='fa
     def _valueless_option(self, command_option, param, expected_value=True):
         return cli_valueless_option(self.params, command_option, param, expected_value)
 
-    def _configuration_args(self, *args, **kwargs):
-        return cli_configuration_args(
-            self.params.get('external_downloader_args'),
-            self.get_basename(), *args, **kwargs)
+    def _configuration_args(self, keys=None, *args, **kwargs):
+        return _configuration_args(
+            self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(),
+            keys, *args, **kwargs)
 
     def _call_downloader(self, tmpfilename, info_dict):
         """ Either overwrite this or implement _make_cmd """
@@ -116,58 +115,56 @@ def _call_downloader(self, tmpfilename, info_dict):
 
         self._debug_cmd(cmd)
 
-        p = subprocess.Popen(
-            cmd, stderr=subprocess.PIPE)
-        _, stderr = process_communicate_or_kill(p)
-        if p.returncode != 0:
-            self.to_stderr(stderr.decode('utf-8', 'replace'))
-
-        if 'url_list' in info_dict:
-            file_list = []
-            for [i, url] in enumerate(info_dict['url_list']):
-                tmpsegmentname = '%s_%s.frag' % (tmpfilename, i)
-                file_list.append(tmpsegmentname)
-            key_list = info_dict.get('key_list')
-            decrypt_info = None
+        if 'fragments' in info_dict:
+            fragment_retries = self.params.get('fragment_retries', 0)
+            skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+
+            count = 0
+            while count <= fragment_retries:
+                p = subprocess.Popen(
+                    cmd, stderr=subprocess.PIPE)
+                _, stderr = process_communicate_or_kill(p)
+                if p.returncode == 0:
+                    break
+                # TODO: Decide whether to retry based on error code
+                # https://aria2.github.io/manual/en/html/aria2c.html#exit-status
+                self.to_stderr(stderr.decode('utf-8', 'replace'))
+                count += 1
+                if count <= fragment_retries:
+                    self.to_screen(
+                        '[%s] Got error. Retrying fragments (attempt %d of %s)...'
+                        % (self.get_basename(), count, self.format_retries(fragment_retries)))
+            if count > fragment_retries:
+                if not skip_unavailable_fragments:
+                    self.report_error('Giving up after %s fragment retries' % fragment_retries)
+                    return -1
+
+            decrypt_fragment = self.decrypter(info_dict)
             dest, _ = sanitize_open(tmpfilename, 'wb')
-            for i, file in enumerate(file_list):
-                src, _ = sanitize_open(file, 'rb')
-                if key_list:
-                    decrypt_info = next((x for x in key_list if x['INDEX'] == i), decrypt_info)
-                    if decrypt_info['METHOD'] == 'AES-128':
-                        iv = decrypt_info.get('IV')
-                        decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
-                            self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
-                        encrypted_data = src.read()
-                        decrypted_data = AES.new(
-                            decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(encrypted_data)
-                        dest.write(decrypted_data)
-                    else:
-                        fragment_data = src.read()
-                        dest.write(fragment_data)
-                else:
-                    fragment_data = src.read()
-                    dest.write(fragment_data)
+            for frag_index, fragment in enumerate(info_dict['fragments']):
+                fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
+                try:
+                    src, _ = sanitize_open(fragment_filename, 'rb')
+                except IOError:
+                    if skip_unavailable_fragments and frag_index > 1:
+                        self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index))
+                        continue
+                    self.report_error('Unable to open fragment %d' % frag_index)
+                    return -1
+                dest.write(decrypt_fragment(fragment, src.read()))
                 src.close()
+                if not self.params.get('keep_fragments', False):
+                    os.remove(encodeFilename(fragment_filename))
             dest.close()
-            if not self.params.get('keep_fragments', False):
-                for file_path in file_list:
-                    try:
-                        os.remove(file_path)
-                    except OSError as ose:
-                        self.report_error("Unable to delete file %s; %s" % (file_path, error_to_compat_str(ose)))
-                try:
-                    file_path = '%s.frag.urls' % tmpfilename
-                    os.remove(file_path)
-                except OSError as ose:
-                    self.report_error("Unable to delete file %s; %s" % (file_path, error_to_compat_str(ose)))
-
+            os.remove(encodeFilename('%s.frag.urls' % tmpfilename))
+        else:
+            p = subprocess.Popen(
+                cmd, stderr=subprocess.PIPE)
+            _, stderr = process_communicate_or_kill(p)
+            if p.returncode != 0:
+                self.to_stderr(stderr.decode('utf-8', 'replace'))
         return p.returncode
 
-    def _prepare_url(self, info_dict, url):
-        headers = info_dict.get('http_headers')
-        return sanitized_Request(url, None, headers) if headers else url
-
 
 class CurlFD(ExternalFD):
     AVAILABLE_OPT = '-V'
@@ -243,37 +240,61 @@ def _make_cmd(self, tmpfilename, info_dict):
 
 class Aria2cFD(ExternalFD):
     AVAILABLE_OPT = '-v'
-    SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'frag_urls')
+    SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls')
+
+    @staticmethod
+    def supports_manifest(manifest):
+        UNSUPPORTED_FEATURES = [
+            r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [1]
+            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
+        ]
+        check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
+        return all(check_results)
 
     def _make_cmd(self, tmpfilename, info_dict):
-        cmd = [self.exe, '-c']
-        dn = os.path.dirname(tmpfilename)
-        if 'url_list' not in info_dict:
-            cmd += ['--out', os.path.basename(tmpfilename)]
-        verbose_level_args = ['--console-log-level=warn', '--summary-interval=0']
-        cmd += self._configuration_args(['--file-allocation=none', '-x16', '-j16', '-s16'] + verbose_level_args)
-        if dn:
-            cmd += ['--dir', dn]
+        cmd = [self.exe, '-c',
+               '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
+               '--file-allocation=none', '-x16', '-j16', '-s16']
+        if 'fragments' in info_dict:
+            cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
+        else:
+            cmd += ['--min-split-size', '1M']
+
         if info_dict.get('http_headers') is not None:
             for key, val in info_dict['http_headers'].items():
                 cmd += ['--header', '%s: %s' % (key, val)]
+        cmd += self._option('--max-overall-download-limit', 'ratelimit')
         cmd += self._option('--interface', 'source_address')
         cmd += self._option('--all-proxy', 'proxy')
         cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
         cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
+        cmd += self._configuration_args()
+
+        # aria2c strips out spaces from the beginning/end of filenames and paths.
+        # We work around this issue by adding a "./" to the beginning of the
+        # filename and relative path, and adding a "/" at the end of the path.
+        # See: https://github.com/yt-dlp/yt-dlp/issues/276
+        # https://github.com/ytdl-org/youtube-dl/issues/20312
+        # https://github.com/aria2/aria2/issues/1373
+        dn = os.path.dirname(tmpfilename)
+        if dn:
+            if not os.path.isabs(dn):
+                dn = '.%s%s' % (os.path.sep, dn)
+            cmd += ['--dir', dn + os.path.sep]
+        if 'fragments' not in info_dict:
+            cmd += ['--out', '.%s%s' % (os.path.sep, os.path.basename(tmpfilename))]
         cmd += ['--auto-file-renaming=false']
-        if 'url_list' in info_dict:
-            cmd += verbose_level_args
-            cmd += ['--uri-selector', 'inorder', '--download-result=hide']
+
+        if 'fragments' in info_dict:
+            cmd += ['--file-allocation=none', '--uri-selector=inorder']
             url_list_file = '%s.frag.urls' % tmpfilename
             url_list = []
-            for [i, url] in enumerate(info_dict['url_list']):
-                tmpsegmentname = '%s_%s.frag' % (os.path.basename(tmpfilename), i)
-                url_list.append('%s\n\tout=%s' % (url, tmpsegmentname))
+            for frag_index, fragment in enumerate(info_dict['fragments']):
+                fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
+                url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename))
             stream, _ = sanitize_open(url_list_file, 'wb')
             stream.write('\n'.join(url_list).encode('utf-8'))
             stream.close()
-
             cmd += ['-i', url_list_file]
         else:
             cmd += ['--', info_dict['url']]
@@ -281,9 +302,11 @@ def _make_cmd(self, tmpfilename, info_dict):
 
 
 class HttpieFD(ExternalFD):
+    AVAILABLE_OPT = '--version'
+
     @classmethod
-    def available(cls):
-        return check_executable('http', ['--version'])
+    def available(cls, path=None):
+        return ExternalFD.available(cls, path or 'http')
 
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
@@ -295,14 +318,30 @@ def _make_cmd(self, tmpfilename, info_dict):
 
 
 class FFmpegFD(ExternalFD):
-    SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
+    SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments')
+    can_download_to_stdout = True
 
     @classmethod
-    def available(cls):
+    def available(cls, path=None):
+        # TODO: Fix path for ffmpeg
+        # Fixme: This may be wrong when --ffmpeg-location is used
         return FFmpegPostProcessor().available
 
+    def on_process_started(self, proc, stdin):
+        """ Override this in subclasses  """
+        pass
+
+    @classmethod
+    def can_merge_formats(cls, info_dict, params):
+        return (
+            info_dict.get('requested_formats')
+            and info_dict.get('protocol')
+            and not params.get('allow_unplayable_formats')
+            and 'no-direct-merge' not in params.get('compat_opts', [])
+            and cls.can_download(info_dict))
+
     def _call_downloader(self, tmpfilename, info_dict):
-        url = info_dict['url']
+        urls = [f['url'] for f in info_dict.get('requested_formats', [])] or [info_dict['url']]
         ffpp = FFmpegPostProcessor(downloader=self)
         if not ffpp.available:
             self.report_error('m3u8 download detected but ffmpeg could not be found. Please install')
@@ -315,7 +354,12 @@ def _call_downloader(self, tmpfilename, info_dict):
             if self.params.get(log_level, False):
                 args += ['-loglevel', log_level]
                 break
+        if not self.params.get('verbose'):
+            args += ['-hide_banner']
 
+        args += info_dict.get('_ffmpeg_args', [])
+
+        # This option exists only for compatibility. Extractors should use `_ffmpeg_args` instead
         seekable = info_dict.get('_seekable')
         if seekable is not None:
             # setting -seekable prevents ffmpeg from guessing if the server
@@ -325,8 +369,6 @@ def _call_downloader(self, tmpfilename, info_dict):
             # http://trac.ffmpeg.org/ticket/6125#comment:10
             args += ['-seekable', '1' if seekable else '0']
 
-        args += self._configuration_args()
-
         # start_time = info_dict.get('start_time') or 0
         # if start_time:
         #     args += ['-ss', compat_str(start_time)]
@@ -334,7 +376,7 @@ def _call_downloader(self, tmpfilename, info_dict):
         # if end_time:
         #     args += ['-t', compat_str(end_time - start_time)]
 
-        if info_dict.get('http_headers') is not None and re.match(r'^https?://', url):
+        if info_dict.get('http_headers') is not None and re.match(r'^https?://', urls[0]):
             # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
             # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
             headers = handle_youtubedl_headers(info_dict['http_headers'])
@@ -392,11 +434,20 @@ def _call_downloader(self, tmpfilename, info_dict):
             elif isinstance(conn, compat_str):
                 args += ['-rtmp_conn', conn]
 
-        args += ['-i', url, '-c', 'copy']
+        for i, url in enumerate(urls):
+            args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url]
+
+        args += ['-c', 'copy']
+        if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
+            for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]):
+                stream_number = fmt.get('manifest_stream_number', 0)
+                a_or_v = 'a' if fmt.get('acodec') != 'none' else 'v'
+                args.extend(['-map', f'{i}:{a_or_v}:{stream_number}'])
 
         if self.params.get('test', False):
             args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
 
+        ext = info_dict['ext']
         if protocol in ('m3u8', 'm3u8_native'):
             use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts')
             if use_mpegts is None:
@@ -409,15 +460,20 @@ def _call_downloader(self, tmpfilename, info_dict):
                     args += ['-bsf:a', 'aac_adtstoasc']
         elif protocol == 'rtmp':
             args += ['-f', 'flv']
+        elif ext == 'mp4' and tmpfilename == '-':
+            args += ['-f', 'mpegts']
         else:
-            args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
+            args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
+
+        args += self._configuration_args(('_o1', '_o', ''))
 
         args = [encodeArgument(opt) for opt in args]
         args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
-
         self._debug_cmd(args)
 
         proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
+        if url in ('-', 'pipe:'):
+            self.on_process_started(proc, proc.stdin)
         try:
             retval = proc.wait()
         except BaseException as e:
@@ -426,7 +482,7 @@ def _call_downloader(self, tmpfilename, info_dict):
             # produces a file that is playable (this is mostly useful for live
             # streams). Note that Windows is not affected and produces playable
             # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
-            if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
+            if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
                 process_communicate_or_kill(proc, b'q')
             else:
                 proc.kill()
@@ -442,7 +498,7 @@ class AVconvFD(FFmpegFD):
 _BY_NAME = dict(
     (klass.get_basename(), klass)
     for name, klass in globals().items()
-    if name.endswith('FD') and name != 'ExternalFD'
+    if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
 )
 
 
@@ -455,4 +511,4 @@ def get_external_downloader(external_downloader):
         downloader . """
     # Drop .exe extension on Windows
     bn = os.path.splitext(os.path.basename(external_downloader))[0]
-    return _BY_NAME[bn]
+    return _BY_NAME.get(bn)