[fragment] Merge during download for `-N`, and refactor `hls`/`dash` (#364)

author pukkandan <redacted>

Mon, 21 Jun 2021 18:59:50 +0000 (00:29 +0530)

committer GitHub <redacted>

Mon, 21 Jun 2021 18:59:50 +0000 (00:29 +0530)
author pukkandan <redacted>
Mon, 21 Jun 2021 18:59:50 +0000 (00:29 +0530)
committer GitHub <redacted>
Mon, 21 Jun 2021 18:59:50 +0000 (00:29 +0530)
diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py

index 3982941767cee45ad9dbf42b79f00adf52d156d3..90c7a3ace25d9495f6ab2bc1103685147e5e7786 100644 (file)
--- a/yt_dlp/downloader/dash.py
+++ b/yt_dlp/downloader/dash.py
@@ -1,21 +1,9 @@
  from __future__ import unicode_literals
  
-import errno
-try:
-    import concurrent.futures
-    can_threaded_download = True
-except ImportError:
-    can_threaded_download = False
-
  from ..downloader import _get_real_downloader
  from .fragment import FragmentFD
  
-from ..compat import compat_urllib_error
-from ..utils import (
-    DownloadError,
-    sanitize_open,
-    urljoin,
-)
+from ..utils import urljoin
  
  
  class DashSegmentsFD(FragmentFD):
@@ -43,9 +31,6 @@ def real_download(self, filename, info_dict):
          else:
              self._prepare_and_start_frag_download(ctx)
  
-        fragment_retries = self.params.get('fragment_retries', 0)
-        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
-
          fragments_to_download = []
          frag_index = 0
          for i, fragment in enumerate(fragments):
@@ -76,116 +61,5 @@ def real_download(self, filename, info_dict):
              if not success:
                  return False
          else:
-            def download_fragment(fragment):
-                i = fragment['index']
-                frag_index = fragment['frag_index']
-                fragment_url = fragment['url']
-
-                ctx['fragment_index'] = frag_index
-
-                # In DASH, the first segment contains necessary headers to
-                # generate a valid MP4 file, so always abort for the first segment
-                fatal = i == 0 or not skip_unavailable_fragments
-                count = 0
-                while count <= fragment_retries:
-                    try:
-                        success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
-                        if not success:
-                            return False, frag_index
-                        break
-                    except compat_urllib_error.HTTPError as err:
-                        # YouTube may often return 404 HTTP error for a fragment causing the
-                        # whole download to fail. However if the same fragment is immediately
-                        # retried with the same request data this usually succeeds (1-2 attempts
-                        # is usually enough) thus allowing to download the whole file successfully.
-                        # To be future-proof we will retry all fragments that fail with any
-                        # HTTP error.
-                        count += 1
-                        if count <= fragment_retries:
-                            self.report_retry_fragment(err, frag_index, count, fragment_retries)
-                    except DownloadError:
-                        # Don't retry fragment if error occurred during HTTP downloading
-                        # itself since it has own retry settings
-                        if not fatal:
-                            break
-                        raise
-
-                if count > fragment_retries:
-                    if not fatal:
-                        return False, frag_index
-                    ctx['dest_stream'].close()
-                    self.report_error('Giving up after %s fragment retries' % fragment_retries)
-                    return False, frag_index
-
-                return frag_content, frag_index
-
-            def append_fragment(frag_content, frag_index):
-                fatal = frag_index == 1 or not skip_unavailable_fragments
-                if frag_content:
-                    fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
-                    try:
-                        file, frag_sanitized = sanitize_open(fragment_filename, 'rb')
-                        ctx['fragment_filename_sanitized'] = frag_sanitized
-                        file.close()
-                        self._append_fragment(ctx, frag_content)
-                        return True
-                    except EnvironmentError as ose:
-                        if ose.errno != errno.ENOENT:
-                            raise
-                        # FileNotFoundError
-                        if not fatal:
-                            self.report_skip_fragment(frag_index)
-                            return True
-                        else:
-                            ctx['dest_stream'].close()
-                            self.report_error(
-                                'fragment %s not found, unable to continue' % frag_index)
-                            return False
-                else:
-                    if not fatal:
-                        self.report_skip_fragment(frag_index)
-                        return True
-                    else:
-                        ctx['dest_stream'].close()
-                        self.report_error(
-                            'fragment %s not found, unable to continue' % frag_index)
-                        return False
-
-            max_workers = self.params.get('concurrent_fragment_downloads', 1)
-            if can_threaded_download and max_workers > 1:
-                self.report_warning('The download speed shown is only of one thread. This is a known issue')
-                _download_fragment = lambda f: (f, download_fragment(f)[1])
-                with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
-                    futures = [pool.submit(_download_fragment, fragment) for fragment in fragments_to_download]
-                    # timeout must be 0 to return instantly
-                    done, not_done = concurrent.futures.wait(futures, timeout=0)
-                    try:
-                        while not_done:
-                            # Check every 1 second for KeyboardInterrupt
-                            freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)
-                            done |= freshly_done
-                    except KeyboardInterrupt:
-                        for future in not_done:
-                            future.cancel()
-                        # timeout must be none to cancel
-                        concurrent.futures.wait(not_done, timeout=None)
-                        raise KeyboardInterrupt
-
-                for fragment, frag_index in map(lambda x: x.result(), futures):
-                    fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
-                    down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
-                    fragment['fragment_filename_sanitized'] = frag_sanitized
-                    frag_content = down.read()
-                    down.close()
-                    result = append_fragment(frag_content, frag_index)
-                    if not result:
-                        return False
-            else:
-                for fragment in fragments_to_download:
-                    frag_content, frag_index = download_fragment(fragment)
-                    result = append_fragment(frag_content, frag_index)
-                    if not result:
-                        return False
-
-            self._finish_frag_download(ctx)
+            self.download_and_append_fragments(ctx, fragments_to_download, info_dict)
          return True
diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py

index 6d6d28483fde47107d1a76ad64bfcc7892538002..a530484b5643ee01d49e3b61038908bbb62bb28b 100644 (file)
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -4,9 +4,26 @@
  import time
  import json
  
+try:
+    from Crypto.Cipher import AES
+    can_decrypt_frag = True
+except ImportError:
+    can_decrypt_frag = False
+
+try:
+    import concurrent.futures
+    can_threaded_download = True
+except ImportError:
+    can_threaded_download = False
+
  from .common import FileDownloader
  from .http import HttpFD
+from ..compat import (
+    compat_urllib_error,
+    compat_struct_pack,
+)
  from ..utils import (
+    DownloadError,
      error_to_compat_str,
      encodeFilename,
      sanitize_open,
@@ -56,7 +73,7 @@ class FragmentFD(FileDownloader):
  
      def report_retry_fragment(self, err, frag_index, count, retries):
          self.to_screen(
-            '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...'
+            '\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...'
              % (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
  
      def report_skip_fragment(self, frag_index):
@@ -112,11 +129,15 @@ def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_dat
              return False, None
          if fragment_info_dict.get('filetime'):
              ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
-        down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
+        ctx['fragment_filename_sanitized'] = fragment_filename
+        return True, self._read_fragment(ctx)
+
+    def _read_fragment(self, ctx):
+        down, frag_sanitized = sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
          ctx['fragment_filename_sanitized'] = frag_sanitized
          frag_content = down.read()
          down.close()
-        return True, frag_content
+        return frag_content
  
      def _append_fragment(self, ctx, frag_content):
          try:
@@ -304,3 +325,106 @@ def _prepare_external_frag_download(self, ctx):
              'tmpfilename': tmpfilename,
              'fragment_index': 0,
          })
+
+    def download_and_append_fragments(self, ctx, fragments, info_dict, pack_func=None):
+        fragment_retries = self.params.get('fragment_retries', 0)
+        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+        test = self.params.get('test', False)
+        if not pack_func:
+            pack_func = lambda frag_content, _: frag_content
+
+        def download_fragment(fragment, ctx):
+            frag_index = ctx['fragment_index'] = fragment['frag_index']
+            headers = info_dict.get('http_headers', {})
+            byte_range = fragment.get('byte_range')
+            if byte_range:
+                headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
+
+            # Never skip the first fragment
+            fatal = (fragment.get('index') or frag_index) == 0 or not skip_unavailable_fragments
+            count, frag_content = 0, None
+            while count <= fragment_retries:
+                try:
+                    success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers)
+                    if not success:
+                        return False, frag_index
+                    break
+                except compat_urllib_error.HTTPError as err:
+                    # Unavailable (possibly temporary) fragments may be served.
+                    # First we try to retry then either skip or abort.
+                    # See https://github.com/ytdl-org/youtube-dl/issues/10165,
+                    # https://github.com/ytdl-org/youtube-dl/issues/10448).
+                    count += 1
+                    if count <= fragment_retries:
+                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
+                except DownloadError:
+                    # Don't retry fragment if error occurred during HTTP downloading
+                    # itself since it has own retry settings
+                    if not fatal:
+                        break
+                    raise
+
+            if count > fragment_retries:
+                if not fatal:
+                    return False, frag_index
+                ctx['dest_stream'].close()
+                self.report_error('Giving up after %s fragment retries' % fragment_retries)
+                return False, frag_index
+            return frag_content, frag_index
+
+        def decrypt_fragment(fragment, frag_content):
+            decrypt_info = fragment.get('decrypt_info')
+            if not decrypt_info or decrypt_info['METHOD'] != 'AES-128':
+                return frag_content
+            iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
+            decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
+                self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
+            # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
+            # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
+            # not what it decrypts to.
+            if test:
+                return frag_content
+            return AES.new(decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
+
+        def append_fragment(frag_content, frag_index, ctx):
+            if not frag_content:
+                fatal = frag_index == 1 or not skip_unavailable_fragments
+                if not fatal:
+                    self.report_skip_fragment(frag_index)
+                    return True
+                else:
+                    ctx['dest_stream'].close()
+                    self.report_error(
+                        'fragment %s not found, unable to continue' % frag_index)
+                    return False
+            self._append_fragment(ctx, pack_func(frag_content, frag_index))
+            return True
+
+        max_workers = self.params.get('concurrent_fragment_downloads', 1)
+        if can_threaded_download and max_workers > 1:
+
+            def _download_fragment(fragment):
+                try:
+                    ctx_copy = ctx.copy()
+                    frag_content, frag_index = download_fragment(fragment, ctx_copy)
+                    return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized')
+                except Exception:
+                    # Return immediately on exception so that it is raised in the main thread
+                    return
+
+            self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
+            with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
+                for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments):
+                    ctx['fragment_filename_sanitized'] = frag_filename
+                    ctx['fragment_index'] = frag_index
+                    result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
+                    if not result:
+                        return False
+        else:
+            for fragment in fragments:
+                frag_content, frag_index = download_fragment(fragment, ctx)
+                result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
+                if not result:
+                    return False
+
+        self._finish_frag_download(ctx)
diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py

index 2c7f235d4b7a5e8d3f6da384c73c899dd1f09df4..a3cd18b77aac605f218988589b82828a93cda548 100644 (file)
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -1,32 +1,18 @@
  from __future__ import unicode_literals
  
-import errno
  import re
  import io
  import binascii
-try:
-    from Crypto.Cipher import AES
-    can_decrypt_frag = True
-except ImportError:
-    can_decrypt_frag = False
-try:
-    import concurrent.futures
-    can_threaded_download = True
-except ImportError:
-    can_threaded_download = False
  
  from ..downloader import _get_real_downloader
-from .fragment import FragmentFD
+from .fragment import FragmentFD, can_decrypt_frag
  from .external import FFmpegFD
  
  from ..compat import (
-    compat_urllib_error,
      compat_urlparse,
-    compat_struct_pack,
  )
  from ..utils import (
      parse_m3u8_attributes,
-    sanitize_open,
      update_url_query,
      bug_reports_message,
  )
@@ -151,10 +137,6 @@ def is_ad_fragment_end(s):
  
          extra_state = ctx.setdefault('extra_state', {})
  
-        fragment_retries = self.params.get('fragment_retries', 0)
-        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
-        test = self.params.get('test', False)
-
          format_index = info_dict.get('format_index')
          extra_query = None
          extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
@@ -258,7 +240,7 @@ def is_ad_fragment_end(s):
                  media_sequence += 1
  
          # We only download the first fragment during the test
-        if test:
+        if self.params.get('test', False):
              fragments = [fragments[0] if fragments else None]
  
          if real_downloader:
@@ -272,55 +254,6 @@ def is_ad_fragment_end(s):
              if not success:
                  return False
          else:
-            def decrypt_fragment(fragment, frag_content):
-                decrypt_info = fragment['decrypt_info']
-                if decrypt_info['METHOD'] != 'AES-128':
-                    return frag_content
-                iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
-                decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
-                    self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
-                # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
-                # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
-                # not what it decrypts to.
-                if test:
-                    return frag_content
-                return AES.new(decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
-
-            def download_fragment(fragment):
-                frag_index = fragment['frag_index']
-                frag_url = fragment['url']
-                byte_range = fragment['byte_range']
-
-                ctx['fragment_index'] = frag_index
-
-                count = 0
-                headers = info_dict.get('http_headers', {})
-                if byte_range:
-                    headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
-                while count <= fragment_retries:
-                    try:
-                        success, frag_content = self._download_fragment(
-                            ctx, frag_url, info_dict, headers)
-                        if not success:
-                            return False, frag_index
-                        break
-                    except compat_urllib_error.HTTPError as err:
-                        # Unavailable (possibly temporary) fragments may be served.
-                        # First we try to retry then either skip or abort.
-                        # See https://github.com/ytdl-org/youtube-dl/issues/10165,
-                        # https://github.com/ytdl-org/youtube-dl/issues/10448).
-                        count += 1
-                        if count <= fragment_retries:
-                            self.report_retry_fragment(err, frag_index, count, fragment_retries)
-                if count > fragment_retries:
-                    ctx['dest_stream'].close()
-                    self.report_error('Giving up after %s fragment retries' % fragment_retries)
-                    return False, frag_index
-
-                return decrypt_fragment(fragment, frag_content), frag_index
-
-            pack_fragment = lambda frag_content, _: frag_content
-
              if is_webvtt:
                  def pack_fragment(frag_content, frag_index):
                      output = io.StringIO()
@@ -388,75 +321,7 @@ def pack_fragment(frag_content, frag_index):
                          block.write_into(output)
  
                      return output.getvalue().encode('utf-8')
-
-            def append_fragment(frag_content, frag_index):
-                fatal = frag_index == 1 or not skip_unavailable_fragments
-                if frag_content:
-                    fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
-                    try:
-                        file, frag_sanitized = sanitize_open(fragment_filename, 'rb')
-                        ctx['fragment_filename_sanitized'] = frag_sanitized
-                        file.close()
-                        frag_content = pack_fragment(frag_content, frag_index)
-                        self._append_fragment(ctx, frag_content)
-                        return True
-                    except EnvironmentError as ose:
-                        if ose.errno != errno.ENOENT:
-                            raise
-                        # FileNotFoundError
-                        if not fatal:
-                            self.report_skip_fragment(frag_index)
-                            return True
-                        else:
-                            ctx['dest_stream'].close()
-                            self.report_error(
-                                'fragment %s not found, unable to continue' % frag_index)
-                            return False
-                else:
-                    if not fatal:
-                        self.report_skip_fragment(frag_index)
-                        return True
-                    else:
-                        ctx['dest_stream'].close()
-                        self.report_error(
-                            'fragment %s not found, unable to continue' % frag_index)
-                        return False
-
-            max_workers = self.params.get('concurrent_fragment_downloads', 1)
-            if can_threaded_download and max_workers > 1:
-                self.report_warning('The download speed shown is only of one thread. This is a known issue')
-                _download_fragment = lambda f: (f, download_fragment(f)[1])
-                with concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
-                    futures = [pool.submit(_download_fragment, fragment) for fragment in fragments]
-                    # timeout must be 0 to return instantly
-                    done, not_done = concurrent.futures.wait(futures, timeout=0)
-                    try:
-                        while not_done:
-                            # Check every 1 second for KeyboardInterrupt
-                            freshly_done, not_done = concurrent.futures.wait(not_done, timeout=1)
-                            done |= freshly_done
-                    except KeyboardInterrupt:
-                        for future in not_done:
-                            future.cancel()
-                        # timeout must be none to cancel
-                        concurrent.futures.wait(not_done, timeout=None)
-                        raise KeyboardInterrupt
-
-                for fragment, frag_index in map(lambda x: x.result(), futures):
-                    fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], frag_index)
-                    down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
-                    fragment['fragment_filename_sanitized'] = frag_sanitized
-                    frag_content = down.read()
-                    down.close()
-                    result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index)
-                    if not result:
-                        return False
              else:
-                for fragment in fragments:
-                    frag_content, frag_index = download_fragment(fragment)
-                    result = append_fragment(frag_content, frag_index)
-                    if not result:
-                        return False
-
-            self._finish_frag_download(ctx)
+                pack_fragment = None
+            self.download_and_append_fragments(ctx, fragments, info_dict, pack_fragment)
          return True
author	pukkandan <redacted>
	Mon, 21 Jun 2021 18:59:50 +0000 (00:29 +0530)
committer	GitHub <redacted>
	Mon, 21 Jun 2021 18:59:50 +0000 (00:29 +0530)
yt_dlp/downloader/dash.py		patch | blob | blame | history
yt_dlp/downloader/fragment.py		patch | blob | blame | history
yt_dlp/downloader/hls.py		patch | blob | blame | history