Add experimental option `--check-formats` to test the URLs before format selection

author pukkandan <redacted>

Tue, 4 May 2021 15:54:00 +0000 (21:24 +0530)

committer pukkandan <redacted>

Thu, 6 May 2021 15:20:44 +0000 (20:50 +0530)
author pukkandan <redacted>
Tue, 4 May 2021 15:54:00 +0000 (21:24 +0530)
committer pukkandan <redacted>
Thu, 6 May 2021 15:20:44 +0000 (20:50 +0530)
diff --git a/README.md b/README.md

index 5ae596da7017d1bf95dc5c6dbd5bbe25d3737be0..2c231f847f9ec190ad79d934536e438fa13d325c 100644 (file)
--- a/README.md
+++ b/README.md
@@ -577,6 +577,8 @@ ## Video Format Options:
                                       containers irrespective of quality
      --no-prefer-free-formats         Don't give any special preference to free
                                       containers (default)
+    --check-formats                  Check that the formats selected are
+                                     actually downloadable (Experimental)
      -F, --list-formats               List all available formats of requested
                                       videos
      --list-formats-as-table          Present the output of -F in tabular form
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index 79ba3ef93442106e23ea60233afed1d01a7dd583..c710f75549a9e13f77a44552f5c09e17dcaab54f 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -84,6 +84,7 @@
      PostProcessingError,
      preferredencoding,
      prepend_extension,
+    random_uuidv4,
      register_socks_protocols,
      render_table,
      replace_extension,
@@ -1521,6 +1522,8 @@ def syntax_error(note, start):
          allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
                                    'video': self.params.get('allow_multiple_video_streams', False)}
  
+        check_formats = self.params.get('check_formats')
+
          def _parse_filter(tokens):
              filter_parts = []
              for type, string, start, _, _ in tokens:
@@ -1678,6 +1681,22 @@ def _merge(formats_pair):
  
              return new_dict
  
+        def _check_formats(formats):
+            """Yield only those of `formats` whose test download succeeds.
+
+            When `check_formats` is not set the formats are passed through
+            untouched, so a caller that does not guard the call itself does
+            not trigger test downloads the user never asked for.
+            """
+            if not check_formats:
+                for f in formats:
+                    yield f
+                return
+            # The directory for the probe files is the same for every format
+            paths = self.params.get('paths', {})
+            temp_dir = os.path.join(
+                expand_path(paths.get('home', '').strip()),
+                expand_path(paths.get('temp', '').strip()))
+            for f in formats:
+                self.to_screen('[info] Testing format %s' % f['format_id'])
+                temp_file = os.path.join(
+                    temp_dir,
+                    'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
+                try:
+                    dl, _ = self.dl(temp_file, f, test=True)
+                finally:
+                    # Remove the probe file even if the test download raised
+                    if os.path.exists(temp_file):
+                        os.remove(temp_file)
+                if dl:
+                    yield f
+                else:
+                    self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
+
          def _build_selector_function(selector):
              if isinstance(selector, list):  # ,
                  fs = [_build_selector_function(s) for s in selector]
@@ -1708,12 +1727,13 @@ def selector_function(ctx):
                  if format_spec == 'all':
                      def selector_function(ctx):
                          formats = list(ctx['formats'])
-                        if formats:
-                            for f in formats:
-                                yield f
+                        if check_formats:
+                            formats = _check_formats(formats)
+                        for f in formats:
+                            yield f
                  elif format_spec == 'mergeall':
                      def selector_function(ctx):
-                        formats = list(ctx['formats'])
+                        formats = list(_check_formats(ctx['formats']))
                          if not formats:
                              return
                          merged_format = formats[-1]
@@ -1722,13 +1742,13 @@ def selector_function(ctx):
                          yield merged_format
  
                  else:
-                    format_fallback = False
+                    format_fallback, format_reverse, format_idx = False, True, 1
                      mobj = re.match(
                          r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
                          format_spec)
                      if mobj is not None:
                          format_idx = int_or_none(mobj.group('n'), default=1)
-                        format_idx = format_idx - 1 if mobj.group('bw')[0] == 'w' else -format_idx
+                        format_reverse = mobj.group('bw')[0] == 'b'
                          format_type = (mobj.group('type') or [None])[0]
                          not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
                          format_modified = mobj.group('mod') is not None
@@ -1743,7 +1763,6 @@ def selector_function(ctx):
                              if not format_modified  # b, w
                              else None)  # b*, w*
                      else:
-                        format_idx = -1
                          filter_f = ((lambda f: f.get('ext') == format_spec)
                                      if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']  # extension
                                      else (lambda f: f.get('format_id') == format_spec))  # id
@@ -1753,16 +1772,18 @@ def selector_function(ctx):
                          if not formats:
                              return
                          matches = list(filter(filter_f, formats)) if filter_f is not None else formats
-                        n = len(matches)
-                        if -n <= format_idx < n:
-                            yield matches[format_idx]
-                        elif format_fallback and ctx['incomplete_formats']:
+                        if format_fallback and ctx['incomplete_formats'] and not matches:
                              # for extractors with incomplete formats (audio only (soundcloud)
                              # or video only (imgur)) best/worst will fallback to
                              # best/worst {video,audio}-only format
-                            n = len(formats)
-                            if -n <= format_idx < n:
-                                yield formats[format_idx]
+                            matches = formats
+                        if format_reverse:
+                            matches = matches[::-1]
+                        if check_formats:
+                            matches = list(itertools.islice(_check_formats(matches), format_idx))
+                        n = len(matches)
+                        if -n <= format_idx - 1 < n:
+                            yield matches[format_idx - 1]
  
              elif selector.type == MERGE:        # +
                  selector_1, selector_2 = map(_build_selector_function, selector.selector)
@@ -2179,6 +2200,34 @@ def print_optional(field):
              self.post_extract(info_dict)
              self.to_stdout(json.dumps(info_dict, default=repr))
  
+    def dl(self, name, info, subtitle=False, test=False):
+        """Run the suitable downloader for `info`, writing to `name`.
+
+        With test=True a standalone set of downloader params is used in
+        place of self.params, and neither the progress hooks nor the debug
+        message are applied. The value returned by the downloader's
+        download() is handed back unchanged.
+        """
+        if not test:
+            params = self.params
+        else:
+            is_verbose = self.params.get('verbose')
+            params = {
+                'test': True,
+                'quiet': not is_verbose,
+                'verbose': is_verbose,
+                'noprogress': not is_verbose,
+                'nopart': True,
+                'skip_unavailable_fragments': False,
+                'keep_fragments': False,
+                'overwrites': True,
+                '_no_ytdl_file': True,
+            }
+        downloader_class = get_suitable_downloader(info, params)
+        fd = downloader_class(self, params)
+        if not test:
+            for hook in self._progress_hooks:
+                fd.add_progress_hook(hook)
+            if self.params.get('verbose'):
+                self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
+        # Work on a copy so the caller's info dict is not given http_headers
+        download_info = dict(info)
+        if download_info.get('http_headers') is None:
+            download_info['http_headers'] = self._calc_headers(download_info)
+        return fd.download(name, download_info, subtitle)
+
      def process_info(self, info_dict):
          """Process a single resolved IE result."""
  
@@ -2264,17 +2313,6 @@ def process_info(self, info_dict):
                      self.report_error('Cannot write annotations file: ' + annofn)
                      return
  
-        def dl(name, info, subtitle=False):
-            fd = get_suitable_downloader(info, self.params)(self, self.params)
-            for ph in self._progress_hooks:
-                fd.add_progress_hook(ph)
-            if self.params.get('verbose'):
-                self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
-            new_info = dict(info)
-            if new_info.get('http_headers') is None:
-                new_info['http_headers'] = self._calc_headers(new_info)
-            return fd.download(name, new_info, subtitle)
-
          subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                         self.params.get('writeautomaticsub')])
  
@@ -2307,7 +2345,7 @@ def dl(name, info, subtitle=False):
                              return
                      else:
                          try:
-                            dl(sub_filename, sub_info.copy(), subtitle=True)
+                            self.dl(sub_filename, sub_info.copy(), subtitle=True)
                              sub_info['filepath'] = sub_filename
                              files_to_move[sub_filename] = sub_filename_final
                          except tuple([ExtractorError, IOError, OSError, ValueError] + network_exceptions) as err:
@@ -2493,7 +2531,7 @@ def correct_ext(filename):
                              if not self._ensure_dir_exists(fname):
                                  return
                              downloaded.append(fname)
-                            partial_success, real_download = dl(fname, new_info)
+                            partial_success, real_download = self.dl(fname, new_info)
                              info_dict['__real_download'] = info_dict['__real_download'] or real_download
                              success = success and partial_success
                          if merger.available and not self.params.get('allow_unplayable_formats'):
@@ -2508,7 +2546,7 @@ def correct_ext(filename):
                      # Just a single file
                      dl_filename = existing_file(full_filename, temp_filename)
                      if dl_filename is None:
-                        success, real_download = dl(temp_filename, info_dict)
+                        success, real_download = self.dl(temp_filename, info_dict)
                          info_dict['__real_download'] = real_download
  
                  dl_filename = dl_filename or temp_filename
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py

index e6e5687806d492ff0b80d864cfc90c0530e15e60..c88cca7ae6dc7da068f9a03a721466d363d8ea70 100644 (file)
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -471,6 +471,7 @@ def report_args_compat(arg, name):
          'format_sort_force': opts.format_sort_force,
          'allow_multiple_video_streams': opts.allow_multiple_video_streams,
          'allow_multiple_audio_streams': opts.allow_multiple_audio_streams,
+        'check_formats': opts.check_formats,
          'listformats': opts.listformats,
          'listformats_table': opts.listformats_table,
          'outtmpl': outtmpl,
diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py

index fadd0dfc5f283aa929a25c3ea0f1d83eb5bc41a3..6d6d28483fde47107d1a76ad64bfcc7892538002 100644 (file)
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -31,6 +31,7 @@ class FragmentFD(FileDownloader):
                          Skip unavailable fragments (DASH and hlsnative only)
      keep_fragments:     Keep downloaded fragments on disk after downloading is
                          finished
+    _no_ytdl_file:      Don't use .ytdl file
  
      For each incomplete fragment download yt-dlp keeps on disk a special
      bookkeeping file with download state and metadata (in future such files will
@@ -69,9 +70,8 @@ def _prepare_and_start_frag_download(self, ctx):
          self._prepare_frag_download(ctx)
          self._start_frag_download(ctx)
  
-    @staticmethod
-    def __do_ytdl_file(ctx):
-        return not ctx['live'] and not ctx['tmpfilename'] == '-'
+    def __do_ytdl_file(self, ctx):
+        """Tell whether the .ytdl bookkeeping file should be used for `ctx`."""
+        # Not used when ctx is live, when tmpfilename is '-', or when the
+        # '_no_ytdl_file' param is set
+        skip = ctx['live'] or ctx['tmpfilename'] == '-' or self.params.get('_no_ytdl_file')
+        return not skip
  
      def _read_ytdl_file(self, ctx):
          assert 'ytdl_corrupt' not in ctx
diff --git a/yt_dlp/options.py b/yt_dlp/options.py

index a3fd9d122e4308c0a2b4376d6ea2d8ab48465473..7505e8c789ab15c98ce3a541de0feab1b38fbaf3 100644 (file)
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -502,6 +502,10 @@ def _dict_from_multiple_values_options_callback(
          '--no-prefer-free-formats',
          action='store_true', dest='prefer_free_formats', default=False,
          help="Don't give any special preference to free containers (default)")
+    video_format.add_option(
+        '--check-formats',
+        action='store_true', dest='check_formats', default=False,
+        help="Check that the formats selected are actually downloadable (Experimental)")
      video_format.add_option(
          '-F', '--list-formats',
          action='store_true', dest='listformats',
author	pukkandan <redacted>
	Tue, 4 May 2021 15:54:00 +0000 (21:24 +0530)
committer	pukkandan <redacted>
	Thu, 6 May 2021 15:20:44 +0000 (20:50 +0530)
README.md		patch \| blob \| blame \| history
yt_dlp/YoutubeDL.py		patch \| blob \| blame \| history
yt_dlp/__init__.py		patch \| blob \| blame \| history
yt_dlp/downloader/fragment.py		patch \| blob \| blame \| history
yt_dlp/options.py		patch \| blob \| blame \| history