Add field `webpage_url_domain`

[yt-dlp.git] / yt_dlp / YoutubeDL.py
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index 4a9f4775bf6bb9457b09ba4486d866416b217f17..39fd2e1fd7ab527241806607b1b31dba03fb879d 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -70,10 +70,12 @@
      format_field,
      formatSeconds,
      GeoRestrictedError,
+    get_domain,
      HEADRequest,
      int_or_none,
      iri_to_uri,
      ISO3166Utils,
+    join_nonempty,
      LazyList,
      LINK_TEMPLATES,
      locked_file,
@@ -92,8 +94,10 @@
      PostProcessingError,
      preferredencoding,
      prepend_extension,
+    ReExtractInfo,
      register_socks_protocols,
      RejectedVideoReached,
+    remove_terminal_sequences,
      render_table,
      replace_extension,
      SameFileError,
@@ -108,7 +112,7 @@
      strftime_or_none,
      subtitles_filename,
      supports_terminal_sequences,
-    ThrottledDownload,
+    timetuple_from_msec,
      to_high_limit_path,
      traverse_obj,
      try_get,
@@ -151,7 +155,7 @@
      _PLUGIN_CLASSES as plugin_postprocessors
  )
  from .update import detect_variant
-from .version import __version__
+from .version import __version__, RELEASE_GIT_HEAD
  
  if compat_os_name == 'nt':
      import ctypes
@@ -210,6 +214,9 @@ class YoutubeDL(object):
      simulate:          Do not download the video files. If unset (or None),
                         simulate only if listsubtitles, listformats or list_thumbnails is used
      format:            Video format code. see "FORMAT SELECTION" for more details.
+                       You can also pass a function. The function takes 'ctx' as
+                       argument and returns the formats to download.
+                       See "build_format_selector" for an implementation
      allow_unplayable_formats:   Allow unplayable formats to be extracted and downloaded.
      ignore_no_formats_error: Ignore "No video formats" error. Usefull for
                         extracting metadata even if the video is not actually
@@ -306,6 +313,8 @@ class YoutubeDL(object):
                         file that is in the archive.
      break_on_reject:   Stop the download process when encountering a video that
                         has been filtered out.
+    break_per_url:     Whether break_on_reject and break_on_existing
+                       should act on each input URL as opposed to for the entire queue
      cookiefile:        File name where cookies should be read from and dumped to
      cookiesfrombrowser: A tuple containing the name of the browser and the profile
                         name/path from where cookies are loaded.
@@ -320,13 +329,16 @@ class YoutubeDL(object):
      bidi_workaround:   Work around buggy terminals without bidirectional text
                         support, using fridibi
      debug_printtraffic:Print out sent and received HTTP traffic
-    include_ads:       Download ads as well
+    include_ads:       Download ads as well (deprecated)
      default_search:    Prepend this string if an input url is not valid.
                         'auto' for elaborate guessing
      encoding:          Use this encoding instead of the system-specified.
      extract_flat:      Do not resolve URLs, return the immediate result.
                         Pass in 'in_playlist' to only show this behavior for
                         playlist items.
+    wait_for_video:    If given, wait for scheduled streams to become available.
+                       The value should be a tuple containing the range
+                       (min_secs, max_secs) to wait between retries
      postprocessors:    A list of dictionaries, each with an entry
                         * key:  The name of the postprocessor. See
                                 yt_dlp/postprocessor/__init__.py for a list.
@@ -373,8 +385,7 @@ class YoutubeDL(object):
                         (with status "started" and "finished") if the processing is successful.
      merge_output_format: Extension to use when merging formats.
      final_ext:         Expected final extension; used to detect when the file was
-                       already downloaded and converted. "merge_output_format" is
-                       replaced by this extension when given
+                       already downloaded and converted
      fixup:             Automatically correct known faults of the file.
                         One of:
                         - "never": do nothing
@@ -428,7 +439,7 @@ class YoutubeDL(object):
      compat_opts:       Compatibility options. See "Differences in default behavior".
                         The following options do not work when used through the API:
                         filename, abort-on-error, multistreams, no-live-chat, format-sort
-                       no-clean-infojson, no-playlist-metafiles, no-keep-subs.
+                       no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json.
                         Refer __init__.py for their implementation
      progress_template: Dictionary of templates for progress outputs.
                         Allowed keys are 'download', 'postprocess',
@@ -440,7 +451,7 @@ class YoutubeDL(object):
      nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize,
      max_filesize, test, noresizebuffer, retries, fragment_retries, continuedl,
      noprogress, xattr_set_filesize, hls_use_mpegts, http_chunk_size,
-    external_downloader_args.
+    external_downloader_args, concurrent_fragment_downloads.
  
      The following options are used by the post processors:
      prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
@@ -525,7 +536,6 @@ def __init__(self, params=None, auto_init=True):
          self.cache = Cache(self)
  
          windows_enable_vt_mode()
-        # FIXME: This will break if we ever print color to stdout
          self._allow_colors = {
              'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file),
              'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file),
@@ -558,6 +568,8 @@ def check_deprecated(param, option, suggestion):
  
          for msg in self.params.get('_warnings', []):
              self.report_warning(msg)
+        for msg in self.params.get('_deprecation_warnings', []):
+            self.deprecation_warning(msg)
  
          if 'list-formats' in self.params.get('compat_opts', []):
              self.params['listformats_table'] = False
@@ -613,6 +625,7 @@ def check_deprecated(param, option, suggestion):
          # Creating format selector here allows us to catch syntax errors before the extraction
          self.format_selector = (
              None if self.params.get('format') is None
+            else self.params['format'] if callable(self.params['format'])
              else self.build_format_selector(self.params['format']))
  
          self._setup_opener()
@@ -622,18 +635,21 @@ def check_deprecated(param, option, suggestion):
                  self.print_debug_header()
              self.add_default_info_extractors()
  
+        hooks = {
+            'post_hooks': self.add_post_hook,
+            'progress_hooks': self.add_progress_hook,
+            'postprocessor_hooks': self.add_postprocessor_hook,
+        }
+        for opt, fn in hooks.items():
+            for ph in self.params.get(opt, []):
+                fn(ph)
+
          for pp_def_raw in self.params.get('postprocessors', []):
              pp_def = dict(pp_def_raw)
              when = pp_def.pop('when', 'post_process')
-            pp_class = get_postprocessor(pp_def.pop('key'))
-            pp = pp_class(self, **compat_kwargs(pp_def))
-            self.add_post_processor(pp, when=when)
-
-        for ph in self.params.get('post_hooks', []):
-            self.add_post_hook(ph)
-
-        for ph in self.params.get('progress_hooks', []):
-            self.add_progress_hook(ph)
+            self.add_post_processor(
+                get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
+                when=when)
  
          register_socks_protocols()
  
@@ -721,6 +737,9 @@ def add_progress_hook(self, ph):
      def add_postprocessor_hook(self, ph):
          """Add the postprocessing progress hook"""
          self._postprocessor_hooks.append(ph)
+        for pps in self._pps.values():
+            for pp in pps:
+                pp.add_progress_hook(ph)
  
      def _bidi_workaround(self, message):
          if not hasattr(self, '_output_channel'):
@@ -762,6 +781,7 @@ def to_stderr(self, message, only_once=False):
      def to_console_title(self, message):
          if not self.params.get('consoletitle', False):
              return
+        message = remove_terminal_sequences(message)
          if compat_os_name == 'nt':
              if ctypes.windll.kernel32.GetConsoleWindow():
                  # c_wchar_p() might not be necessary if `message` is
@@ -836,30 +856,31 @@ def to_screen(self, message, skip_eol=False):
  
      class Styles(Enum):
          HEADERS = 'yellow'
-        EMPHASIS = 'blue'
+        EMPHASIS = 'light blue'
          ID = 'green'
          DELIM = 'blue'
          ERROR = 'red'
          WARNING = 'yellow'
+        SUPPRESS = 'light black'
  
-    def __format_text(self, out, text, f, fallback=None, *, test_encoding=False):
-        assert out in ('screen', 'err')
+    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
          if test_encoding:
              original_text = text
-            handle = self._screen_file if out == 'screen' else self._err_file
              encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
              text = text.encode(encoding, 'ignore').decode(encoding)
              if fallback is not None and text != original_text:
                  text = fallback
          if isinstance(f, self.Styles):
-            f = f._value_
-        return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback
+            f = f.value
+        return format_text(text, f) if allow_colors else text if fallback is None else fallback
  
      def _format_screen(self, *args, **kwargs):
-        return self.__format_text('screen', *args, **kwargs)
+        return self._format_text(
+            self._screen_file, self._allow_colors['screen'], *args, **kwargs)
  
      def _format_err(self, *args, **kwargs):
-        return self.__format_text('err', *args, **kwargs)
+        return self._format_text(
+            self._err_file, self._allow_colors['err'], *args, **kwargs)
  
      def report_warning(self, message, only_once=False):
          '''
@@ -873,6 +894,12 @@ def report_warning(self, message, only_once=False):
                  return
              self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
  
+    def deprecation_warning(self, message):
+        '''
+        Report a deprecation warning.
+        If a "logger" param is set, the message is passed to its warning();
+        otherwise it is printed to stderr (only once per distinct message)
+        with a colored "DeprecationWarning:" prefix
+        '''
+        if self.params.get('logger') is not None:
+            # Must be an f-string; otherwise the literal text "{message}" is logged
+            self.params['logger'].warning(f'DeprecationWarning: {message}')
+        else:
+            self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
+
      def report_error(self, message, tb=None):
          '''
          Do the same as trouble, but prefixes the message with 'ERROR:', colored
@@ -1103,22 +1130,23 @@ def create_key(outer_mobj):
  
              value = default if value is None else value
  
+            flags = outer_mobj.group('conversion') or ''
              str_fmt = f'{fmt[:-1]}s'
              if fmt[-1] == 'l':  # list
-                delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', '
+                delim = '\n' if '#' in flags else ', '
                  value, fmt = delim.join(variadic(value)), str_fmt
              elif fmt[-1] == 'j':  # json
-                value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
+                value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt
              elif fmt[-1] == 'q':  # quoted
-                value, fmt = compat_shlex_quote(str(value)), str_fmt
+                value = map(str, variadic(value) if '#' in flags else [value])
+                value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt
              elif fmt[-1] == 'B':  # bytes
                  value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8')
                  value, fmt = value.decode('utf-8', 'ignore'), 's'
              elif fmt[-1] == 'U':  # unicode normalized
-                opts = outer_mobj.group('conversion') or ''
                  value, fmt = unicodedata.normalize(
                      # "+" = compatibility equivalence, "#" = NFD
-                    'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'),
+                    'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                      value), str_fmt
              elif fmt[-1] == 'c':
                  if value:
@@ -1164,12 +1192,8 @@ def _prepare_filename(self, info_dict, tmpl_type='default'):
              # https://github.com/blackjack4494/youtube-dlc/issues/85
              trim_file_name = self.params.get('trim_file_name', False)
              if trim_file_name:
-                fn_groups = filename.rsplit('.')
-                ext = fn_groups[-1]
-                sub_ext = ''
-                if len(fn_groups) > 2:
-                    sub_ext = fn_groups[-2]
-                filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
+                no_ext, *ext = filename.rsplit('.', 2)
+                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
  
              return filename
          except ValueError as err:
@@ -1296,8 +1320,9 @@ def extract_info(self, url, download=True, ie_key=None, extra_info=None,
  
              temp_id = ie.get_temp_id(url)
              if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
-                self.to_screen("[%s] %s: has already been recorded in archive" % (
-                               ie_key, temp_id))
+                self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
+                if self.params.get('break_on_existing', False):
+                    raise ExistingVideoReached()
                  break
              return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
          else:
@@ -1317,11 +1342,14 @@ def wrapper(self, *args, **kwargs):
                  self.report_error(msg)
              except ExtractorError as e:  # An error we somewhat expected
                  self.report_error(compat_str(e), e.format_traceback())
-            except ThrottledDownload:
-                self.to_stderr('\r')
-                self.report_warning('The download speed is below throttle limit. Re-extracting data')
+            except ReExtractInfo as e:
+                if e.expected:
+                    self.to_screen(f'{e}; Re-extracting data')
+                else:
+                    self.to_stderr('\r')
+                    self.report_warning(f'{e}; Re-extracting data')
                  return wrapper(self, *args, **kwargs)
-            except (DownloadCancelled, LazyList.IndexError):
+            except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                  raise
              except Exception as e:
                  if self.params.get('ignoreerrors'):
@@ -1330,6 +1358,47 @@ def wrapper(self, *args, **kwargs):
                      raise
          return wrapper
  
+    def _wait_for_video(self, ie_result):
+        """
+        Wait for a not-yet-available video and then request re-extraction.
+
+        Returns immediately (None) unless the "wait_for_video" param is set and
+        ie_result is a video result having neither "formats" nor "url".
+        Otherwise, sleeps until the computed wait period is over, or until the
+        user presses Ctrl+C, and then raises ReExtractInfo(expected=True);
+        once waiting has started, this function never returns normally.
+
+        @param ie_result    The info dict returned by the extractor
+        """
+        if (not self.params.get('wait_for_video')
+                or ie_result.get('_type', 'video') != 'video'
+                or ie_result.get('formats') or ie_result.get('url')):
+            return
+
+        format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
+        last_msg = ''
+
+        def progress(msg):
+            # Pad with spaces so that a shorter message fully overwrites the previous one
+            nonlocal last_msg
+            self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)
+            last_msg = msg
+
+        min_wait, max_wait = self.params.get('wait_for_video')
+        diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
+        if diff is None and ie_result.get('live_status') == 'is_upcoming':
+            # random.uniform (unlike random.randrange) accepts min_wait == max_wait
+            # and non-integer bounds; round to whole seconds
+            diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0)
+            self.report_warning('Release time of video is not known')
+        elif (diff or 0) <= 0:
+            self.report_warning('Video should already be available according to extracted info')
+        # Clamp the wait into [min_wait, max_wait]; a missing bound is treated as 0/infinity
+        diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
+        self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
+
+        wait_till = time.time() + diff
+        try:
+            while True:
+                diff = wait_till - time.time()
+                if diff <= 0:
+                    progress('')
+                    raise ReExtractInfo('[wait] Wait period ended', expected=True)
+                progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
+                time.sleep(1)
+        except KeyboardInterrupt:
+            progress('')
+            raise ReExtractInfo('[wait] Interrupted by user', expected=True)
+        except BaseException as e:
+            # For any other error, end the in-place progress line before propagating
+            if not isinstance(e, ReExtractInfo):
+                self.to_screen('')
+            raise
+
      @__handle_extraction_exceptions
      def __extract_info(self, url, ie, download, extra_info, process):
          ie_result = ie.extract(url)
@@ -1345,6 +1414,7 @@ def __extract_info(self, url, ie, download, extra_info, process):
              ie_result.setdefault('original_url', extra_info['original_url'])
          self.add_default_extra_info(ie_result, ie, url)
          if process:
+            self._wait_for_video(ie_result)
              return self.process_ie_result(ie_result, download, extra_info)
          else:
              return ie_result
@@ -1355,6 +1425,7 @@ def add_default_extra_info(self, ie_result, ie, url):
                  'webpage_url': url,
                  'original_url': url,
                  'webpage_url_basename': url_basename(url),
+                'webpage_url_domain': get_domain(url),
              })
          if ie is not None:
              self.add_extra_info(ie_result, {
@@ -1388,6 +1459,7 @@ def process_ie_result(self, ie_result, download=True, extra_info=None):
                      info_copy['id'] = ie.get_temp_id(ie_result['url'])
                  self.add_default_extra_info(info_copy, ie, ie_result['url'])
                  self.add_extra_info(info_copy, extra_info)
+                info_copy, _ = self.pre_process(info_copy)
                  self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
                  if self.params.get('force_write_download_archive', False):
                      self.record_download_archive(info_copy)
@@ -1478,6 +1550,7 @@ def _fixup(r):
                      'extractor': ie_result['extractor'],
                      'webpage_url': ie_result['webpage_url'],
                      'webpage_url_basename': url_basename(ie_result['webpage_url']),
+                    'webpage_url_domain': get_domain(ie_result['webpage_url']),
                      'extractor_key': ie_result['extractor_key'],
                  })
                  return r
@@ -1498,12 +1571,14 @@ def __process_playlist(self, ie_result, download):
          self.to_screen('[download] Downloading playlist: %s' % playlist)
  
          if 'entries' not in ie_result:
-            raise EntryNotInPlaylist()
+            raise EntryNotInPlaylist('There are no entries')
+
+        MissingEntry = object()
          incomplete_entries = bool(ie_result.get('requested_entries'))
          if incomplete_entries:
-            def fill_missing_entries(entries, indexes):
-                ret = [None] * max(*indexes)
-                for i, entry in zip(indexes, entries):
+            def fill_missing_entries(entries, indices):
+                ret = [MissingEntry] * max(indices)
+                for i, entry in zip(indices, entries):
                      ret[i - 1] = entry
                  return ret
              ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
@@ -1538,7 +1613,7 @@ def iter_playlistitems(format):
              def get_entry(i):
                  return ie_entries[i - 1]
          else:
-            if not isinstance(ie_entries, PagedList):
+            if not isinstance(ie_entries, (PagedList, LazyList)):
                  ie_entries = LazyList(ie_entries)
  
              def get_entry(i):
@@ -1556,11 +1631,11 @@ def get_entry(i):
              entry = None
              try:
                  entry = get_entry(i)
-                if entry is None:
+                if entry is MissingEntry:
                      raise EntryNotInPlaylist()
              except (IndexError, EntryNotInPlaylist):
                  if incomplete_entries:
-                    raise EntryNotInPlaylist()
+                    raise EntryNotInPlaylist(f'Entry {i} cannot be found')
                  elif not playlistitems:
                      break
              entries.append(entry)
@@ -1578,10 +1653,11 @@ def get_entry(i):
              if entry is not None]
          n_entries = len(entries)
  
-        if not playlistitems and (playliststart or playlistend):
+        if not playlistitems and (playliststart != 1 or playlistend):
              playlistitems = list(range(playliststart, playliststart + n_entries))
          ie_result['requested_entries'] = playlistitems
  
+        _infojson_written = False
          if not self.params.get('simulate') and self.params.get('allow_playlist_files', True):
              ie_copy = {
                  'playlist': playlist,
@@ -1594,8 +1670,9 @@ def get_entry(i):
              }
              ie_copy.update(dict(ie_result))
  
-            if self._write_info_json('playlist', ie_result,
-                                     self.prepare_filename(ie_copy, 'pl_infojson')) is None:
+            _infojson_written = self._write_info_json(
+                'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
+            if _infojson_written is None:
                  return
              if self._write_description('playlist', ie_result,
                                         self.prepare_filename(ie_copy, 'pl_description')) is None:
@@ -1635,6 +1712,7 @@ def get_entry(i):
                  'extractor': ie_result['extractor'],
                  'webpage_url': ie_result['webpage_url'],
                  'webpage_url_basename': url_basename(ie_result['webpage_url']),
+                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                  'extractor_key': ie_result['extractor_key'],
              }
  
@@ -1648,9 +1726,14 @@ def get_entry(i):
                  self.report_error(
                      'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
                  break
-            # TODO: skip failed (empty) entries?
              playlist_results.append(entry_result)
          ie_result['entries'] = playlist_results
+
+        # Write the updated info to json
+        if _infojson_written and self._write_info_json(
+                'updated playlist', ie_result,
+                self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
+            return
          self.to_screen('[download] Finished downloading playlist: %s' % playlist)
          return ie_result
  
@@ -1723,9 +1806,10 @@ def _filter(f):
      def _check_formats(self, formats):
          for f in formats:
              self.to_screen('[info] Testing format %s' % f['format_id'])
-            temp_file = tempfile.NamedTemporaryFile(
-                suffix='.tmp', delete=False,
-                dir=self.get_output_path('temp') or None)
+            path = self.get_output_path('temp')
+            if not self._ensure_dir_exists(f'{path}/'):
+                continue
+            temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
              temp_file.close()
              try:
                  success, _ = self.dl(temp_file.name, f, test=True)
@@ -1926,9 +2010,9 @@ def _merge(formats_pair):
                  'format_id': '+'.join(filtered('format_id')),
                  'ext': output_ext,
                  'protocol': '+'.join(map(determine_protocol, formats_info)),
-                'language': '+'.join(orderedSet(filtered('language'))),
-                'format_note': '+'.join(orderedSet(filtered('format_note'))),
-                'filesize_approx': sum(filtered('filesize', 'filesize_approx')),
+                'language': '+'.join(orderedSet(filtered('language'))) or None,
+                'format_note': '+'.join(orderedSet(filtered('format_note'))) or None,
+                'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None,
                  'tbr': sum(filtered('tbr', 'vbr', 'abr')),
              }
  
@@ -1995,7 +2079,7 @@ def selector_function(ctx):
                  # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
                  if format_spec == 'all':
                      def selector_function(ctx):
-                        yield from _check_formats(ctx['formats'])
+                        yield from _check_formats(ctx['formats'][::-1])
                  elif format_spec == 'mergeall':
                      def selector_function(ctx):
                          formats = list(_check_formats(ctx['formats']))
@@ -2150,7 +2234,7 @@ def check_thumbnails(thumbnails):
              t['url'] = sanitize_url(t['url'])
  
          if self.params.get('check_formats') is True:
-            info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse()
+            info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True)
          else:
              info_dict['thumbnails'] = thumbnails
  
@@ -2345,7 +2429,7 @@ def is_wellformed(f):
          # TODO Central sorting goes here
  
          if self.params.get('check_formats') is True:
-            formats = LazyList(self._check_formats(formats[::-1])).reverse()
+            formats = LazyList(self._check_formats(formats[::-1]), reverse=True)
  
          if not formats or formats[0] is not info_dict:
              # only set the 'formats' fields if the original info_dict list them
@@ -2356,6 +2440,9 @@ def is_wellformed(f):
  
          info_dict, _ = self.pre_process(info_dict)
  
+        # The pre-processors may have modified the formats
+        formats = info_dict.get('formats', [info_dict])
+
          if self.params.get('list_thumbnails'):
              self.list_thumbnails(info_dict)
          if self.params.get('listformats'):
@@ -2637,6 +2724,8 @@ def process_info(self, info_dict):
          infofn = self.prepare_filename(info_dict, 'infojson')
          _infojson_written = self._write_info_json('video', info_dict, infofn)
          if _infojson_written:
+            info_dict['infojson_filename'] = infofn
+            # For backward compatibility, even though it was a private field
              info_dict['__infojson_filename'] = infofn
          elif _infojson_written is None:
              return
@@ -2669,6 +2758,8 @@ def _write_link_file(link_type):
                  self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
                  return False
              linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
+            if not self._ensure_dir_exists(encodeFilename(linkfn)):
+                return False
              if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
                  self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present')
                  return True
@@ -2910,9 +3001,10 @@ def ffmpeg_fixup(cndn, msg, cls):
                      downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                      downloader = downloader.__name__ if downloader else None
                      ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
-                                 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
-                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
-                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
+                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
+                                 FFmpegFixupM3u8PP)
+                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
+                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
  
                  fixup()
                  try:
@@ -2934,8 +3026,29 @@ def ffmpeg_fixup(cndn, msg, cls):
          if max_downloads is not None and self._num_downloads >= int(max_downloads):
              raise MaxDownloadsReached()
  
+    def __download_wrapper(self, func):
+        """
+        Wrap func with the error handling and output shared by the download entry points.
+        The wrapper reports UnavailableVideoError, prints and re-raises MaxDownloadsReached,
+        prints DownloadCancelled (re-raising it unless "break_per_url" is set) and, on
+        success, dumps the result as JSON when "dump_single_json" is set
+        """
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            try:
+                result = func(*args, **kwargs)
+            except UnavailableVideoError as err:
+                self.report_error(err)
+                return
+            except MaxDownloadsReached as err:
+                self.to_screen(f'[info] {err}')
+                raise
+            except DownloadCancelled as err:
+                self.to_screen(f'[info] {err}')
+                if self.params.get('break_per_url'):
+                    return
+                raise
+
+            if not self.params.get('dump_single_json', False):
+                return
+            self.post_extract(result)
+            self.to_stdout(json.dumps(self.sanitize_info(result)))
+        return wrapper
+
      def download(self, url_list):
          """Download a given list of URLs."""
+        url_list = variadic(url_list)  # Passing a single URL is a common mistake
          outtmpl = self.outtmpl_dict['default']
          if (len(url_list) > 1
                  and outtmpl != '-'
@@ -2944,19 +3057,8 @@ def download(self, url_list):
              raise SameFileError(outtmpl)
  
          for url in url_list:
-            try:
-                # It also downloads the videos
-                res = self.extract_info(
-                    url, force_generic_extractor=self.params.get('force_generic_extractor', False))
-            except UnavailableVideoError:
-                self.report_error('unable to download video')
-            except DownloadCancelled as e:
-                self.to_screen(f'[info] {e.msg}')
-                raise
-            else:
-                if self.params.get('dump_single_json', False):
-                    self.post_extract(res)
-                    self.to_stdout(json.dumps(self.sanitize_info(res)))
+            self.__download_wrapper(self.extract_info)(
+                url, force_generic_extractor=self.params.get('force_generic_extractor', False))
  
          return self._download_retcode
  
@@ -2967,11 +3069,13 @@ def download_with_info_file(self, info_filename):
              # FileInput doesn't have a read method, we can't call json.load
              info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
          try:
-            self.process_ie_result(info, download=True)
-        except (DownloadError, EntryNotInPlaylist, ThrottledDownload):
+            self.__download_wrapper(self.process_ie_result)(info, download=True)
+        except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
+            if not isinstance(e, EntryNotInPlaylist):
+                self.to_stderr('\r')
              webpage_url = info.get('webpage_url')
              if webpage_url is not None:
-                self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
+                self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}')
                  return self.download([webpage_url])
              else:
                  raise
@@ -2984,11 +3088,11 @@ def sanitize_info(info_dict, remove_private_keys=False):
              return info_dict
          info_dict.setdefault('epoch', int(time.time()))
          remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
-        keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
+        keep_keys = ['_type']  # Always keep this to facilitate load-info-json
          if remove_private_keys:
              remove_keys |= {
-                'requested_formats', 'requested_subtitles', 'requested_entries',
-                'filepath', 'entries', 'original_url', 'playlist_autonumber',
+                'requested_formats', 'requested_subtitles', 'requested_entries', 'entries',
+                'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber',
              }
              empty_values = (None, {}, [], set(), tuple())
              reject = lambda k, v: k not in keep_keys and (
@@ -3118,35 +3222,34 @@ def record_download_archive(self, info_dict):
  
      @staticmethod
      def format_resolution(format, default='unknown'):
+        """Return a display string for the resolution of the format dict `format`.
+
+        Gives 'audio only' when vcodec is 'none' but acodec is not, otherwise the
+        explicit 'resolution' field if it is not None, otherwise a string built
+        from 'width'/'height' ('WxH', 'Hp' or 'Wx?'), falling back to `default`.
+        """
-        is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none'
          if format.get('vcodec') == 'none' and format.get('acodec') != 'none':
              return 'audio only'
          if format.get('resolution') is not None:
              return format['resolution']
          if format.get('width') and format.get('height'):
-            res = '%dx%d' % (format['width'], format['height'])
+            return '%dx%d' % (format['width'], format['height'])
          elif format.get('height'):
-            res = '%sp' % format['height']
+            return '%sp' % format['height']
          elif format.get('width'):
-            res = '%dx?' % format['width']
-        elif is_images:
-            return 'images'
-        else:
-            return default
-        return f'{res} images' if is_images else res
+            return '%dx?' % format['width']
+        return default
  
      def _format_note(self, fdict):
          res = ''
          if fdict.get('ext') in ['f4f', 'f4m']:
-            res += '(unsupported) '
+            res += '(unsupported)'
          if fdict.get('language'):
              if res:
                  res += ' '
-            res += '[%s] ' % fdict['language']
+            res += '[%s]' % fdict['language']
          if fdict.get('format_note') is not None:
-            res += fdict['format_note'] + ' '
+            if res:
+                res += ' '
+            res += fdict['format_note']
          if fdict.get('tbr') is not None:
-            res += '%4dk ' % fdict['tbr']
+            if res:
+                res += ', '
+            res += '%4dk' % fdict['tbr']
          if fdict.get('container') is not None:
              if res:
                  res += ', '
@@ -3200,37 +3303,42 @@ def list_formats(self, info_dict):
          formats = info_dict.get('formats', [info_dict])
          new_format = self.params.get('listformats_table', True) is not False
          if new_format:
-            tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats))
-            vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats))
-            abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats))
              delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True)
              table = [
                  [
                      self._format_screen(format_field(f, 'format_id'), self.Styles.ID),
                      format_field(f, 'ext'),
-                    self.format_resolution(f),
-                    format_field(f, 'fps', '%d'),
+                    format_field(f, func=self.format_resolution, ignore=('audio only', 'images')),
+                    format_field(f, 'fps', '\t%d'),
                      format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''),
                      delim,
-                    format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
-                    format_field(f, 'tbr', f'%{tbr_digits}dk'),
-                    shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
+                    format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes),
+                    format_field(f, 'tbr', '\t%dk'),
+                    shorten_protocol_name(f.get('protocol', '').replace('native', 'n')),
                      delim,
-                    format_field(f, 'vcodec', default='unknown').replace('none', ''),
-                    format_field(f, 'vbr', f'%{vbr_digits}dk'),
-                    format_field(f, 'acodec', default='unknown').replace('none', ''),
-                    format_field(f, 'abr', f'%{abr_digits}dk'),
-                    format_field(f, 'asr', '%5dHz'),
-                    ', '.join(filter(None, (
-                        self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else '',
+                    format_field(f, 'vcodec', default='unknown').replace(
+                        'none',
+                        'images' if f.get('acodec') == 'none'
+                        else self._format_screen('audio only', self.Styles.SUPPRESS)),
+                    format_field(f, 'vbr', '\t%dk'),
+                    format_field(f, 'acodec', default='unknown').replace(
+                        'none',
+                        '' if f.get('vcodec') == 'none'
+                        else self._format_screen('video only', self.Styles.SUPPRESS)),
+                    format_field(f, 'abr', '\t%dk'),
+                    format_field(f, 'asr', '\t%dHz'),
+                    join_nonempty(
+                        self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
                          format_field(f, 'language', '[%s]'),
-                        format_field(f, 'format_note'),
-                        format_field(f, 'container', ignore=(None, f.get('ext'))),
-                    ))),
+                        join_nonempty(
+                            format_field(f, 'format_note'),
+                            format_field(f, 'container', ignore=(None, f.get('ext'))),
+                            delim=', '),
+                        delim=' '),
                  ] for f in formats if f.get('preference') is None or f['preference'] >= -1000]
              header_line = self._list_format_headers(
-                'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', '  TBR', 'PROTO',
-                delim, 'VCODEC', '  VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO')
+                'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO',
+                delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO')
          else:
              table = [
                  [
@@ -3246,8 +3354,8 @@ def list_formats(self, info_dict):
              '[info] Available formats for %s:' % info_dict['id'])
          self.to_stdout(render_table(
              header_line, table,
-            extraGap=(0 if new_format else 1),
-            hideEmpty=new_format,
+            extra_gap=(0 if new_format else 1),
+            hide_empty=new_format,
              delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, '-', test_encoding=True)))
  
      def list_thumbnails(self, info_dict):
@@ -3278,7 +3386,7 @@ def _row(lang, formats):
          self.to_stdout(render_table(
              self._list_format_headers('Language', 'Name', 'Formats'),
              [_row(lang, formats) for lang, formats in subtitles.items()],
-            hideEmpty=True))
+            hide_empty=True))
  
      def urlopen(self, req):
          """ Start an HTTP download """
@@ -3293,7 +3401,8 @@ def print_debug_header(self):
          def get_encoding(stream):
              ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
              if not supports_terminal_sequences(stream):
-                ret += ' (No ANSI)'
+                from .compat import WINDOWS_VT_MODE
+                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
              return ret
  
          encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
@@ -3311,7 +3420,11 @@ def get_encoding(stream):
              write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
  
          source = detect_variant()
-        write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
+        write_debug(join_nonempty(
+            'yt-dlp version', __version__,
+            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
+            '' if source == 'unknown' else f'({source})',
+            delim=' '))
          if not _LAZY_LOADER:
              if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                  write_debug('Lazy loading extractors is forcibly disabled')
@@ -3323,20 +3436,22 @@ def get_encoding(stream):
                  for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
          if self.params.get('compat_opts'):
              write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
-        try:
-            sp = Popen(
-                ['git', 'rev-parse', '--short', 'HEAD'],
-                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
-                cwd=os.path.dirname(os.path.abspath(__file__)))
-            out, err = sp.communicate_or_kill()
-            out = out.decode().strip()
-            if re.match('[0-9a-f]+', out):
-                write_debug('Git HEAD: %s' % out)
-        except Exception:
+
+        if source == 'source':
              try:
-                sys.exc_clear()
+                sp = Popen(
+                    ['git', 'rev-parse', '--short', 'HEAD'],
+                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                    cwd=os.path.dirname(os.path.abspath(__file__)))
+                out, err = sp.communicate_or_kill()
+                out = out.decode().strip()
+                if re.match('[0-9a-f]+', out):
+                    write_debug('Git HEAD: %s' % out)
              except Exception:
-                pass
+                try:
+                    sys.exc_clear()
+                except Exception:
+                    pass
  
          def python_implementation():
              impl_name = platform.python_implementation()
@@ -3350,7 +3465,11 @@ def python_implementation():
              platform.architecture()[0],
              platform_name()))
  
-        exe_versions = FFmpegPostProcessor.get_versions(self)
+        exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self)
+        ffmpeg_features = {key for key, val in ffmpeg_features.items() if val}
+        if ffmpeg_features:
+            exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features)
+
          exe_versions['rtmpdump'] = rtmpdump_version()
          exe_versions['phantomjs'] = PhantomJSwrapper._version()
          exe_str = ', '.join(
@@ -3362,13 +3481,13 @@ def python_implementation():
          from .postprocessor.embedthumbnail import has_mutagen
          from .cookies import SQLITE_AVAILABLE, KEYRING_AVAILABLE
  
-        lib_str = ', '.join(sorted(filter(None, (
+        lib_str = join_nonempty(
              compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
-            has_websockets and 'websockets',
+            KEYRING_AVAILABLE and 'keyring',
              has_mutagen and 'mutagen',
              SQLITE_AVAILABLE and 'sqlite',
-            KEYRING_AVAILABLE and 'keyring',
-        )))) or 'none'
+            has_websockets and 'websockets',
+            delim=', ') or 'none'
          write_debug('Optional libraries: %s' % lib_str)
  
          proxy_map = {}
@@ -3453,8 +3572,10 @@ def get_encoding(self):
              encoding = preferredencoding()
          return encoding
  
-    def _write_info_json(self, label, ie_result, infofn):
+    def _write_info_json(self, label, ie_result, infofn, overwrite=None):
          ''' Write infojson and returns True = written, False = skip, None = error '''
+        if overwrite is None:
+            overwrite = self.params.get('overwrites', True)
          if not self.params.get('writeinfojson'):
              return False
          elif not infofn:
@@ -3462,7 +3583,7 @@ def _write_info_json(self, label, ie_result, infofn):
              return False
          elif not self._ensure_dir_exists(infofn):
              return None
-        elif not self.params.get('overwrites', True) and os.path.exists(infofn):
+        elif not overwrite and os.path.exists(infofn):
              self.to_screen(f'[info] {label.title()} metadata is already present')
          else:
              self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
@@ -3561,14 +3682,15 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
  
          for t in thumbnails[::-1]:
              thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
-            thumb_display_id = f'{label} thumbnail' + (f' {t["id"]}' if multiple else '')
+            thumb_display_id = f'{label} thumbnail {t["id"]}'
              thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
              thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext'))
  
              if not self.params.get('overwrites', True) and os.path.exists(thumb_filename):
                  ret.append((thumb_filename, thumb_filename_final))
                  t['filepath'] = thumb_filename
-                self.to_screen(f'[info] {thumb_display_id.title()} is already present')
+                self.to_screen('[info] %s is already present' % (
+                    thumb_display_id if multiple else f'{label} thumbnail').capitalize())
              else:
                  self.to_screen(f'[info] Downloading {thumb_display_id} ...')
                  try: