[outtmpl] Add operator `&` for replacement text (#2012)

[yt-dlp.git] / yt_dlp / YoutubeDL.py
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index b983b17752687031c6ab3c288f872c6a76f1a5d3..ec69151d77b2cbbb76175abdcfd19e41d0dbe59e 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -70,6 +70,7 @@
      format_field,
      formatSeconds,
      GeoRestrictedError,
+    get_domain,
      HEADRequest,
      int_or_none,
      iri_to_uri,
@@ -93,8 +94,10 @@
      PostProcessingError,
      preferredencoding,
      prepend_extension,
+    ReExtractInfo,
      register_socks_protocols,
      RejectedVideoReached,
+    remove_terminal_sequences,
      render_table,
      replace_extension,
      SameFileError,
@@ -109,7 +112,7 @@
      strftime_or_none,
      subtitles_filename,
      supports_terminal_sequences,
-    ThrottledDownload,
+    timetuple_from_msec,
      to_high_limit_path,
      traverse_obj,
      try_get,
@@ -152,7 +155,7 @@
      _PLUGIN_CLASSES as plugin_postprocessors
  )
  from .update import detect_variant
-from .version import __version__
+from .version import __version__, RELEASE_GIT_HEAD
  
  if compat_os_name == 'nt':
      import ctypes
@@ -310,6 +313,8 @@ class YoutubeDL(object):
                         file that is in the archive.
      break_on_reject:   Stop the download process when encountering a video that
                         has been filtered out.
+    break_per_url:     Whether break_on_reject and break_on_existing
+                       should act on each input URL as opposed to for the entire queue
      cookiefile:        File name where cookies should be read from and dumped to
      cookiesfrombrowser: A tuple containing the name of the browser and the profile
                         name/path from where cookies are loaded.
@@ -324,13 +329,16 @@ class YoutubeDL(object):
      bidi_workaround:   Work around buggy terminals without bidirectional text
                         support, using fridibi
      debug_printtraffic:Print out sent and received HTTP traffic
-    include_ads:       Download ads as well
+    include_ads:       Download ads as well (deprecated)
      default_search:    Prepend this string if an input url is not valid.
                         'auto' for elaborate guessing
      encoding:          Use this encoding instead of the system-specified.
      extract_flat:      Do not resolve URLs, return the immediate result.
                         Pass in 'in_playlist' to only show this behavior for
                         playlist items.
+    wait_for_video:    If given, wait for scheduled streams to become available.
+                       The value should be a tuple containing the range
+                       (min_secs, max_secs) to wait between retries
      postprocessors:    A list of dictionaries, each with an entry
                         * key:  The name of the postprocessor. See
                                 yt_dlp/postprocessor/__init__.py for a list.
@@ -560,6 +568,8 @@ def check_deprecated(param, option, suggestion):
  
          for msg in self.params.get('_warnings', []):
              self.report_warning(msg)
+        for msg in self.params.get('_deprecation_warnings', []):
+            self.deprecation_warning(msg)
  
          if 'list-formats' in self.params.get('compat_opts', []):
              self.params['listformats_table'] = False
@@ -625,13 +635,6 @@ def check_deprecated(param, option, suggestion):
                  self.print_debug_header()
              self.add_default_info_extractors()
  
-        for pp_def_raw in self.params.get('postprocessors', []):
-            pp_def = dict(pp_def_raw)
-            when = pp_def.pop('when', 'post_process')
-            pp_class = get_postprocessor(pp_def.pop('key'))
-            pp = pp_class(self, **compat_kwargs(pp_def))
-            self.add_post_processor(pp, when=when)
-
          hooks = {
              'post_hooks': self.add_post_hook,
              'progress_hooks': self.add_progress_hook,
@@ -641,6 +644,13 @@ def check_deprecated(param, option, suggestion):
              for ph in self.params.get(opt, []):
                  fn(ph)
  
+        for pp_def_raw in self.params.get('postprocessors', []):
+            pp_def = dict(pp_def_raw)
+            when = pp_def.pop('when', 'post_process')
+            self.add_post_processor(
+                get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)),
+                when=when)
+
          register_socks_protocols()
  
          def preload_download_archive(fn):
@@ -727,6 +737,9 @@ def add_progress_hook(self, ph):
      def add_postprocessor_hook(self, ph):
          """Add the postprocessing progress hook"""
          self._postprocessor_hooks.append(ph)
+        for pps in self._pps.values():  # also attach the hook to every object already stored in self._pps
+            for pp in pps:
+                pp.add_progress_hook(ph)
  
      def _bidi_workaround(self, message):
          if not hasattr(self, '_output_channel'):
@@ -768,6 +781,7 @@ def to_stderr(self, message, only_once=False):
      def to_console_title(self, message):
          if not self.params.get('consoletitle', False):
              return
+        message = remove_terminal_sequences(message)
          if compat_os_name == 'nt':
              if ctypes.windll.kernel32.GetConsoleWindow():
                  # c_wchar_p() might not be necessary if `message` is
@@ -842,31 +856,31 @@ def to_screen(self, message, skip_eol=False):
  
      class Styles(Enum):
          HEADERS = 'yellow'
-        EMPHASIS = 'blue'
+        EMPHASIS = 'light blue'  # kept distinct from DELIM, which uses plain 'blue'
          ID = 'green'
          DELIM = 'blue'
          ERROR = 'red'
          WARNING = 'yellow'
          SUPPRESS = 'light black'
  
-    def __format_text(self, out, text, f, fallback=None, *, test_encoding=False):
-        assert out in ('screen', 'err')
+    def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):  # handle: stream whose encoding is probed; allow_colors: whether style f is applied
          if test_encoding:
              original_text = text
-            handle = self._screen_file if out == 'screen' else self._err_file
              encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
              text = text.encode(encoding, 'ignore').decode(encoding)
              if fallback is not None and text != original_text:
                  text = fallback
          if isinstance(f, self.Styles):
-            f = f._value_
-        return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback
+            f = f.value  # unwrap the Styles member to the value it holds
+        return format_text(text, f) if allow_colors else text if fallback is None else fallback  # without colours: fallback if given, else the plain text
  
      def _format_screen(self, *args, **kwargs):
-        return self.__format_text('screen', *args, **kwargs)
+        return self._format_text(
+            self._screen_file, self._allow_colors['screen'], *args, **kwargs)  # screen stream plus its colour flag
  
      def _format_err(self, *args, **kwargs):
-        return self.__format_text('err', *args, **kwargs)
+        return self._format_text(
+            self._err_file, self._allow_colors['err'], *args, **kwargs)  # error stream plus its colour flag
  
      def report_warning(self, message, only_once=False):
          '''
@@ -880,6 +894,12 @@ def report_warning(self, message, only_once=False):
                  return
              self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once)
  
+    def deprecation_warning(self, message):  # report `message` through params['logger'] if set, otherwise once on stderr
+        if self.params.get('logger') is not None:
+            self.params['logger'].warning(f'DeprecationWarning: {message}')  # f-string: interpolate the message instead of logging the literal '{message}'
+        else:
+            self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True)
+
      def report_error(self, message, tb=None):
          '''
          Do the same as trouble, but prefixes the message with 'ERROR:', colored
@@ -1035,7 +1055,8 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
              (?P<fields>{field})
              (?P<maths>(?:{math_op}{math_field})*)
              (?:>(?P<strf_format>.+?))?
-            (?P<alternate>(?<!\\),[^|)]+)?
+            (?P<alternate>(?<!\\),[^|&)]+)?
+            (?:&(?P<replacement>.*?))?
              (?:\|(?P<default>.*?))?
              $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
  
@@ -1094,11 +1115,12 @@ def create_key(outer_mobj):
              key = outer_mobj.group('key')
              mobj = re.match(INTERNAL_FORMAT_RE, key)
              initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
-            value, default = None, na
+            value, replacement, default = None, None, na
              while mobj:
                  mobj = mobj.groupdict()
                  default = mobj['default'] if mobj['default'] is not None else default
                  value = get_value(mobj)
+                replacement = mobj['replacement']
                  if value is None and mobj['alternate']:
                      mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
                  else:
@@ -1108,7 +1130,7 @@ def create_key(outer_mobj):
              if fmt == 's' and value is not None and key in field_size_compat_map.keys():
                  fmt = '0{:d}d'.format(field_size_compat_map[key])
  
-            value = default if value is None else value
+            value = default if value is None else value if replacement is None else replacement
  
              flags = outer_mobj.group('conversion') or ''
              str_fmt = f'{fmt[:-1]}s'
@@ -1172,12 +1194,8 @@ def _prepare_filename(self, info_dict, tmpl_type='default'):
              # https://github.com/blackjack4494/youtube-dlc/issues/85
              trim_file_name = self.params.get('trim_file_name', False)
              if trim_file_name:
-                fn_groups = filename.rsplit('.')
-                ext = fn_groups[-1]
-                sub_ext = ''
-                if len(fn_groups) > 2:
-                    sub_ext = fn_groups[-2]
-                filename = join_nonempty(fn_groups[0][:trim_file_name], sub_ext, ext, delim='.')
+                no_ext, *ext = filename.rsplit('.', 2)
+                filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.')
  
              return filename
          except ValueError as err:
@@ -1304,8 +1322,9 @@ def extract_info(self, url, download=True, ie_key=None, extra_info=None,
  
              temp_id = ie.get_temp_id(url)
              if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
-                self.to_screen("[%s] %s: has already been recorded in archive" % (
-                               ie_key, temp_id))
+                self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive')
+                if self.params.get('break_on_existing', False):
+                    raise ExistingVideoReached()
                  break
              return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process)
          else:
@@ -1325,9 +1344,12 @@ def wrapper(self, *args, **kwargs):
                  self.report_error(msg)
              except ExtractorError as e:  # An error we somewhat expected
                  self.report_error(compat_str(e), e.format_traceback())
-            except ThrottledDownload as e:
-                self.to_stderr('\r')
-                self.report_warning(f'{e}; Re-extracting data')
+            except ReExtractInfo as e:
+                if e.expected:
+                    self.to_screen(f'{e}; Re-extracting data')
+                else:
+                    self.to_stderr('\r')
+                    self.report_warning(f'{e}; Re-extracting data')
                  return wrapper(self, *args, **kwargs)
              except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
                  raise
@@ -1338,6 +1360,47 @@ def wrapper(self, *args, **kwargs):
                      raise
          return wrapper
  
+    def _wait_for_video(self, ie_result):  # wait for a video without formats/url, then raise ReExtractInfo so extraction is retried
+        if (not self.params.get('wait_for_video')
+                or ie_result.get('_type', 'video') != 'video'
+                or ie_result.get('formats') or ie_result.get('url')):
+            return  # option unset, result is not a single video, or formats/url are already present
+
+        format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1]
+        last_msg = ''
+
+        def progress(msg):
+            nonlocal last_msg
+            self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True)  # pad with spaces to overwrite the previous, longer message
+            last_msg = msg
+
+        min_wait, max_wait = self.params.get('wait_for_video')
+        diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
+        if diff is None and ie_result.get('live_status') == 'is_upcoming':
+            diff = random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)  # uniform, unlike randrange, accepts min == max and non-integer bounds
+            self.report_warning('Release time of video is not known')
+        elif (diff or 0) <= 0:
+            self.report_warning('Video should already be available according to extracted info')
+        diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))  # clamp the wait into [min_wait, max_wait]
+        self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
+
+        wait_till = time.time() + diff
+        try:
+            while True:
+                diff = wait_till - time.time()
+                if diff <= 0:
+                    progress('')
+                    raise ReExtractInfo('[wait] Wait period ended', expected=True)
+                progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}')
+                time.sleep(1)
+        except KeyboardInterrupt:
+            progress('')
+            raise ReExtractInfo('[wait] Interrupted by user', expected=True)  # Ctrl+C ends the wait early and retries immediately
+        except BaseException as e:
+            if not isinstance(e, ReExtractInfo):
+                self.to_screen('')  # terminate the '\r'-updated progress line before propagating other errors
+            raise
+
      @__handle_extraction_exceptions
      def __extract_info(self, url, ie, download, extra_info, process):
          ie_result = ie.extract(url)
@@ -1353,6 +1416,7 @@ def __extract_info(self, url, ie, download, extra_info, process):
              ie_result.setdefault('original_url', extra_info['original_url'])
          self.add_default_extra_info(ie_result, ie, url)
          if process:
+            self._wait_for_video(ie_result)
              return self.process_ie_result(ie_result, download, extra_info)
          else:
              return ie_result
@@ -1363,6 +1427,7 @@ def add_default_extra_info(self, ie_result, ie, url):
                  'webpage_url': url,
                  'original_url': url,
                  'webpage_url_basename': url_basename(url),
+                'webpage_url_domain': get_domain(url),
              })
          if ie is not None:
              self.add_extra_info(ie_result, {
@@ -1396,6 +1461,7 @@ def process_ie_result(self, ie_result, download=True, extra_info=None):
                      info_copy['id'] = ie.get_temp_id(ie_result['url'])
                  self.add_default_extra_info(info_copy, ie, ie_result['url'])
                  self.add_extra_info(info_copy, extra_info)
+                info_copy, _ = self.pre_process(info_copy)
                  self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
                  if self.params.get('force_write_download_archive', False):
                      self.record_download_archive(info_copy)
@@ -1486,6 +1552,7 @@ def _fixup(r):
                      'extractor': ie_result['extractor'],
                      'webpage_url': ie_result['webpage_url'],
                      'webpage_url_basename': url_basename(ie_result['webpage_url']),
+                    'webpage_url_domain': get_domain(ie_result['webpage_url']),
                      'extractor_key': ie_result['extractor_key'],
                  })
                  return r
@@ -1647,6 +1714,7 @@ def get_entry(i):
                  'extractor': ie_result['extractor'],
                  'webpage_url': ie_result['webpage_url'],
                  'webpage_url_basename': url_basename(ie_result['webpage_url']),
+                'webpage_url_domain': get_domain(ie_result['webpage_url']),
                  'extractor_key': ie_result['extractor_key'],
              }
  
@@ -1740,9 +1808,10 @@ def _filter(f):
      def _check_formats(self, formats):
          for f in formats:
              self.to_screen('[info] Testing format %s' % f['format_id'])
-            temp_file = tempfile.NamedTemporaryFile(
-                suffix='.tmp', delete=False,
-                dir=self.get_output_path('temp') or None)
+            path = self.get_output_path('temp')
+            if not self._ensure_dir_exists(f'{path}/'):
+                continue
+            temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None)
              temp_file.close()
              try:
                  success, _ = self.dl(temp_file.name, f, test=True)
@@ -2606,6 +2675,9 @@ def process_info(self, info_dict):
              if self._num_downloads >= int(max_downloads):
                  raise MaxDownloadsReached()
  
+        if info_dict.get('is_live'):
+            info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+
          # TODO: backward compatibility, to be removed
          info_dict['fulltitle'] = info_dict['title']
  
@@ -2934,9 +3006,10 @@ def ffmpeg_fixup(cndn, msg, cls):
                      downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                      downloader = downloader.__name__ if downloader else None
                      ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
-                                 'malformed AAC bitstream detected', FFmpegFixupM3u8PP)
-                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed timestamps detected', FFmpegFixupTimestampPP)
-                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'malformed duration detected', FFmpegFixupDurationPP)
+                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
+                                 FFmpegFixupM3u8PP)
+                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
+                    ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
  
                  fixup()
                  try:
@@ -2965,9 +3038,13 @@ def wrapper(*args, **kwargs):
                  res = func(*args, **kwargs)
              except UnavailableVideoError as e:
                  self.report_error(e)
-            except DownloadCancelled as e:
+            except MaxDownloadsReached as e:
                  self.to_screen(f'[info] {e}')
                  raise
+            except DownloadCancelled as e:
+                self.to_screen(f'[info] {e}')
+                if not self.params.get('break_per_url'):
+                    raise
              else:
                  if self.params.get('dump_single_json', False):
                      self.post_extract(res)
@@ -2998,7 +3075,7 @@ def download_with_info_file(self, info_filename):
              info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
          try:
              self.__download_wrapper(self.process_ie_result)(info, download=True)
-        except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e:
+        except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e:
              if not isinstance(e, EntryNotInPlaylist):
                  self.to_stderr('\r')
              webpage_url = info.get('webpage_url')
@@ -3165,15 +3242,19 @@ def format_resolution(format, default='unknown'):
      def _format_note(self, fdict):
          res = ''
          if fdict.get('ext') in ['f4f', 'f4m']:
-            res += '(unsupported) '
+            res += '(unsupported)'
          if fdict.get('language'):
              if res:
                  res += ' '
-            res += '[%s] ' % fdict['language']
+            res += '[%s]' % fdict['language']
          if fdict.get('format_note') is not None:
-            res += fdict['format_note'] + ' '
+            if res:
+                res += ' '
+            res += fdict['format_note']
          if fdict.get('tbr') is not None:
-            res += '%4dk ' % fdict['tbr']
+            if res:
+                res += ', '
+            res += '%4dk' % fdict['tbr']
          if fdict.get('container') is not None:
              if res:
                  res += ', '
@@ -3325,7 +3406,8 @@ def print_debug_header(self):
          def get_encoding(stream):
              ret = getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)
              if not supports_terminal_sequences(stream):
-                ret += ' (No ANSI)'
+                from .compat import WINDOWS_VT_MODE
+                ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)'
              return ret
  
          encoding_str = 'Encodings: locale %s, fs %s, out %s, err %s, pref %s' % (
@@ -3343,7 +3425,11 @@ def get_encoding(stream):
              write_debug = lambda msg: self._write_string(f'[debug] {msg}\n')
  
          source = detect_variant()
-        write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})'))
+        write_debug(join_nonempty(
+            'yt-dlp version', __version__,
+            f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
+            '' if source == 'unknown' else f'({source})',
+            delim=' '))
          if not _LAZY_LOADER:
              if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):
                  write_debug('Lazy loading extractors is forcibly disabled')
@@ -3355,20 +3441,22 @@ def get_encoding(stream):
                  for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())])
          if self.params.get('compat_opts'):
              write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts')))
-        try:
-            sp = Popen(
-                ['git', 'rev-parse', '--short', 'HEAD'],
-                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
-                cwd=os.path.dirname(os.path.abspath(__file__)))
-            out, err = sp.communicate_or_kill()
-            out = out.decode().strip()
-            if re.match('[0-9a-f]+', out):
-                write_debug('Git HEAD: %s' % out)
-        except Exception:
+
+        if source == 'source':
              try:
-                sys.exc_clear()
+                sp = Popen(
+                    ['git', 'rev-parse', '--short', 'HEAD'],
+                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                    cwd=os.path.dirname(os.path.abspath(__file__)))
+                out, err = sp.communicate_or_kill()
+                out = out.decode().strip()
+                if re.match('[0-9a-f]+', out):
+                    write_debug('Git HEAD: %s' % out)
              except Exception:
-                pass
+                try:
+                    sys.exc_clear()
+                except Exception:
+                    pass
  
          def python_implementation():
              impl_name = platform.python_implementation()