[outtmpl] Ensure ASCII in json and add option for Unicode

[yt-dlp.git] / yt_dlp / YoutubeDL.py
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index 0d0a2ebe0df804ac262aa0082fdc1105bacfd9e8..4e57dffa327501f43aba983a4acbf3bba52f3e4a 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -846,7 +846,7 @@ def to_stdout(self, message, skip_eol=False, quiet=None):
                                       'Use "YoutubeDL.to_screen" instead')
          self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out)
  
-    def to_screen(self, message, skip_eol=False, quiet=None):
+    def to_screen(self, message, skip_eol=False, quiet=None, only_once=False):
          """Print message to screen if not in quiet mode"""
          if self.params.get('logger'):
              self.params['logger'].debug(message)
@@ -855,7 +855,7 @@ def to_screen(self, message, skip_eol=False, quiet=None):
              return
          self._write_string(
              '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
-            self._out_files.screen)
+            self._out_files.screen, only_once=only_once)
  
      def to_stderr(self, message, only_once=False):
          """Print message to stderr"""
@@ -1249,7 +1249,7 @@ def create_key(outer_mobj):
              elif fmt[-1] == 'j':  # json
                  value, fmt = json.dumps(
                      value, default=_dumpjson_default,
-                    indent=4 if '#' in flags else None, ensure_ascii=False), str_fmt
+                    indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt
              elif fmt[-1] == 'h':  # html
                  value, fmt = escapeHTML(str(value)), str_fmt
              elif fmt[-1] == 'q':  # quoted
@@ -1621,6 +1621,7 @@ def process_ie_result(self, ie_result, download=True, extra_info=None):
                  self.add_default_extra_info(info_copy, ie, ie_result['url'])
                  self.add_extra_info(info_copy, extra_info)
                  info_copy, _ = self.pre_process(info_copy)
+                self._fill_common_fields(info_copy, False)
                  self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
                  self._raise_pending_errors(info_copy)
                  if self.params.get('force_write_download_archive', False):
@@ -2379,10 +2380,9 @@ def check_thumbnails(thumbnails):
          else:
              info_dict['thumbnails'] = thumbnails
  
-    def _fill_common_fields(self, info_dict, is_video=True):
+    def _fill_common_fields(self, info_dict, final=True):
          # TODO: move sanitization here
-        if is_video:
-            # playlists are allowed to lack "title"
+        if final:
              title = info_dict.get('title', NO_DEFAULT)
              if title is NO_DEFAULT:
                  raise ExtractorError('Missing "title" field in extractor result',
@@ -2426,11 +2426,13 @@ def _fill_common_fields(self, info_dict, is_video=True):
              for key in live_keys:
                  if info_dict.get(key) is None:
                      info_dict[key] = (live_status == key)
+        if live_status == 'post_live':
+            info_dict['was_live'] = True
  
          # Auto generate title fields corresponding to the *_number fields when missing
          # in order to always have clean titles. This is very common for TV series.
          for field in ('chapter', 'season', 'episode'):
-            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+            if final and info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                  info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
  
      def _raise_pending_errors(self, info):
@@ -2523,11 +2525,7 @@ def sanitize_numeric_fields(info):
          info_dict['requested_subtitles'] = self.process_subtitles(
              info_dict['id'], subtitles, automatic_captions)
  
-        if info_dict.get('formats') is None:
-            # There's only one format available
-            formats = [info_dict]
-        else:
-            formats = info_dict['formats']
+        formats = self._get_formats(info_dict)
  
          # or None ensures --clean-infojson removes it
          info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None
@@ -2642,7 +2640,7 @@ def is_wellformed(f):
          info_dict, _ = self.pre_process(info_dict, 'after_filter')
  
          # The pre-processors may have modified the formats
-        formats = info_dict.get('formats', [info_dict])
+        formats = self._get_formats(info_dict)
  
          list_only = self.params.get('simulate') is None and (
              self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles'))
@@ -2700,24 +2698,21 @@ def is_wellformed(f):
              # Process what we can, even without any available formats.
              formats_to_download = [{}]
  
-        requested_ranges = self.params.get('download_ranges')
-        if requested_ranges:
-            requested_ranges = tuple(requested_ranges(info_dict, self))
-
+        requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self))
          best_format, downloaded_formats = formats_to_download[-1], []
          if download:
-            if best_format:
+            if best_format and requested_ranges:
                  def to_screen(*msg):
                      self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}')
  
                  to_screen(f'Downloading {len(formats_to_download)} format(s):',
                            (f['format_id'] for f in formats_to_download))
-                if requested_ranges:
+                if requested_ranges != ({}, ):
                      to_screen(f'Downloading {len(requested_ranges)} time ranges:',
                                (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges))
              max_downloads_reached = False
  
-            for fmt, chapter in itertools.product(formats_to_download, requested_ranges or [{}]):
+            for fmt, chapter in itertools.product(formats_to_download, requested_ranges):
                  new_info = self._copy_infodict(info_dict)
                  new_info.update(fmt)
                  offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
@@ -3572,11 +3567,17 @@ def _format_note(self, fdict):
              res += '~' + format_bytes(fdict['filesize_approx'])
          return res
  
-    def render_formats_table(self, info_dict):
-        if not info_dict.get('formats') and not info_dict.get('url'):
-            return None
+    def _get_formats(self, info_dict):  # Return the list of format dicts to use for info_dict
+        if info_dict.get('formats') is None:  # "formats" is missing or explicitly None
+            if info_dict.get('url') and info_dict.get('_type', 'video') == 'video':
+                return [info_dict]  # a video entry (the default "_type") with a truthy "url" is its own only format
+            return []  # no usable "url", or a non-video "_type": no formats
+        return info_dict['formats']  # returned as-is, even when it is an empty list
  
-        formats = info_dict.get('formats', [info_dict])
+    def render_formats_table(self, info_dict):
+        formats = self._get_formats(info_dict)
+        if not formats:
+            return
          if not self.params.get('listformats_table', True) is not False:
              table = [
                  [
@@ -3643,7 +3644,7 @@ def render_thumbnails_table(self, info_dict):
              return None
          return render_table(
              self._list_format_headers('ID', 'Width', 'Height', 'URL'),
-            [[t.get('id'), t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])
+            [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails])
  
      def render_subtitles_table(self, video_id, subtitles):
          def _row(lang, formats):
@@ -3686,6 +3687,8 @@ def print_debug_header(self):
          if not self.params.get('verbose'):
              return
  
+        from . import _IN_CLI  # Must be delayed import
+
          # These imports can be slow. So import them only as needed
          from .extractor.extractors import _LAZY_LOADER
          from .extractor.extractors import _PLUGIN_CLASSES as plugin_extractors
@@ -3722,6 +3725,7 @@ def get_encoding(stream):
              __version__,
              f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '',
              '' if source == 'unknown' else f'({source})',
+            '' if _IN_CLI else 'API',
              delim=' '))
          if not _LAZY_LOADER:
              if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'):