fix some typos and linter

[yt-dlp.git] / yt_dlp / YoutubeDL.py
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index e58f7a32f87902641dd20b08dd2639fff6cf7a2d..d5b1ba07c14beb1762457400b7b0a5e183146f7f 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -216,6 +216,7 @@ class YoutubeDL(object):
      logtostderr:       Log messages to stderr instead of stdout.
      writedescription:  Write the video description to a .description file
      writeinfojson:     Write the video metadata to a .info.json file
+    clean_infojson:    Remove private fields from the infojson
      writecomments:     Extract video comments. This will not be written to disk
                         unless writeinfojson is also given
      writeannotations:  Write the video annotations to a .annotations.xml file
@@ -1171,6 +1172,9 @@ def _fixup(r):
          else:
              raise Exception('Invalid result type: %s' % result_type)
  
+    def _ensure_dir_exists(self, path):
+        return make_dir(path, self.report_error)
+
      def __process_playlist(self, ie_result, download):
          # We process each entry in the playlist
          playlist = ie_result.get('title') or ie_result.get('id')
@@ -1187,28 +1191,24 @@ def __process_playlist(self, ie_result, download):
              }
              ie_copy.update(dict(ie_result))
  
-            def ensure_dir_exists(path):
-                return make_dir(path, self.report_error)
-
              if self.params.get('writeinfojson', False):
                  infofn = self.prepare_filename(ie_copy, 'pl_infojson')
-                if not ensure_dir_exists(encodeFilename(infofn)):
+                if not self._ensure_dir_exists(encodeFilename(infofn)):
                      return
                  if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                      self.to_screen('[info] Playlist metadata is already present')
                  else:
                      playlist_info = dict(ie_result)
                      # playlist_info['entries'] = list(playlist_info['entries'])  # Entries is a generator which should not be resolved here
-                    del playlist_info['entries']
                      self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
                      try:
-                        write_json_file(self.filter_requested_info(playlist_info), infofn)
+                        write_json_file(self.filter_requested_info(playlist_info, self.params.get('clean_infojson', True)), infofn)
                      except (OSError, IOError):
                          self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
  
              if self.params.get('writedescription', False):
                  descfn = self.prepare_filename(ie_copy, 'pl_description')
-                if not ensure_dir_exists(encodeFilename(descfn)):
+                if not self._ensure_dir_exists(encodeFilename(descfn)):
                      return
                  if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                      self.to_screen('[info] Playlist description is already present')
@@ -1794,14 +1794,18 @@ def sanitize_numeric_fields(info):
          if 'display_id' not in info_dict and 'id' in info_dict:
              info_dict['display_id'] = info_dict['id']
  
-        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
-            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
-            # see http://bugs.python.org/issue1646728)
-            try:
-                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
-                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
-            except (ValueError, OverflowError, OSError):
-                pass
+        for ts_key, date_key in (
+                ('timestamp', 'upload_date'),
+                ('release_timestamp', 'release_date'),
+        ):
+            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
+                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+                # see http://bugs.python.org/issue1646728)
+                try:
+                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
+                except (ValueError, OverflowError, OSError):
+                    pass
  
          # Auto generate title fields corresponding to the *_number fields when missing
          # in order to always have clean titles. This is very common for TV series.
@@ -2043,7 +2047,7 @@ def print_optional(field):
          print_mandatory('format')
          if self.params.get('forcejson', False):
              self.post_extract(info_dict)
-            self.to_stdout(json.dumps(info_dict))
+            self.to_stdout(json.dumps(info_dict, default=repr))
  
      def process_info(self, info_dict):
          """Process a single resolved IE result."""
@@ -2071,6 +2075,7 @@ def process_info(self, info_dict):
  
          info_dict = self.pre_process(info_dict)
  
+        # info_dict['_filename'] needs to be set for backward compatibility
          info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
          temp_filename = self.prepare_filename(info_dict, 'temp')
          files_to_move = {}
@@ -2089,17 +2094,14 @@ def process_info(self, info_dict):
          if full_filename is None:
              return
  
-        def ensure_dir_exists(path):
-            return make_dir(path, self.report_error)
-
-        if not ensure_dir_exists(encodeFilename(full_filename)):
+        if not self._ensure_dir_exists(encodeFilename(full_filename)):
              return
-        if not ensure_dir_exists(encodeFilename(temp_filename)):
+        if not self._ensure_dir_exists(encodeFilename(temp_filename)):
              return
  
          if self.params.get('writedescription', False):
              descfn = self.prepare_filename(info_dict, 'description')
-            if not ensure_dir_exists(encodeFilename(descfn)):
+            if not self._ensure_dir_exists(encodeFilename(descfn)):
                  return
              if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                  self.to_screen('[info] Video description is already present')
@@ -2116,7 +2118,7 @@ def ensure_dir_exists(path):
  
          if self.params.get('writeannotations', False):
              annofn = self.prepare_filename(info_dict, 'annotation')
-            if not ensure_dir_exists(encodeFilename(annofn)):
+            if not self._ensure_dir_exists(encodeFilename(annofn)):
                  return
              if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
                  self.to_screen('[info] Video annotations are already present')
@@ -2158,6 +2160,7 @@ def dl(name, info, subtitle=False):
                  sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
                  if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                      self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
+                    sub_info['filepath'] = sub_filename
                      files_to_move[sub_filename] = sub_filename_final
                  else:
                      self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
@@ -2167,13 +2170,15 @@ def dl(name, info, subtitle=False):
                              # See https://github.com/ytdl-org/youtube-dl/issues/10268
                              with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                                  subfile.write(sub_info['data'])
+                            sub_info['filepath'] = sub_filename
                              files_to_move[sub_filename] = sub_filename_final
                          except (OSError, IOError):
                              self.report_error('Cannot write subtitles file ' + sub_filename)
                              return
                      else:
                          try:
-                            dl(sub_filename, sub_info, subtitle=True)
+                            dl(sub_filename, sub_info.copy(), subtitle=True)
+                            sub_info['filepath'] = sub_filename
                              files_to_move[sub_filename] = sub_filename_final
                          except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                              self.report_warning('Unable to download subtitle for "%s": %s' %
@@ -2204,14 +2209,14 @@ def dl(name, info, subtitle=False):
  
          if self.params.get('writeinfojson', False):
              infofn = self.prepare_filename(info_dict, 'infojson')
-            if not ensure_dir_exists(encodeFilename(infofn)):
+            if not self._ensure_dir_exists(encodeFilename(infofn)):
                  return
              if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                  self.to_screen('[info] Video metadata is already present')
              else:
                  self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
                  try:
-                    write_json_file(self.filter_requested_info(info_dict), infofn)
+                    write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
                  except (OSError, IOError):
                      self.report_error('Cannot write video metadata to JSON file ' + infofn)
                      return
@@ -2222,7 +2227,7 @@ def dl(name, info, subtitle=False):
          for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
              thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
              thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
-            files_to_move[thumb_filename_temp] = info_dict['__thumbnail_filename'] = thumb_filename
+            files_to_move[thumb_filename_temp] = thumb_filename
  
          # Write internet shortcut files
          url_link = webloc_link = desktop_link = False
@@ -2360,7 +2365,7 @@ def correct_ext(filename):
                              fname = prepend_extension(
                                  self.prepare_filename(new_info, 'temp'),
                                  'f%s' % f['format_id'], new_info['ext'])
-                            if not ensure_dir_exists(fname):
+                            if not self._ensure_dir_exists(fname):
                                  return
                              downloaded.append(fname)
                              partial_success, real_download = dl(fname, new_info)
@@ -2437,9 +2442,8 @@ def correct_ext(filename):
                      else:
                          assert fixup_policy in ('ignore', 'never')
  
-                if (info_dict.get('protocol') == 'm3u8_native'
-                        or info_dict.get('protocol') == 'm3u8'
-                        and self.params.get('hls_prefer_native')):
+                if ('protocol' in info_dict
+                        and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
                      if fixup_policy == 'warn':
                          self.report_warning('%s: malformed AAC bitstream detected.' % (
                              info_dict['id']))
@@ -2501,7 +2505,7 @@ def download(self, url_list):
              else:
                  if self.params.get('dump_single_json', False):
                      self.post_extract(res)
-                    self.to_stdout(json.dumps(res))
+                    self.to_stdout(json.dumps(res, default=repr))
  
          return self._download_retcode
  
@@ -2523,21 +2527,31 @@ def download_with_info_file(self, info_filename):
          return self._download_retcode
  
      @staticmethod
-    def filter_requested_info(info_dict):
-        fields_to_remove = ('requested_formats', 'requested_subtitles')
-        return dict(
-            (k, v) for k, v in info_dict.items()
-            if (k[0] != '_' or k == '_type') and k not in fields_to_remove)
-
-    def run_pp(self, pp, infodict, files_to_move={}):
+    def filter_requested_info(info_dict, actually_filter=True):
+        if not actually_filter:
+            return info_dict
+        exceptions = {
+            'remove': ['requested_formats', 'requested_subtitles', 'filepath', 'entries'],
+            'keep': ['_type'],
+        }
+        keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
+        filter_fn = lambda obj: (
+            list(map(filter_fn, obj)) if isinstance(obj, (list, tuple))
+            else obj if not isinstance(obj, dict)
+            else dict((k, filter_fn(v)) for k, v in obj.items() if keep_key(k)))
+        return filter_fn(info_dict)
+
+    def run_pp(self, pp, infodict):
          files_to_delete = []
+        if '__files_to_move' not in infodict:
+            infodict['__files_to_move'] = {}
          files_to_delete, infodict = pp.run(infodict)
          if not files_to_delete:
-            return files_to_move, infodict
+            return infodict
  
          if self.params.get('keepvideo', False):
              for f in files_to_delete:
-                files_to_move.setdefault(f, '')
+                infodict['__files_to_move'].setdefault(f, '')
          else:
              for old_filename in set(files_to_delete):
                  self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
@@ -2545,9 +2559,9 @@ def run_pp(self, pp, infodict, files_to_move={}):
                      os.remove(encodeFilename(old_filename))
                  except (IOError, OSError):
                      self.report_warning('Unable to remove downloaded original file')
-                if old_filename in files_to_move:
-                    del files_to_move[old_filename]
-        return files_to_move, infodict
+                if old_filename in infodict['__files_to_move']:
+                    del infodict['__files_to_move'][old_filename]
+        return infodict
  
      @staticmethod
      def post_extract(info_dict):
@@ -2570,20 +2584,21 @@ def actual_post_extract(info_dict):
      def pre_process(self, ie_info):
          info = dict(ie_info)
          for pp in self._pps['beforedl']:
-            info = self.run_pp(pp, info)[1]
+            info = self.run_pp(pp, info)
          return info
  
-    def post_process(self, filename, ie_info, files_to_move={}):
+    def post_process(self, filename, ie_info, files_to_move=None):
          """Run all the postprocessors on the given file."""
          info = dict(ie_info)
          info['filepath'] = filename
-        info['__files_to_move'] = {}
+        info['__files_to_move'] = files_to_move or {}
  
          for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
-            files_to_move, info = self.run_pp(pp, info, files_to_move)
-        info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info)[1]
+            info = self.run_pp(pp, info)
+        info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
+        del info['__files_to_move']
          for pp in self._pps['aftermove']:
-            info = self.run_pp(pp, info, {})[1]
+            info = self.run_pp(pp, info)
  
      def _make_archive_id(self, info_dict):
          video_id = info_dict.get('id')
@@ -2951,7 +2966,7 @@ def _write_thumbnails(self, info_dict, filename):  # return the extensions
              thumb_ext = determine_ext(t['url'], 'jpg')
              suffix = '%s.' % t['id'] if multiple else ''
              thumb_display_id = '%s ' % t['id'] if multiple else ''
-            t['filename'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
+            t['filepath'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
  
              if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
                  ret.append(suffix + thumb_ext)