]> jfr.im git - yt-dlp.git/blobdiff - youtube_dlc/YoutubeDL.py
Changed repo name to yt-dlp
[yt-dlp.git] / youtube_dlc / YoutubeDL.py
index 2cc02e46fad4e8f28447e789a6f3dc491e8c7d0a..2d3eacfebdbb29eea385c0e10cfb60a155355456 100644 (file)
@@ -99,6 +99,7 @@
     YoutubeDLCookieProcessor,
     YoutubeDLHandler,
     YoutubeDLRedirectHandler,
+    process_communicate_or_kill,
 )
 from .cache import Cache
 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
@@ -178,9 +179,11 @@ class YoutubeDL(object):
     outtmpl:           Template for output names.
     restrictfilenames: Do not allow "&" and spaces in file names.
     trim_file_name:    Limit length of filename (extension excluded).
-    ignoreerrors:      Do not stop on download errors.
+    ignoreerrors:      Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
     force_generic_extractor: Force downloader to use the generic extractor
-    nooverwrites:      Prevent overwriting files.
+    overwrites:        Overwrite all video and metadata files if True,
+                       overwrite only non-video files if None
+                       and don't overwrite any file if False
     playliststart:     Playlist item to start at.
     playlistend:       Playlist item to end at.
     playlist_items:    Specific indices of playlist to download.
@@ -252,6 +255,9 @@ class YoutubeDL(object):
                                youtube_dlc/postprocessor/__init__.py for a list.
                        as well as any further keyword arguments for the
                        postprocessor.
+    post_hooks:        A list of functions that get called as the final step
+                       for each video file, after all postprocessors have been
+                       called. The filename will be passed as the only argument.
     progress_hooks:    A list of functions that get called on download
                        progress, with a dictionary with the entries
                        * status: One of "downloading", "error", or "finished".
@@ -333,8 +339,9 @@ class YoutubeDL(object):
                        otherwise prefer ffmpeg.
     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
                        to the binary or its containing directory.
-    postprocessor_args: A list of additional command-line arguments for the
-                        postprocessor.
+    postprocessor_args: A dictionary of postprocessor names (in lower case) and a list
+                        of additional command-line arguments for the postprocessor.
+                        Use 'default' as the name for arguments to passed to all PP.
 
     The following options are used by the Youtube extractor:
     youtube_include_dash_manifest: If True (default), DASH manifests and related
@@ -368,6 +375,7 @@ def __init__(self, params=None, auto_init=True):
         self._ies = []
         self._ies_instances = {}
         self._pps = []
+        self._post_hooks = []
         self._progress_hooks = []
         self._download_retcode = 0
         self._num_downloads = 0
@@ -471,6 +479,9 @@ def check_deprecated(param, option, suggestion):
             pp = pp_class(self, **compat_kwargs(pp_def))
             self.add_post_processor(pp)
 
+        for ph in self.params.get('post_hooks', []):
+            self.add_post_hook(ph)
+
         for ph in self.params.get('progress_hooks', []):
             self.add_progress_hook(ph)
 
@@ -523,6 +534,10 @@ def add_post_processor(self, pp):
         self._pps.append(pp)
         pp.set_downloader(self)
 
+    def add_post_hook(self, ph):
+        """Add the post hook"""
+        self._post_hooks.append(ph)
+
     def add_progress_hook(self, ph):
         """Add the progress hook (currently only for the file downloader)"""
         self._progress_hooks.append(ph)
@@ -577,7 +592,7 @@ def to_console_title(self, message):
                 # already of type unicode()
                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
         elif 'TERM' in os.environ:
-            self._write_string('\033]0;%s\007' % message, self._screen_file)
+            self._write_string('\033[0;%s\007' % message, self._screen_file)
 
     def save_console_title(self):
         if not self.params.get('consoletitle', False):
@@ -673,6 +688,13 @@ def report_file_already_downloaded(self, file_name):
         except UnicodeEncodeError:
             self.to_screen('[download] The file has already been downloaded')
 
+    def report_file_delete(self, file_name):
+        """Report that existing file will be deleted."""
+        try:
+            self.to_screen('Deleting already existent file %s' % file_name)
+        except UnicodeEncodeError:
+            self.to_screen('Deleting already existent file')
+
     def prepare_filename(self, info_dict):
         """Generate the output filename."""
         try:
@@ -908,6 +930,10 @@ def add_default_extra_info(self, ie_result, ie, url):
         self.add_extra_info(ie_result, {
             'extractor': ie.IE_NAME,
             'webpage_url': url,
+            'duration_string': (
+                formatSeconds(ie_result['duration'], '-')
+                if ie_result.get('duration', None) is not None
+                else None),
             'webpage_url_basename': url_basename(url),
             'extractor_key': ie.ie_key(),
         })
@@ -1185,23 +1211,20 @@ def can_merge():
             merger = FFmpegMergerPP(self)
             return merger.available and merger.can_merge()
 
-        def prefer_best():
-            if self.params.get('simulate', False):
-                return False
-            if not download:
-                return False
-            if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
-                return True
-            if info_dict.get('is_live'):
-                return True
-            if not can_merge():
-                return True
-            return False
-
-        req_format_list = ['bestvideo+bestaudio', 'best']
-        if prefer_best():
-            req_format_list.reverse()
-        return '/'.join(req_format_list)
+        prefer_best = (
+            not self.params.get('simulate', False)
+            and download
+            and (
+                not can_merge()
+                or info_dict.get('is_live', False)
+                or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))
+
+        return (
+            'best/bestvideo+bestaudio'
+            if prefer_best
+            else 'bestvideo*+bestaudio/best'
+            if not self.params.get('allow_multiple_audio_streams', False)
+            else 'bestvideo+bestaudio/best')
 
     def build_format_selector(self, format_spec):
         def syntax_error(note, start):
@@ -1216,8 +1239,8 @@ def syntax_error(note, start):
         GROUP = 'GROUP'
         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
 
-        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', True),
-                                  'video': self.params.get('allow_multiple_video_streams', True)}
+        allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
+                                  'video': self.params.get('allow_multiple_video_streams', False)}
 
         def _parse_filter(tokens):
             filter_parts = []
@@ -1705,7 +1728,7 @@ def is_wellformed(f):
         if req_format is None:
             req_format = self._default_format_spec(info_dict, download=download)
             if self.params.get('verbose'):
-                self.to_stdout('[debug] Default format spec: %s' % req_format)
+                self._write_string('[debug] Default format spec: %s\n' % req_format)
 
         format_selector = self.build_format_selector(req_format)
 
@@ -1884,7 +1907,7 @@ def ensure_dir_exists(path):
 
         if self.params.get('writedescription', False):
             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
+            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
                 self.to_screen('[info] Video description is already present')
             elif info_dict.get('description') is None:
                 self.report_warning('There\'s no description to write.')
@@ -1899,7 +1922,7 @@ def ensure_dir_exists(path):
 
         if self.params.get('writeannotations', False):
             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
+            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
                 self.to_screen('[info] Video annotations are already present')
             elif not info_dict.get('annotations'):
                 self.report_warning('There are no annotations to write.')
@@ -1919,7 +1942,7 @@ def dl(name, info, subtitle=False):
             for ph in self._progress_hooks:
                 fd.add_progress_hook(ph)
             if self.params.get('verbose'):
-                self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
+                self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
             return fd.download(name, info, subtitle)
 
         subtitles_are_requested = any([self.params.get('writesubtitles', False),
@@ -1933,7 +1956,7 @@ def dl(name, info, subtitle=False):
             for sub_lang, sub_info in subtitles.items():
                 sub_format = sub_info['ext']
                 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
-                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+                if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                 else:
                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
@@ -1988,7 +2011,7 @@ def dl(name, info, subtitle=False):
 
         if self.params.get('writeinfojson', False):
             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
+            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
                 self.to_screen('[info] Video description metadata is already present')
             else:
                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
@@ -2096,11 +2119,15 @@ def compatible_formats(formats):
                             'Requested formats are incompatible for merge and will be merged into mkv.')
                     # Ensure filename always has a correct extension for successful merge
                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
-                    if os.path.exists(encodeFilename(filename)):
+                    file_exists = os.path.exists(encodeFilename(filename))
+                    if not self.params.get('overwrites', False) and file_exists:
                         self.to_screen(
                             '[download] %s has already been downloaded and '
                             'merged' % filename)
                     else:
+                        if file_exists:
+                            self.report_file_delete(filename)
+                            os.remove(encodeFilename(filename))
                         for f in requested_formats:
                             new_info = dict(info_dict)
                             new_info.update(f)
@@ -2117,6 +2144,11 @@ def compatible_formats(formats):
                         # Even if there were no downloads, it is being merged only now
                         info_dict['__real_download'] = True
                 else:
+                    # Delete existing file with --yes-overwrites
+                    if self.params.get('overwrites', False):
+                        if os.path.exists(encodeFilename(filename)):
+                            self.report_file_delete(filename)
+                            os.remove(encodeFilename(filename))
                     # Just a single file
                     success, real_download = dl(filename, info_dict)
                     info_dict['__real_download'] = real_download
@@ -2197,10 +2229,19 @@ def compatible_formats(formats):
                 except (PostProcessingError) as err:
                     self.report_error('postprocessing: %s' % str(err))
                     return
+                try:
+                    for ph in self._post_hooks:
+                        ph(filename)
+                except Exception as err:
+                    self.report_error('post hooks: %s' % str(err))
+                    return
                 must_record_download_archive = True
 
         if must_record_download_archive or self.params.get('force_write_download_archive', False):
             self.record_download_archive(info_dict)
+        max_downloads = self.params.get('max_downloads')
+        if max_downloads is not None and self._num_downloads >= int(max_downloads):
+            raise MaxDownloadsReached()
 
     def download(self, url_list):
         """Download a given list of URLs."""
@@ -2491,7 +2532,7 @@ def print_debug_header(self):
                 self.get_encoding()))
         write_string(encoding_str, encoding=None)
 
-        self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
+        self._write_string('[debug] yt-dlp version ' + __version__ + '\n')
         if _LAZY_LOADER:
             self._write_string('[debug] Lazy loading extractors enabled' + '\n')
         try:
@@ -2499,7 +2540,7 @@ def print_debug_header(self):
                 ['git', 'rev-parse', '--short', 'HEAD'],
                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                 cwd=os.path.dirname(os.path.abspath(__file__)))
-            out, err = sp.communicate()
+            out, err = process_communicate_or_kill(sp)
             out = out.decode().strip()
             if re.match('[0-9a-f]+', out):
                 self._write_string('[debug] Git HEAD: ' + out + '\n')
@@ -2540,6 +2581,7 @@ def python_implementation():
         if self.params.get('call_home', False):
             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
+            return
             latest_version = self.urlopen(
                 'https://yt-dl.org/latest/version').read().decode('utf-8')
             if version_tuple(latest_version) > version_tuple(__version__):
@@ -2635,9 +2677,9 @@ def _write_thumbnails(self, info_dict, filename):
             thumb_ext = determine_ext(t['url'], 'jpg')
             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
-            t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+            t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
 
-            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
+            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
             else: