]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/YoutubeDL.py
[dash,youtube] Download live from start to end (#888)
[yt-dlp.git] / yt_dlp / YoutubeDL.py
index ec69151d77b2cbbb76175abdcfd19e41d0dbe59e..b5d4380963d4f5b115b4d0383fb18f1ac71dc99f 100644 (file)
@@ -5,7 +5,6 @@
 
 import collections
 import contextlib
-import copy
 import datetime
 import errno
 import fileinput
 from .postprocessor import (
     get_postprocessor,
     EmbedThumbnailPP,
+    FFmpegFixupDuplicateMoovPP,
     FFmpegFixupDurationPP,
     FFmpegFixupM3u8PP,
     FFmpegFixupM4aPP,
@@ -1107,7 +1107,7 @@ def get_value(mdict):
         def _dumpjson_default(obj):
             if isinstance(obj, (set, LazyList)):
                 return list(obj)
-            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
+            return repr(obj)
 
         def create_key(outer_mobj):
             if not outer_mobj.group('has_key'):
@@ -2071,8 +2071,7 @@ def selector_function(ctx):
                 selector_1, selector_2 = map(_build_selector_function, selector.selector)
 
                 def selector_function(ctx):
-                    for pair in itertools.product(
-                            selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))):
+                    for pair in itertools.product(selector_1(ctx), selector_2(ctx)):
                         yield _merge(pair)
 
             elif selector.type == SINGLE:  # atom
@@ -2142,7 +2141,7 @@ def selector_function(ctx):
             filters = [self._build_format_filter(f) for f in selector.filters]
 
             def final_selector(ctx):
-                ctx_copy = copy.deepcopy(ctx)
+                ctx_copy = dict(ctx)
                 for _filter in filters:
                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                 return selector_function(ctx_copy)
@@ -2354,6 +2353,10 @@ def sanitize_numeric_fields(info):
         if not self.params.get('allow_unplayable_formats'):
             formats = [f for f in formats if not f.get('has_drm')]
 
+        if info_dict.get('is_live'):
+            get_from_start = bool(self.params.get('live_from_start'))
+            formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
+
         if not formats:
             self.raise_no_formats(info_dict)
 
@@ -2660,7 +2663,9 @@ def dl(self, name, info, subtitle=False, test=False):
             urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
             self.write_debug('Invoking downloader on "%s"' % urls)
 
-        new_info = copy.deepcopy(self._copy_infodict(info))
+        # Note: Ideally info should be a deep-copied so that hooks cannot modify it.
+        # But it may contain objects that are not deep-copyable
+        new_info = self._copy_infodict(info)
         if new_info.get('http_headers') is None:
             new_info['http_headers'] = self._calc_headers(new_info)
         return fd.download(name, new_info, subtitle)
@@ -2675,7 +2680,7 @@ def process_info(self, info_dict):
             if self._num_downloads >= int(max_downloads):
                 raise MaxDownloadsReached()
 
-        if info_dict.get('is_live'):
+        if info_dict.get('is_live') and not self.params.get('live_from_start'):
             info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
 
         # TODO: backward compatibility, to be removed
@@ -2730,7 +2735,7 @@ def process_info(self, info_dict):
         _infojson_written = self._write_info_json('video', info_dict, infofn)
         if _infojson_written:
             info_dict['infojson_filename'] = infofn
-            # For backward compatability, even though it was a private field
+            # For backward compatibility, even though it was a private field
             info_dict['__infojson_filename'] = infofn
         elif _infojson_written is None:
             return
@@ -2889,15 +2894,22 @@ def correct_ext(filename, ext=new_ext):
                     dl_filename = existing_file(full_filename, temp_filename)
                     info_dict['__real_download'] = False
 
+                    downloaded = []
+                    merger = FFmpegMergerPP(self)
+
+                    fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-')
                     if dl_filename is not None:
                         self.report_file_already_downloaded(dl_filename)
-                    elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'):
+                    elif fd:
+                        for f in requested_formats if fd != FFmpegFD else []:
+                            f['filepath'] = fname = prepend_extension(
+                                correct_ext(temp_filename, info_dict['ext']),
+                                'f%s' % f['format_id'], info_dict['ext'])
+                            downloaded.append(fname)
                         info_dict['url'] = '\n'.join(f['url'] for f in requested_formats)
                         success, real_download = self.dl(temp_filename, info_dict)
                         info_dict['__real_download'] = real_download
                     else:
-                        downloaded = []
-                        merger = FFmpegMergerPP(self)
                         if self.params.get('allow_unplayable_formats'):
                             self.report_warning(
                                 'You have requested merging of multiple formats '
@@ -2909,7 +2921,7 @@ def correct_ext(filename, ext=new_ext):
                                 'The formats won\'t be merged.')
 
                         if temp_filename == '-':
-                            reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict)
+                            reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params)
                                       else 'but the formats are incompatible for simultaneous download' if merger.available
                                       else 'but ffmpeg is not installed')
                             self.report_warning(
@@ -2931,14 +2943,15 @@ def correct_ext(filename, ext=new_ext):
                             partial_success, real_download = self.dl(fname, new_info)
                             info_dict['__real_download'] = info_dict['__real_download'] or real_download
                             success = success and partial_success
-                        if merger.available and not self.params.get('allow_unplayable_formats'):
-                            info_dict['__postprocessors'].append(merger)
-                            info_dict['__files_to_merge'] = downloaded
-                            # Even if there were no downloads, it is being merged only now
-                            info_dict['__real_download'] = True
-                        else:
-                            for file in downloaded:
-                                files_to_move[file] = None
+
+                    if downloaded and merger.available and not self.params.get('allow_unplayable_formats'):
+                        info_dict['__postprocessors'].append(merger)
+                        info_dict['__files_to_merge'] = downloaded
+                        # Even if there were no downloads, it is being merged only now
+                        info_dict['__real_download'] = True
+                    else:
+                        for file in downloaded:
+                            files_to_move[file] = None
                 else:
                     # Just a single file
                     dl_filename = existing_file(full_filename, temp_filename)
@@ -3005,9 +3018,14 @@ def ffmpeg_fixup(cndn, msg, cls):
 
                     downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None
                     downloader = downloader.__name__ if downloader else None
-                    ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD',
-                                 'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
-                                 FFmpegFixupM3u8PP)
+
+                    if info_dict.get('requested_formats') is None:  # Not necessary if doing merger
+                        ffmpeg_fixup(downloader == 'HlsFD',
+                                     'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
+                                     FFmpegFixupM3u8PP)
+                        ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD',
+                                     'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)
+
                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
                     ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP)
 
@@ -3104,10 +3122,17 @@ def sanitize_info(info_dict, remove_private_keys=False):
                 k.startswith('_') or k in remove_keys or v in empty_values)
         else:
             reject = lambda k, v: k in remove_keys
-        filter_fn = lambda obj: (
-            list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
-            else obj if not isinstance(obj, dict)
-            else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
+
+        def filter_fn(obj):
+            if isinstance(obj, dict):
+                return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)}
+            elif isinstance(obj, (list, tuple, set, LazyList)):
+                return list(map(filter_fn, obj))
+            elif obj is None or isinstance(obj, (str, int, float, bool)):
+                return obj
+            else:
+                return repr(obj)
+
         return filter_fn(info_dict)
 
     @staticmethod
@@ -3685,7 +3710,7 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
             self.write_debug(f'Skipping writing {label} thumbnail')
             return ret
 
-        for t in thumbnails[::-1]:
+        for idx, t in list(enumerate(thumbnails))[::-1]:
             thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg')
             thumb_display_id = f'{label} thumbnail {t["id"]}'
             thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext'))
@@ -3706,6 +3731,7 @@ def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None
                     ret.append((thumb_filename, thumb_filename_final))
                     t['filepath'] = thumb_filename
                 except network_exceptions as err:
+                    thumbnails.pop(idx)
                     self.report_warning(f'Unable to download {thumb_display_id}: {err}')
             if ret and not write_all:
                 break