jfr.im git - yt-dlp.git/commitdiff
Add option `--lazy-playlist` to process entries as they are received
author pukkandan <redacted>
Fri, 17 Jun 2022 08:05:04 +0000 (13:35 +0530)
committer pukkandan <redacted>
Fri, 17 Jun 2022 08:50:40 +0000 (14:20 +0530)
README.md
test/test_YoutubeDL.py
yt_dlp/YoutubeDL.py
yt_dlp/__init__.py
yt_dlp/options.py
yt_dlp/utils.py

index 16f02787a39085ebba3f40dd911dc69d2fa9fca2..2e1ae9c110cea4f57dbd3b13a02bbae5dfde0451 100644 (file)
--- a/README.md
+++ b/README.md
@@ -540,6 +540,11 @@ ## Download Options:
                                     bandwidth throttling imposed by a webserver
                                     (experimental)
     --playlist-random               Download playlist videos in random order
+    --lazy-playlist                 Process entries in the playlist as they are
+                                    received. This disables n_entries,
+                                    --playlist-random and --playlist-reverse
+    --no-lazy-playlist              Process videos in the playlist only after
+                                    the entire playlist is parsed (default)
     --xattr-set-filesize            Set file xattribute ytdl.filesize with
                                     expected file size
     --hls-use-mpegts                Use the mpegts container for HLS videos;
index 3aafc3c4f7215e5123aa62af9368407b374506ad..03a2c36a1c7c647fedd5eae92622a3ddb9837d0a 100644 (file)
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -1046,7 +1046,7 @@ def test_selection(params, expected_ids, evaluate_all=False):
             for name, func, expected_eval in (
                 ('list', list_entries, INDICES),
                 ('Generator', generator_entries, generator_eval),
-                ('LazyList', lazylist_entries, generator_eval),
+                # ('LazyList', lazylist_entries, generator_eval),  # Generator and LazyList follow the exact same code path
                 ('PagedList', pagedlist_entries, pagedlist_eval),
             ):
                 evaluated = []
index 4162727c49465aee70bdb240366b2db2ea27484a..fb3f9337f811d78e674cb3221c3d8d913af40010 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -242,11 +242,9 @@ class YoutubeDL:
                        and don't overwrite any file if False
                        For compatibility with youtube-dl,
                        "nooverwrites" may also be used instead
-    playliststart:     Playlist item to start at.
-    playlistend:       Playlist item to end at.
     playlist_items:    Specific indices of playlist to download.
-    playlistreverse:   Download playlist items in reverse order.
     playlistrandom:    Download playlist items in random order.
+    lazy_playlist:     Process playlist entries as they are received.
     matchtitle:        Download only matching titles.
     rejecttitle:       Reject downloads for matching titles.
     logger:            Log messages to a logging.Logger instance.
@@ -469,6 +467,12 @@ class YoutubeDL:
 
     The following options are deprecated and may be removed in the future:
 
+    playliststart:     - Use playlist_items
+                       Playlist item to start at.
+    playlistend:       - Use playlist_items
+                       Playlist item to end at.
+    playlistreverse:   - Use playlist_items
+                       Download playlist items in reverse order.
     forceurl:          - Use forceprint
                        Force printing final URL.
     forcetitle:        - Use forceprint
@@ -1671,16 +1675,26 @@ def __process_playlist(self, ie_result, download):
         self.to_screen(f'[download] Downloading playlist: {title}')
 
         all_entries = PlaylistEntries(self, ie_result)
-        entries = orderedSet(all_entries.get_requested_items())
-        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*entries)) or ([], [])
-        n_entries, ie_result['playlist_count'] = len(entries), all_entries.full_count
+        entries = orderedSet(all_entries.get_requested_items(), lazy=True)
+
+        lazy = self.params.get('lazy_playlist')
+        if lazy:
+            resolved_entries, n_entries = [], 'N/A'
+            ie_result['requested_entries'], ie_result['entries'] = None, None
+        else:
+            entries = resolved_entries = list(entries)
+            n_entries = len(resolved_entries)
+            ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
+        if not ie_result.get('playlist_count'):
+            # Better to do this after potentially exhausting entries
+            ie_result['playlist_count'] = all_entries.get_full_count()
 
         _infojson_written = False
         write_playlist_files = self.params.get('allow_playlist_files', True)
         if write_playlist_files and self.params.get('list_thumbnails'):
             self.list_thumbnails(ie_result)
         if write_playlist_files and not self.params.get('simulate'):
-            ie_copy = self._playlist_infodict(ie_result, n_entries=n_entries)
+            ie_copy = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries))
             _infojson_written = self._write_info_json(
                 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'))
             if _infojson_written is None:
@@ -1691,9 +1705,12 @@ def __process_playlist(self, ie_result, download):
             # TODO: This should be passed to ThumbnailsConvertor if necessary
             self._write_thumbnails('playlist', ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
 
-        if self.params.get('playlistreverse', False):
-            entries = entries[::-1]
-        if self.params.get('playlistrandom', False):
+        if lazy:
+            if self.params.get('playlistreverse') or self.params.get('playlistrandom'):
+                self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True)
+        elif self.params.get('playlistreverse'):
+            entries.reverse()
+        elif self.params.get('playlistrandom'):
             random.shuffle(entries)
 
         self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} videos'
@@ -1701,23 +1718,27 @@ def __process_playlist(self, ie_result, download):
 
         failures = 0
         max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
-        for i, (playlist_index, entry) in enumerate(entries, 1):
+        for i, (playlist_index, entry) in enumerate(entries):
+            if lazy:
+                resolved_entries.append((playlist_index, entry))
+
             # TODO: Add auto-generated fields
             if self._match_entry(entry, incomplete=True) is not None:
                 continue
 
-            if 'playlist-index' in self.params.get('compat_opts', []):
-                playlist_index = ie_result['requested_entries'][i - 1]
             self.to_screen('[download] Downloading video %s of %s' % (
-                self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
+                self._format_screen(i + 1, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS)))
 
             entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip')
+            if not lazy and 'playlist-index' in self.params.get('compat_opts', []):
+                playlist_index = ie_result['requested_entries'][i]
+
             entry_result = self.__process_iterable_entry(entry, download, {
-                'n_entries': n_entries,
-                '__last_playlist_index': max(ie_result['requested_entries']),
+                'n_entries': int_or_none(n_entries),
+                '__last_playlist_index': max(ie_result['requested_entries'] or (0, 0)),
                 'playlist_count': ie_result.get('playlist_count'),
                 'playlist_index': playlist_index,
-                'playlist_autonumber': i,
+                'playlist_autonumber': i + 1,
                 'playlist': title,
                 'playlist_id': ie_result.get('id'),
                 'playlist_title': ie_result.get('title'),
@@ -1735,10 +1756,10 @@ def __process_playlist(self, ie_result, download):
                 self.report_error(
                     f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction')
                 break
-            entries[i - 1] = (playlist_index, entry_result)
+            resolved_entries[i] = (playlist_index, entry_result)
 
         # Update with processed data
-        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*entries)) or ([], [])
+        ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], [])
 
         # Write the updated info to json
         if _infojson_written is True and self._write_info_json(
index 1538a7e89608ec7d457a2870a6200a75fc391cf1..db34fe12a612df896601b0ee95790e8996b1aae3 100644 (file)
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -434,6 +434,9 @@ def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_u
         setattr(opts, opt1, default)
 
     # Conflicting options
+    report_conflict('--playlist-reverse', 'playlist_reverse', '--playlist-random', 'playlist_random')
+    report_conflict('--playlist-reverse', 'playlist_reverse', '--lazy-playlist', 'lazy_playlist')
+    report_conflict('--playlist-random', 'playlist_random', '--lazy-playlist', 'lazy_playlist')
     report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None)
     report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None)
     report_conflict('--exec-before-download', 'exec_before_dl_cmd',
@@ -740,6 +743,7 @@ def parse_options(argv=None):
         'playlistend': opts.playlistend,
         'playlistreverse': opts.playlist_reverse,
         'playlistrandom': opts.playlist_random,
+        'lazy_playlist': opts.lazy_playlist,
         'noplaylist': opts.noplaylist,
         'logtostderr': opts.outtmpl.get('default') == '-',
         'consoletitle': opts.consoletitle,
index bc646ab4aa5c7c10c0f136d3b6813f35a813d992..900b5c8b1a6fd5cb4f171a114862cd5103ad9c30 100644 (file)
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -888,7 +888,7 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
         help=optparse.SUPPRESS_HELP)
     downloader.add_option(
         '--playlist-reverse',
-        action='store_true',
+        action='store_true', dest='playlist_reverse',
         help=optparse.SUPPRESS_HELP)
     downloader.add_option(
         '--no-playlist-reverse',
@@ -896,8 +896,16 @@ def _alias_callback(option, opt_str, value, parser, opts, nargs):
         help=optparse.SUPPRESS_HELP)
     downloader.add_option(
         '--playlist-random',
-        action='store_true',
+        action='store_true', dest='playlist_random',
         help='Download playlist videos in random order')
+    downloader.add_option(
+        '--lazy-playlist',
+        action='store_true', dest='lazy_playlist',
+        help='Process entries in the playlist as they are received. This disables n_entries, --playlist-random and --playlist-reverse')
+    downloader.add_option(
+        '--no-lazy-playlist',
+        action='store_false', dest='lazy_playlist',
+        help='Process videos in the playlist only after the entire playlist is parsed (default)')
     downloader.add_option(
         '--xattr-set-filesize',
         dest='xattr_set_filesize', action='store_true',
index f21d7067295d39e6ea2c6b27a77facb4938f65fd..8dda5e931c3de8a48c9f366fcfd0b1951903f761 100644 (file)
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -770,13 +770,16 @@ def expand_path(s):
     return os.path.expandvars(compat_expanduser(s))
 
 
-def orderedSet(iterable):
-    """ Remove all duplicates from the input iterable """
-    res = []
-    for el in iterable:
-        if el not in res:
-            res.append(el)
-    return res
+def orderedSet(iterable, *, lazy=False):
+    """Remove all duplicates from the input iterable"""
+    def _iter():
+        seen = []  # Do not use set since the items can be unhashable
+        for x in iterable:
+            if x not in seen:
+                seen.append(x)
+                yield x
+
+    return _iter() if lazy else list(_iter())
 
 
 def _htmlentity_transform(entity_with_semicolon):
@@ -2820,7 +2823,26 @@ class PlaylistEntries:
     is_exhausted = False
 
     def __init__(self, ydl, info_dict):
-        self.ydl, self.info_dict = ydl, info_dict
+        self.ydl = ydl
+
+        # _entries must be assigned now since infodict can change during iteration
+        entries = info_dict.get('entries')
+        if entries is None:
+            raise EntryNotInPlaylist('There are no entries')
+        elif isinstance(entries, list):
+            self.is_exhausted = True
+
+        requested_entries = info_dict.get('requested_entries')
+        self.is_incomplete = bool(requested_entries)
+        if self.is_incomplete:
+            assert self.is_exhausted
+            self._entries = [self.MissingEntry] * max(requested_entries)
+            for i, entry in zip(requested_entries, entries):
+                self._entries[i - 1] = entry
+        elif isinstance(entries, (list, PagedList, LazyList)):
+            self._entries = entries
+        else:
+            self._entries = LazyList(entries)
 
     PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
         (?P<start>[+-]?\d+)?
@@ -2863,37 +2885,13 @@ def get_requested_items(self):
                 except (ExistingVideoReached, RejectedVideoReached):
                     return
 
-    @property
-    def full_count(self):
-        if self.info_dict.get('playlist_count'):
-            return self.info_dict['playlist_count']
-        elif self.is_exhausted and not self.is_incomplete:
+    def get_full_count(self):
+        if self.is_exhausted and not self.is_incomplete:
             return len(self)
         elif isinstance(self._entries, InAdvancePagedList):
             if self._entries._pagesize == 1:
                 return self._entries._pagecount
 
-    @functools.cached_property
-    def _entries(self):
-        entries = self.info_dict.get('entries')
-        if entries is None:
-            raise EntryNotInPlaylist('There are no entries')
-        elif isinstance(entries, list):
-            self.is_exhausted = True
-
-        indices = self.info_dict.get('requested_entries')
-        self.is_incomplete = bool(indices)
-        if self.is_incomplete:
-            assert self.is_exhausted
-            ret = [self.MissingEntry] * max(indices)
-            for i, entry in zip(indices, entries):
-                ret[i - 1] = entry
-            return ret
-
-        if isinstance(entries, (list, PagedList, LazyList)):
-            return entries
-        return LazyList(entries)
-
     @functools.cached_property
     def _getter(self):
         if isinstance(self._entries, list):
@@ -2937,17 +2935,12 @@ def __getitem__(self, idx):
             if i < 0:
                 continue
             try:
-                try:
-                    entry = self._getter(i)
-                except self.IndexError:
-                    self.is_exhausted = True
-                    if step > 0:
-                        break
-                    continue
-            except IndexError:
-                if self.is_exhausted:
+                entry = self._getter(i)
+            except self.IndexError:
+                self.is_exhausted = True
+                if step > 0:
                     break
-                raise
+                continue
             yield i + 1, entry
 
     def __len__(self):