[extractor] Support multiple archive ids for one video (#4307)

author pukkandan <redacted>

Wed, 13 Jul 2022 09:33:05 +0000 (15:03 +0530)

committer pukkandan <redacted>

Mon, 1 Aug 2022 19:38:16 +0000 (01:08 +0530)
author pukkandan <redacted>
Wed, 13 Jul 2022 09:33:05 +0000 (15:03 +0530)
committer pukkandan <redacted>
Mon, 1 Aug 2022 19:38:16 +0000 (01:08 +0530)
diff --git a/README.md b/README.md

index f0c49eef98d46ee4265748eb06df965fb27b4cc7..a1c7287a904f111e2b1776840d0f42d85230bd40 100644 (file)
--- a/README.md
+++ b/README.md
@@ -138,7 +138,6 @@ ### Differences in default behavior
  * Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this
  * `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior
  * The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this
-* All *experiences* of a funimation episode are considered as a single video. This behavior breaks existing archives. Use `--compat-options seperate-video-versions` to extract information from only the default player
  * Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading
  * Youtube channel URLs are automatically redirected to `/video`. Append a `/featured` to the URL to download only the videos in the home page. If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. For all other tabs, if the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
  * Unavailable videos are also listed for youtube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this
@@ -2132,6 +2131,7 @@ #### No longer supported
      --no-include-ads                 Default
      --write-annotations              No supported site has annotations now
      --no-write-annotations           Default
+    --compat-options seperate-video-versions  No longer needed
  
  #### Removed
  These options were deprecated since 2014 and have now been entirely removed
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index f6f97b8ece9628e549769b8d2b613fef0267bbde..14823a4c63ebde0d7f90b9e7925d5f23300a01c5 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -3455,11 +3455,9 @@ def in_download_archive(self, info_dict):
          if fn is None:
              return False
  
-        vid_id = self._make_archive_id(info_dict)
-        if not vid_id:
-            return False  # Incomplete video information
-
-        return vid_id in self.archive
+        vid_ids = [self._make_archive_id(info_dict)]
+        vid_ids.extend(info_dict.get('_old_archive_ids', []))
+        return any(id_ in self.archive for id_ in vid_ids)
  
      def record_download_archive(self, info_dict):
          fn = self.params.get('download_archive')
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index 317aa270e70f0600fa268e33bdfbdfb9248636c2..c91260cb0a6869dce3cd3c084b2e08103172050d 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -334,6 +334,7 @@ class InfoExtractor:
                      'private', 'premium_only', 'subscriber_only', 'needs_auth',
                      'unlisted' or 'public'. Use 'InfoExtractor._availability'
                      to set it
+    _old_archive_ids: A list of old archive ids needed for backward compatibility
      __post_extractor: A function to be called just before the metadata is
                      written to either disk, logger or console. The function
                      must return a dict which will be added to the info_dict.
diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py

index 12cacd3b4b6bea53eeb453084d28c339c5abaebf..5881f1687036f98947ad2d0ac49032e5617e22e9 100644 (file)
--- a/yt_dlp/extractor/funimation.py
+++ b/yt_dlp/extractor/funimation.py
@@ -249,7 +249,8 @@ def _real_extract(self, url):
          self._sort_formats(formats, ('lang', 'source'))
  
          return {
-            'id': initial_experience_id if only_initial_experience else episode_id,
+            'id': episode_id,
+            '_old_archive_ids': [initial_experience_id],
              'display_id': display_id,
              'duration': duration,
              'title': episode['episodeTitle'],
diff --git a/yt_dlp/extractor/genericembeds.py b/yt_dlp/extractor/genericembeds.py

index ec2673059dcd9b6e85c6339d713bae82dc0b3680..f3add479435c21531f8d868b15130d1d88882877 100644 (file)
--- a/yt_dlp/extractor/genericembeds.py
+++ b/yt_dlp/extractor/genericembeds.py
@@ -22,6 +22,9 @@ def _extract_from_webpage(self, url, webpage):
              entry.update({
                  'id': f'{video_id}-{num}',
                  'title': f'{title} ({num})',
+                '_old_archive_ids': [
+                    f'Generic {f"{video_id}-{num}" if len(entries) > 1 else video_id}',
+                ],
              })
              self._sort_formats(entry['formats'])
              yield entry
diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py

index 028e7a1e892359e5a3501bfa956d5b0ba33406db..7a798b912d2ac39b6235666ae312999581cd51dd 100644 (file)
--- a/yt_dlp/extractor/twitch.py
+++ b/yt_dlp/extractor/twitch.py
@@ -1162,8 +1162,11 @@ def _real_extract(self, url):
                  })
              thumbnails.append(thumb)
  
+        old_id = self._search_regex(r'%7C(\d+)(?:-\d+)?.mp4', formats[-1]['url'], 'old id', default=None)
+
          return {
              'id': clip.get('id') or video_id,
+            '_old_archive_ids': [f'{self.ie_key()} {old_id}'] if old_id else None,
              'display_id': video_id,
              'title': clip.get('title') or video_id,
              'formats': formats,
author	pukkandan <redacted>
	Wed, 13 Jul 2022 09:33:05 +0000 (15:03 +0530)
committer	pukkandan <redacted>
	Mon, 1 Aug 2022 19:38:16 +0000 (01:08 +0530)
README.md		patch | blob | blame | history
yt_dlp/YoutubeDL.py		patch | blob | blame | history
yt_dlp/extractor/common.py		patch | blob | blame | history
yt_dlp/extractor/funimation.py		patch | blob | blame | history
yt_dlp/extractor/genericembeds.py		patch | blob | blame | history
yt_dlp/extractor/twitch.py		patch | blob | blame | history