Minor changes to make it easier to merge

author pukkandan <redacted>

Mon, 23 Nov 2020 21:47:42 +0000 (03:17 +0530)

committer pukkandan <redacted>

Mon, 23 Nov 2020 22:18:17 +0000 (03:48 +0530)
author pukkandan <redacted>
Mon, 23 Nov 2020 21:47:42 +0000 (03:17 +0530)
committer pukkandan <redacted>
Mon, 23 Nov 2020 22:18:17 +0000 (03:48 +0530)
diff --git a/docs/supportedsites.md b/docs/supportedsites.md

index 45a5466505ec80571fbac0c17e2c7c72b2ac420d..db2295572285cc2e718dbd96598252b1a6815b42 100644 (file)
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -1153,7 +1153,7 @@ # Supported sites
   - **YourPorn**
   - **YourUpload**
   - **youtube**: YouTube.com
- - **youtube:favorites**: YouTube.com liked videos, ":ytfav" or "LL" for short (requires authentication)
+ - **youtube:favorites**: YouTube.com liked videos, ":ytfav" for short (requires authentication)
   - **youtube:history**: Youtube watch history, ":ythistory" for short (requires authentication)
   - **youtube:playlist**: YouTube.com playlists
   - **youtube:recommended**: YouTube.com recommended videos, ":ytrec" for short (requires authentication)
@@ -1162,7 +1162,7 @@ # Supported sites
   - **youtube:search_url**: YouTube.com search URLs
   - **youtube:subscriptions**: YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)
   - **youtube:tab**: YouTube.com tab
- - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" or "WL" for short (requires authentication)
+ - **youtube:watchlater**: Youtube watch later list, ":ytwatchlater" for short (requires authentication)
   - **YoutubeYtUser**: YouTube.com user videos, URL or "ytuser" keyword
   - **Zapiks**
   - **Zaq1**
diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py

index 0f15b018969ba7455386d0aee354ca9cb64bd94a..e46614e4e6f444325b0242b45943f049c10c723a 100644 (file)
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@@ -72,7 +72,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
      # If True it will raise an error if no login info is provided
      _LOGIN_REQUIRED = False
  
-    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'
+    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
  
      _YOUTUBE_CLIENT_HEADERS = {
          'x-youtube-client-name': '1',
@@ -1328,7 +1328,6 @@ def _get_ytplayer_config(self, video_id, webpage):
              # https://github.com/ytdl-org/youtube-dl/pull/7599)
              r';ytplayer\.config\s*=\s*({.+?});ytplayer',
              r';ytplayer\.config\s*=\s*({.+?});',
-            r'ytInitialPlayerResponse\s*=\s*({.+?});var meta'  # Needed???
          )
          config = self._search_regex(
              patterns, webpage, 'ytplayer.config', default=None)
@@ -2532,11 +2531,22 @@ def decrypt_sig(mobj):
  
  class YoutubeTabIE(YoutubeBaseInfoExtractor):
      IE_DESC = 'YouTube.com tab'
-    _VALID_URL = (r'''(?x)
-        https?://(?:\w+\.)?(?:youtube(?:kids)?\.com|invidio\.us)/(?:
-            (?!(%s)([/#?]|$))|channel/|c/|user/|
-            (?P<not_channel>playlist|watch)/?\?.*?\blist=)
-        (?P<id>[^/?#&]+)''') % YoutubeBaseInfoExtractor._RESERVED_NAMES
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:\w+\.)?
+                        (?:
+                            youtube(?:kids)?\.com|
+                            invidio\.us
+                        )/
+                        (?:
+                            (?:channel|c|user)/|
+                            (?P<not_channel>
+                                (?:playlist|watch)\?.*?\blist=
+                            )|
+                            (?!(%s)([/#?]|$))  # Direct URLs
+                        )
+                        (?P<id>[^/?\#&]+)
+                    ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
      IE_NAME = 'youtube:tab'
  
      _TESTS = [{
@@ -2910,7 +2920,7 @@ def _itemSection_entries(self, item_sect_renderer):
  
      def _rich_entries(self, rich_grid_renderer):
          renderer = try_get(
-            rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
+            rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
          video_id = renderer.get('videoId')
          if not video_id:
              return
@@ -3008,14 +3018,14 @@ def _extract_continuation(cls, renderer):
  
      def _entries(self, tab, identity_token):
  
-        def extract_entries(parent_renderer):
-            slr_contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
-            for slr_content in slr_contents:
-                if not isinstance(slr_content, dict):
+        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
+            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
+            for content in contents:
+                if not isinstance(content, dict):
                      continue
-                is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
+                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                  if not is_renderer:
-                    renderer = slr_content.get('richItemRenderer')
+                    renderer = content.get('richItemRenderer')
                      if renderer:
                          for entry in self._rich_entries(renderer):
                              yield entry
@@ -3041,7 +3051,6 @@ def extract_entries(parent_renderer):
                      if renderer:
                          for entry in self._shelf_entries(renderer):
                              yield entry
-                        continuation_list[0] = self._extract_continuation(parent_renderer)
                          continue
                      renderer = isr_content.get('backstagePostThreadRenderer')
                      if renderer:
@@ -3054,18 +3063,19 @@ def extract_entries(parent_renderer):
                          entry = self._video_entry(renderer)
                          if entry:
                              yield entry
+
                  if not continuation_list[0]:
                      continuation_list[0] = self._extract_continuation(is_renderer)
-                if not continuation_list[0]:
-                    continuation_list[0] = self._extract_continuation(parent_renderer)
+
+            if not continuation_list[0]:
+                continuation_list[0] = self._extract_continuation(parent_renderer)
  
          continuation_list = [None]  # Python 2 doesnot support nonlocal
          parent_renderer = (
              try_get(tab, lambda x: x['sectionListRenderer'], dict)
              or try_get(tab, lambda x: x['richGridRenderer'], dict) or {})
-        if parent_renderer:
-            for entry in extract_entries(parent_renderer):
-                yield entry
+        for entry in extract_entries(parent_renderer):
+            yield entry
          continuation = continuation_list[0]
  
          headers = {
@@ -3078,8 +3088,6 @@ def extract_entries(parent_renderer):
          for page_num in itertools.count(1):
              if not continuation:
                  break
-            if hasattr(self, '_MAX_PAGES') and page_num > self._MAX_PAGES:
-                break
              browse = self._download_json(
                  'https://www.youtube.com/browse_ajax', None,
                  'Downloading page %d' % page_num,
@@ -3111,7 +3119,7 @@ def extract_entries(parent_renderer):
                          yield entry
                      continuation = self._extract_continuation(continuation_renderer)
                      continue
-                continuation_renderer = continuation_contents.get('sectionListContinuation')
+                continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
                  if continuation_renderer:
                      continuation_list = [None]
                      for entry in extract_entries(continuation_renderer):
@@ -3125,19 +3133,13 @@ def extract_entries(parent_renderer):
                  continuation_item = continuation_items[0]
                  if not isinstance(continuation_item, dict):
                      continue
-                renderer = continuation_item.get('playlistVideoRenderer')
+                renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
                  if renderer:
                      video_list_renderer = {'contents': continuation_items}
                      for entry in self._playlist_entries(video_list_renderer):
                          yield entry
                      continuation = self._extract_continuation(video_list_renderer)
                      continue
-                renderer = continuation_item.get('itemSectionRenderer')
-                if renderer:
-                    for entry in self._itemSection_entries(renderer):
-                        yield entry
-                    continuation = self._extract_continuation({'contents': continuation_items})
-                    continue
              break
  
      @staticmethod
@@ -3175,7 +3177,7 @@ def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
          selected_tab = self._extract_selected_tab(tabs)
          renderer = try_get(
              data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
-        playlist_id = None
+        playlist_id = title = description = None
          if renderer:
              channel_title = renderer.get('title') or item_id
              tab_title = selected_tab.get('title')
@@ -3191,7 +3193,9 @@ def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
              description = None
              playlist_id = item_id
          if playlist_id is None:
-            return None
+            playlist_id = item_id
+        if title is None:
+            title = "Youtube " + playlist_id.title()
          playlist = self.playlist_result(
              self._entries(selected_tab['content'], identity_token),
              playlist_id=playlist_id, playlist_title=title,
@@ -3212,7 +3216,7 @@ def _real_extract(self, url):
          url = compat_urlparse.urlunparse(
              compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
          is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
-        if is_home is not None and is_home.group('not_channel') is None:
+        if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
              self._downloader.report_warning(
                  'A channel/user page was given. All the channel\'s videos will be downloaded. '
                  'To download only the videos in the home page, add a "/home" to the URL')
@@ -3365,6 +3369,25 @@ def _real_extract(self, url):
              ie=YoutubeTabIE.ie_key(), video_id=user_id)
  
  
+class YoutubeFavouritesIE(InfoExtractor):
+    IE_NAME = 'youtube:favorites'
+    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
+    _VALID_URL = r':ytfav(?:ou?rite)?s?'
+    _LOGIN_REQUIRED = True
+    _TESTS = [{
+        'url': ':ytfav',
+        'only_matching': True,
+    }, {
+        'url': ':ytfavorites',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        return self.url_result(
+            'https://www.youtube.com/playlist?list=LL',
+            ie=YoutubeTabIE.ie_key())
+
+
  class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
      IE_DESC = 'YouTube.com searches'
      # there doesn't appear to be a real limit, for example if you search for
@@ -3536,9 +3559,9 @@ def _real_extract(self, url):
  
  class YoutubeWatchLaterIE(InfoExtractor):
      IE_NAME = 'youtube:watchlater'
-    IE_DESC = 'Youtube watch later list, ":ytwatchlater" or "WL" for short (requires authentication)'
      _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/watch_later|:ytwatchlater|WL'
  
+    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
      _TESTS = [{
          'url': 'https://www.youtube.com/feed/watch_later',
          'only_matching': True,
@@ -3552,21 +3575,6 @@ def _real_extract(self, url):
              'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
  
  
-class YoutubeFavouritesIE(InfoExtractor):
-    IE_NAME = 'youtube:favourites'
-    IE_DESC = 'YouTube.com liked videos, ":ytfav" or "LL" for short (requires authentication)'
-    _VALID_URL = r':ytfav(?:ou?rite)?s?|LL'
-
-    _TESTS = [{
-        'url': ':ytfav',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        return self.url_result(
-            'https://www.youtube.com/playlist?list=LL', ie=YoutubeTabIE.ie_key())
-
-
  class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
      IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
      _VALID_URL = r'https?://(?:www\.)?youtube\.com(?:/feed/recommended|/?[?#]|/?$)|:ytrec(?:ommended)?'
@@ -3575,8 +3583,8 @@ class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
  
  
  class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
      _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsub(?:scription)?s?'
+    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
      _FEED_NAME = 'subscriptions'
      _PLAYLIST_TITLE = 'Youtube Subscriptions'
author	pukkandan <redacted>
	Mon, 23 Nov 2020 21:47:42 +0000 (03:17 +0530)
committer	pukkandan <redacted>
	Mon, 23 Nov 2020 22:18:17 +0000 (03:48 +0530)
docs/supportedsites.md		patch \| blob \| blame \| history
youtube_dlc/extractor/youtube.py		patch \| blob \| blame \| history