diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py
index 3f3f9c58b96cdfa4514da7b9a197760f155a0618..e0f211b741f712b1e1d9b394a54233385e67dc50 100644
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@@ -64,7 +64,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
 
     _RESERVED_NAMES = (
-        r'course|embed|watch|w|results|storefront|'
+        r'course|embed|channel|c|user|playlist|watch|w|results|storefront|oops|'
         r'shared|index|account|reporthistory|t/terms|about|upload|signin|logout|'
         r'feed/(watch_later|history|subscriptions|library|trending|recommended)')
 
@@ -72,7 +72,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     # If True it will raise an error if no login info is provided
     _LOGIN_REQUIRED = False
 
-    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|LL|WL)'
+    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
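+    # e.g. matches 'PL<10+ id chars>', 'OLAK5uy_<10+ id chars>', or the literal ids 'RDMM', 'WL', 'LL', 'LM'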
 
     _YOUTUBE_CLIENT_HEADERS = {
         'x-youtube-client-name': '1',
@@ -306,6 +306,8 @@ def _real_initialize(self):
         },
     }
 
+    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
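+    # e.g. matches both `window["ytInitialData"] = {...};` and `ytInitialData = {...};`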
+
     def _call_api(self, ep, query, video_id):
         data = self._DEFAULT_API_DATA.copy()
         data.update(query)
@@ -322,8 +324,8 @@ def _call_api(self, ep, query, video_id):
     def _extract_yt_initial_data(self, video_id, webpage):
         return self._parse_json(
             self._search_regex(
-                r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;',
-                webpage, 'yt initial data'),
+                (r'%s\s*\n' % self._YT_INITIAL_DATA_RE,
+                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
             video_id)
 
 
@@ -504,7 +506,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
     }
-    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
+    _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')  # TODO 'json3' raising issues with automatic captions
 
     _GEO_BYPASS = False
 
@@ -1089,6 +1091,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            # with '};' inside yt initial data (see https://github.com/ytdl-org/youtube-dl/issues/27093)
+            'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
+            'info_dict': {
+                'id': 'CHqg6qOn4no',
+                'ext': 'mp4',
+                'title': 'Part 77   Sort a list of simple types in c#',
+                'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
+                'upload_date': '20130831',
+                'uploader_id': 'kudvenkat',
+                'uploader': 'kudvenkat',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
     ]
 
     def __init__(self, *args, **kwargs):
@@ -1310,7 +1328,6 @@ def _get_ytplayer_config(self, video_id, webpage):
             # https://github.com/ytdl-org/youtube-dl/pull/7599)
             r';ytplayer\.config\s*=\s*({.+?});ytplayer',
             r';ytplayer\.config\s*=\s*({.+?});',
-            r'ytInitialPlayerResponse\s*=\s*({.+?});var meta'  # Needed???
         )
         config = self._search_regex(
             patterns, webpage, 'ytplayer.config', default=None)
@@ -1318,44 +1335,6 @@ def _get_ytplayer_config(self, video_id, webpage):
             return self._parse_json(
                 uppercase_escape(config), video_id, fatal=False)
 
-    def _get_music_metadata_from_yt_initial(self, yt_initial):
-        music_metadata = []
-        key_map = {
-            'Album': 'album',
-            'Artist': 'artist',
-            'Song': 'track'
-        }
-        contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
-        if type(contents) is list:
-            for content in contents:
-                music_track = {}
-                if type(content) is not dict:
-                    continue
-                videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
-                if type(videoSecondaryInfoRenderer) is not dict:
-                    continue
-                rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
-                if type(rows) is not list:
-                    continue
-                for row in rows:
-                    metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
-                    if type(metadataRowRenderer) is not dict:
-                        continue
-                    key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
-                    value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
-                        try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
-                    if type(key) is not str or type(value) is not str:
-                        continue
-                    if key in key_map:
-                        if key_map[key] in music_track:
-                            # we've started on a new track
-                            music_metadata.append(music_track)
-                            music_track = {}
-                        music_track[key_map[key]] = value
-                if len(music_track.keys()):
-                    music_metadata.append(music_track)
-        return music_metadata
-
     def _get_automatic_captions(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
            argument to speed up the process."""
@@ -2138,6 +2117,21 @@ def _extract_filesize(media_url):
                         formats.append(a_format)
             else:
                 error_message = extract_unavailable_message()
+                if not error_message:
+                    reason_list = try_get(
+                        player_response,
+                        lambda x: x['playabilityStatus']['errorScreen']['playerErrorMessageRenderer']['subreason']['runs'],
+                        list) or []
+                    for reason in reason_list:
+                        if not isinstance(reason, dict):
+                            continue
+                        reason_text = try_get(reason, lambda x: x['text'], compat_str)
+                        if reason_text:
+                            if not error_message:
+                                error_message = ''
+                            error_message += reason_text
+                    if error_message:
+                        error_message = clean_html(error_message)
                 if not error_message:
                     error_message = clean_html(try_get(
                         player_response, lambda x: x['playabilityStatus']['reason'],
@@ -2263,7 +2257,7 @@ def extract_meta(field):
         # Youtube Music Auto-generated description
         release_date = release_year = None
         if video_description:
-            mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
+            mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
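+            # The pattern above assumes an auto-generated description shaped roughly like:
+            #   <track> · <artist>\n\n<album> ... ℗ <year> ... Released on: <YYYY-MM-DD> ... Auto-generated by YouTube.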
             if mobj:
                 if not track:
                     track = mobj.group('track').strip()
@@ -2280,13 +2274,33 @@ def extract_meta(field):
                 if release_year:
                     release_year = int(release_year)
 
-        yt_initial = self._get_yt_initial_data(video_id, video_webpage)
-        if yt_initial:
-            music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
-            if len(music_metadata):
-                album = music_metadata[0].get('album')
-                artist = music_metadata[0].get('artist')
-                track = music_metadata[0].get('track')
+        yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
+        contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
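+        # Each metadata row handled below is assumed to look roughly like:
+        # {'metadataRowRenderer': {'title': {'simpleText': 'Song'}, 'contents': [{'simpleText': '...'}], 'hasDividerLine': True}}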
+        for content in contents:
+            rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
+            multiple_songs = False
+            for row in rows:
+                if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
+                    multiple_songs = True
+                    break
+            for row in rows:
+                mrr = row.get('metadataRowRenderer') or {}
+                mrr_title = try_get(
+                    mrr, lambda x: x['title']['simpleText'], compat_str)
+                mrr_contents = try_get(
+                    mrr, lambda x: x['contents'][0], dict) or {}
+                mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
+                if not (mrr_title and mrr_contents_text):
+                    continue
+                if mrr_title == 'License':
+                    video_license = mrr_contents_text
+                elif not multiple_songs:
+                    if mrr_title == 'Album':
+                        album = mrr_contents_text
+                    elif mrr_title == 'Artist':
+                        artist = mrr_contents_text
+                    elif mrr_title == 'Song':
+                        track = mrr_contents_text
 
         m_episode = re.search(
             r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
@@ -2319,8 +2333,8 @@ def extract_meta(field):
 
         def _extract_count(count_name):
             return str_to_int(self._search_regex(
-                r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
-                % re.escape(count_name),
+                (r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>' % re.escape(count_name),
+                 r'["\']label["\']\s*:\s*["\']([\d,.]+)\s+%ss["\']' % re.escape(count_name)),
                 video_webpage, count_name, default=None))
 
         like_count = _extract_count('like')
@@ -2499,13 +2513,23 @@ def decrypt_sig(mobj):
 
 class YoutubeTabIE(YoutubeBaseInfoExtractor):
     IE_DESC = 'YouTube.com tab'
-    # (?x)^ will cause warning in LiveIE. So I cant split this into multiple lines using '''
-    _VALID_URL = (
-        r'https?://(?:\w+\.)?(?:youtube(?:kids)?\.com|invidio\.us)/'
-        r'(?:(?!(%s)([/#?]|$))|'
-        r'(?:channel|c|user)/|'
-        r'(?:playlist|watch)\?.*?\blist=)'
-        r'(?P<id>[^/?#&]+)') % YoutubeBaseInfoExtractor._RESERVED_NAMES
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:\w+\.)?
+                        (?:
+                            youtube(?:kids)?\.com|
+                            invidio\.us
+                        )/
+                        (?:
+                            (?:channel|c|user)/|
+                            (?P<not_channel>
+                                feed/|
+                                (?:playlist|watch)\?.*?\blist=
+                            )|
+                            (?!(%s)([/#?]|$))  # Direct URLs
+                        )
+                        (?P<id>[^/?\#&]+)
+                    ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
     IE_NAME = 'youtube:tab'
 
     _TESTS = [{
@@ -2613,13 +2637,13 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         },
         'playlist_mincount': 138,
     }, {
-        'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
+        'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
         'only_matching': True,
     }, {
-        'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
+        'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
         'only_matching': True,
     }, {
-        'url': 'https://music.youtube.com/channel/UCT-K0qO8z6NzWrywqefBPBQ',
+        'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
         'only_matching': True,
     }, {
         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
@@ -2666,7 +2690,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
         },
         'playlist_mincount': 11,
     }, {
-        'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
+        'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
         'only_matching': True,
     }, {
         # Playlist URL that does not actually serve a playlist
@@ -2698,14 +2722,82 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
     }, {
         'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
         'only_matching': True,
-    }]
-
-    @classmethod
-    def suitable(cls, url):
-        IGNORE = (YoutubeLiveIE,)
-        return (
-            False if any(ie.suitable(url) for ie in IGNORE)
-            else super(YoutubeTabIE, cls).suitable(url))
+    }, {
+        'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
+        'info_dict': {
+            'id': '9Auq9mYxFEE',
+            'ext': 'mp4',
+            'title': 'Watch Sky News live',
+            'uploader': 'Sky News',
+            'uploader_id': 'skynews',
+            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
+            'upload_date': '20191102',
+            'description': 'md5:78de4e1c2359d0ea3ed829678e38b662',
+            'categories': ['News & Politics'],
+            'tags': list,
+            'like_count': int,
+            'dislike_count': int,
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
+        'info_dict': {
+            'id': 'a48o2S1cPoo',
+            'ext': 'mp4',
+            'title': 'The Young Turks - Live Main Show',
+            'uploader': 'The Young Turks',
+            'uploader_id': 'TheYoungTurks',
+            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
+            'upload_date': '20150715',
+            'license': 'Standard YouTube License',
+            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
+            'categories': ['News & Politics'],
+            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
+            'like_count': int,
+            'dislike_count': int,
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/feed/trending',
+        'only_matching': True,
+    }, {
+        # needs auth
+        'url': 'https://www.youtube.com/feed/library',
+        'only_matching': True,
+    }, {
+        # needs auth
+        'url': 'https://www.youtube.com/feed/history',
+        'only_matching': True,
+    }, {
+        # needs auth
+        'url': 'https://www.youtube.com/feed/subscriptions',
+        'only_matching': True,
+    }, {
+        # needs auth
+        'url': 'https://www.youtube.com/feed/watch_later',
+        'only_matching': True,
+    }, {
+        # no longer available?
+        'url': 'https://www.youtube.com/feed/recommended',
+        'only_matching': True,
+    }
+        # TODO
+        # {
+        #     'url': 'https://www.youtube.com/TheYoungTurks/live',
+        #     'only_matching': True,
+        # }
+    ]
 
     def _extract_channel_id(self, webpage):
         channel_id = self._html_search_meta(
@@ -2786,27 +2878,34 @@ def _grid_entries(self, grid_renderer):
                     'https://www.youtube.com/channel/%s' % channel_id,
                     ie=YoutubeTabIE.ie_key(), video_title=title)
 
-    def _shelf_entries_trimmed(self, shelf_renderer):
-        renderer = try_get(
-            shelf_renderer, lambda x: x['content']['horizontalListRenderer'], dict)
-        if not renderer:
+    def _shelf_entries_from_content(self, shelf_renderer):
+        content = shelf_renderer.get('content')
+        if not isinstance(content, dict):
             return
-        # TODO: add support for nested playlists so each shelf is processed
-        # as separate playlist
-        # TODO: this includes only first N items
-        for entry in self._grid_entries(renderer):
-            yield entry
+        renderer = content.get('gridRenderer')
+        if renderer:
+            # TODO: add support for nested playlists so each shelf is processed
+            # as separate playlist
+            # TODO: this includes only first N items
+            for entry in self._grid_entries(renderer):
+                yield entry
+        renderer = content.get('horizontalListRenderer')
+        if renderer:
+            # TODO
+            pass
 
     def _shelf_entries(self, shelf_renderer):
         ep = try_get(
             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
             compat_str)
         shelf_url = urljoin('https://www.youtube.com', ep)
-        if not shelf_url:
-            return
-        title = try_get(
-            shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
-        yield self.url_result(shelf_url, video_title=title)
+        if shelf_url:
+            title = try_get(
+                shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
+            yield self.url_result(shelf_url, video_title=title)
+        # Shelf may not contain shelf URL, fallback to extraction from content
+        for entry in self._shelf_entries_from_content(shelf_renderer):
+            yield entry
 
     def _playlist_entries(self, video_list_renderer):
         for content in video_list_renderer['contents']:
@@ -2820,6 +2919,7 @@ def _playlist_entries(self, video_list_renderer):
                 continue
             yield self._extract_video(renderer)
 
+    r""" # Not needed in the new implementation
     def _itemSection_entries(self, item_sect_renderer):
         for content in item_sect_renderer['contents']:
             if not isinstance(content, dict):
@@ -2831,10 +2931,11 @@ def _itemSection_entries(self, item_sect_renderer):
             if not video_id:
                 continue
             yield self._extract_video(renderer)
+    """
 
     def _rich_entries(self, rich_grid_renderer):
         renderer = try_get(
-            rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict)
+            rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
         video_id = renderer.get('videoId')
         if not video_id:
             return
@@ -2932,14 +3033,14 @@ def _extract_continuation(cls, renderer):
 
     def _entries(self, tab, identity_token):
 
-        def extract_entries(parent_renderer):
-            slr_contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
-            for slr_content in slr_contents:
-                if not isinstance(slr_content, dict):
+        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
+            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
+            for content in contents:
+                if not isinstance(content, dict):
                     continue
-                is_renderer = try_get(slr_content, lambda x: x['itemSectionRenderer'], dict)
+                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                 if not is_renderer:
-                    renderer = slr_content.get('richItemRenderer')
+                    renderer = content.get('richItemRenderer')
                     if renderer:
                         for entry in self._rich_entries(renderer):
                             yield entry
@@ -2965,7 +3066,6 @@ def extract_entries(parent_renderer):
                     if renderer:
                         for entry in self._shelf_entries(renderer):
                             yield entry
-                        continuation_list[0] = self._extract_continuation(parent_renderer)
                         continue
                     renderer = isr_content.get('backstagePostThreadRenderer')
                     if renderer:
@@ -2978,19 +3078,19 @@ def extract_entries(parent_renderer):
                         entry = self._video_entry(renderer)
                         if entry:
                             yield entry
+
                 if not continuation_list[0]:
                     continuation_list[0] = self._extract_continuation(is_renderer)
-                if not continuation_list[0]:
-                    continuation_list[0] = self._extract_continuation(parent_renderer)
+
+            if not continuation_list[0]:
+                continuation_list[0] = self._extract_continuation(parent_renderer)
 
         continuation_list = [None]  # Python 2 does not support nonlocal
         parent_renderer = (
             try_get(tab, lambda x: x['sectionListRenderer'], dict)
             or try_get(tab, lambda x: x['richGridRenderer'], dict) or {})
-        if parent_renderer:
-            for entry in extract_entries(parent_renderer):
-                yield entry
-
+        for entry in extract_entries(parent_renderer):
+            yield entry
         continuation = continuation_list[0]
 
         headers = {
@@ -3003,8 +3103,6 @@ def extract_entries(parent_renderer):
         for page_num in itertools.count(1):
             if not continuation:
                 break
-            if hasattr(self, '_MAX_PAGES') and page_num > self._MAX_PAGES:
-                break
             browse = self._download_json(
                 'https://www.youtube.com/browse_ajax', None,
                 'Downloading page %d' % page_num,
@@ -3036,7 +3134,7 @@ def extract_entries(parent_renderer):
                         yield entry
                     continuation = self._extract_continuation(continuation_renderer)
                     continue
-                continuation_renderer = continuation_contents.get('sectionListContinuation')
+                continuation_renderer = continuation_contents.get('sectionListContinuation')  # for feeds
                 if continuation_renderer:
                     continuation_list = [None]
                     for entry in extract_entries(continuation_renderer):
@@ -3050,19 +3148,13 @@ def extract_entries(parent_renderer):
                 continuation_item = continuation_items[0]
                 if not isinstance(continuation_item, dict):
                     continue
-                renderer = continuation_item.get('playlistVideoRenderer')
+                renderer = continuation_item.get('playlistVideoRenderer') or continuation_item.get('itemSectionRenderer')
                 if renderer:
                     video_list_renderer = {'contents': continuation_items}
                     for entry in self._playlist_entries(video_list_renderer):
                         yield entry
                     continuation = self._extract_continuation(video_list_renderer)
                     continue
-                renderer = continuation_item.get('itemSectionRenderer')
-                if renderer:
-                    for entry in self._itemSection_entries(renderer):
-                        yield entry
-                    continuation = self._extract_continuation({'contents': continuation_items})
-                    continue
             break
 
     @staticmethod
@@ -3100,7 +3192,7 @@ def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
         selected_tab = self._extract_selected_tab(tabs)
         renderer = try_get(
             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
-        playlist_id = None
+        playlist_id = title = description = None
         if renderer:
             channel_title = renderer.get('title') or item_id
             tab_title = selected_tab.get('title')
@@ -3116,7 +3208,9 @@ def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
             description = None
             playlist_id = item_id
         if playlist_id is None:
-            return None
+            playlist_id = item_id
+        if title is None:
+            title = "Youtube " + playlist_id.title()
         playlist = self.playlist_result(
             self._entries(selected_tab['content'], identity_token),
             playlist_id=playlist_id, playlist_title=title,
@@ -3132,24 +3226,57 @@ def _extract_from_playlist(self, item_id, data, playlist):
             self._playlist_entries(playlist), playlist_id=playlist_id,
             playlist_title=title)
 
+    def _extract_alerts(self, data):
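+        # Each entry of data['alerts'] is assumed to be a single-key dict such as
+        # {'<alertRenderer>': {'type': 'ERROR', 'text': {'simpleText': '...'}}} (or 'runs' instead of 'simpleText')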
+        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
+            for renderer in alert_dict:
+                alert = alert_dict[renderer]
+                alert_type = alert.get('type')
+                if not alert_type:
+                    continue
+                message = try_get(alert, lambda x: x['text']['simpleText'], compat_str)
+                if message:
+                    yield alert_type, message
+                for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
+                    message = try_get(run, lambda x: x['text'], compat_str)
+                    if message:
+                        yield alert_type, message
+
     def _real_extract(self, url):
         item_id = self._match_id(url)
         url = compat_urlparse.urlunparse(
             compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
+        is_home = re.match(r'(?P<pre>%s)(?P<post>/?(?![^#?]).*$)' % self._VALID_URL, url)
+        if is_home is not None and is_home.group('not_channel') is None and item_id != 'feed':
+            self._downloader.report_warning(
+                'A channel/user page was given. All the channel\'s videos will be downloaded. '
+                'To download only the videos of the home page, add "/home" to the URL')
+            url = '%s/videos%s' % (is_home.group('pre'), is_home.group('post') or '')
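+            # e.g. https://www.youtube.com/user/somechannel (hypothetical) is rewritten to
+            #      https://www.youtube.com/user/somechannel/videos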
+
         # Handle both video/playlist URLs
         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
         video_id = qs.get('v', [None])[0]
         playlist_id = qs.get('list', [None])[0]
+
+        if is_home is not None and is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
+            if playlist_id:
+                self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
+                url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
+                # return self.url_result(playlist_id, ie=YoutubePlaylistIE.ie_key())
+            else:
+                raise ExtractorError('Unable to recognize tab page')
         if video_id and playlist_id:
             if self._downloader.params.get('noplaylist'):
                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                 return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
             self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+
         webpage = self._download_webpage(url, item_id)
         identity_token = self._search_regex(
             r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
             'identity token', default=None)
         data = self._extract_yt_initial_data(item_id, webpage)
+        for alert_type, alert_message in self._extract_alerts(data):
+            self._downloader.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
         tabs = try_get(
             data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
         if tabs:
@@ -3158,7 +3285,11 @@ def _real_extract(self, url):
             data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
         if playlist:
             return self._extract_from_playlist(item_id, data, playlist)
-        # Fallback to video extraction if no playlist alike page is recognized
+        # Fallback to video extraction if no playlist alike page is recognized.
+        # First check for the current video then try the v attribute of URL query.
+        video_id = try_get(
+            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
+            compat_str) or video_id
         if video_id:
             return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
         # Failed to recognize
@@ -3279,56 +3410,23 @@ def _real_extract(self, url):
             ie=YoutubeTabIE.ie_key(), video_id=user_id)
 
 
-class YoutubeLiveIE(YoutubeBaseInfoExtractor):
-    IE_DESC = 'YouTube.com live streams'
-    _VALID_URL = r'(?P<base_url>%s)/live' % YoutubeTabIE._VALID_URL
-    IE_NAME = 'youtube:live'
-
+class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
+    IE_NAME = 'youtube:favorites'
+    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
+    _VALID_URL = r':ytfav(?:ou?rite)?s?'
+    _LOGIN_REQUIRED = True
     _TESTS = [{
-        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
-        'info_dict': {
-            'id': 'a48o2S1cPoo',
-            'ext': 'mp4',
-            'title': 'The Young Turks - Live Main Show',
-            'uploader': 'The Young Turks',
-            'uploader_id': 'TheYoungTurks',
-            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
-            'upload_date': '20150715',
-            'license': 'Standard YouTube License',
-            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
-            'categories': ['News & Politics'],
-            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
-            'like_count': int,
-            'dislike_count': int,
-        },
-        'params': {
-            'skip_download': True,
-        },
-    }, {
-        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
-        'only_matching': True,
-    }, {
-        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
+        'url': ':ytfav',
         'only_matching': True,
     }, {
-        'url': 'https://www.youtube.com/TheYoungTurks/live',
+        'url': ':ytfavorites',
         'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        channel_id = mobj.group('id')
-        base_url = mobj.group('base_url')
-        webpage = self._download_webpage(url, channel_id, fatal=False)
-        if webpage:
-            page_type = self._og_search_property(
-                'type', webpage, 'page type', default='')
-            video_id = self._html_search_meta(
-                'videoId', webpage, 'video id', default=None)
-            if page_type.startswith('video') and video_id and re.match(
-                    r'^[0-9A-Za-z_-]{11}$', video_id):
-                return self.url_result(video_id, YoutubeIE.ie_key())
-        return self.url_result(base_url)
+        return self.url_result(
+            'https://www.youtube.com/playlist?list=LL',
+            ie=YoutubeTabIE.ie_key())
 
 
 class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
@@ -3371,10 +3469,33 @@ def _entries(self, query, n):
                 list)
             if not slr_contents:
                 break
-            isr_contents = try_get(
-                slr_contents,
-                lambda x: x[0]['itemSectionRenderer']['contents'],
-                list)
+
+            isr_contents = []
+            continuation_token = None
+            # YouTube sometimes adds promoted content to searches,
+            # changing the index location of videos and token.
+            # So we search through all entries until we find them.
+            for index, isr in enumerate(slr_contents):
+                if not isr_contents:
+                    isr_contents = try_get(
+                        slr_contents,
+                        (lambda x: x[index]['itemSectionRenderer']['contents']),
+                        list) or []
+                    for content in isr_contents:
+                        if content.get('videoRenderer') is not None:
+                            break
+                    else:
+                        isr_contents = []
+
+                if continuation_token is None:
+                    continuation_token = try_get(
+                        slr_contents,
+                        lambda x: x[index]['continuationItemRenderer']['continuationEndpoint']['continuationCommand'][
+                            'token'],
+                        compat_str)
+                if continuation_token is not None and isr_contents:
+                    break
+
             if not isr_contents:
                 break
             for content in isr_contents:
@@ -3408,13 +3529,9 @@ def _entries(self, query, n):
                 }
                 if total == n:
                     return
-            token = try_get(
-                slr_contents,
-                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
-                compat_str)
-            if not token:
+            if not continuation_token:
                 break
-            data['continuation'] = token
+            data['continuation'] = continuation_token
 
     def _get_n_results(self, query, n):
         """Get a specified number of results for a query"""
@@ -3428,12 +3545,11 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
     _SEARCH_PARAMS = 'CAI%3D'
 
 
-class YoutubeSearchURLIE(InfoExtractor):
+class YoutubeSearchURLIE(YoutubeSearchIE):
     IE_DESC = 'YouTube.com search URLs'
-    IE_NAME = 'youtube:search_url'
-    _PARAM_REGEX = r''
-    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results/?(?:\?|\?[^#]*?&)(?:sp=(?P<param1>[^&#]+)&(?:[^#]*&)?)?(?:q|search_query)=(?P<query>[^#&]+)(?:[^#]*?&sp=(?P<param2>[^#&]+))?'
-    _MAX_RESULTS = 100
+    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
+    # _MAX_RESULTS = 100
     _TESTS = [{
         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
         'playlist_mincount': 5,
@@ -3445,25 +3561,25 @@ class YoutubeSearchURLIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    @classmethod
+    def _make_valid_url(cls):
+        return cls._VALID_URL
+
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        query = compat_urllib_parse_unquote_plus(mobj.group('query'))
-        IE = YoutubeSearchIE(self._downloader)
-        IE._SEARCH_PARAMS = mobj.group('param1') or mobj.group('param2')
-        self._downloader.to_screen(IE._SEARCH_PARAMS)
-        IE._MAX_RESULTS = self._MAX_RESULTS
-        return IE._get_n_results(query, self._MAX_RESULTS)
+        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        query = (qs.get('search_query') or qs.get('q'))[0]
+        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
+        return self._get_n_results(query, self._MAX_RESULTS)
 
 
 class YoutubeFeedsInfoExtractor(YoutubeTabIE):
     """
     Base class for feed extractors
-    Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
+    Subclasses must define the _FEED_NAME property.
     """
     _LOGIN_REQUIRED = True
-    _TESTS = []
-    
     # _MAX_PAGES = 5
+    _TESTS = []
 
     @property
     def IE_NAME(self):
@@ -3472,85 +3588,63 @@ def IE_NAME(self):
     def _real_initialize(self):
         self._login()
 
-    def _shelf_entries(self, shelf_renderer):
-        renderer = try_get(shelf_renderer, lambda x: x['content']['gridRenderer'], dict)
-        if not renderer:
-            return
-        for entry in self._grid_entries(renderer):
-            yield entry
-
-    def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
-        selected_tab = self._extract_selected_tab(tabs)
-        return self.playlist_result(
-            self._entries(selected_tab['content'], identity_token),
-            playlist_title=self._PLAYLIST_TITLE)
-
     def _real_extract(self, url):
-        item_id = self._FEED_NAME
-        url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
-        webpage = self._download_webpage(url, item_id)
-        identity_token = self._search_regex(
-            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
-            'identity token', default=None)
-        data = self._extract_yt_initial_data(item_id, webpage)
-        tabs = try_get(
-            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
-        if tabs:
-            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
-        # Failed to recognize
-        raise ExtractorError('Unable to recognize feed page')
-
+        return self.url_result(
+            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
+            ie=YoutubeTabIE.ie_key())
 
-class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = 'Youtube watch later list, ":ytwatchlater" or "WL" for short (requires authentication)'
-    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/watch_later|:ytwatchlater'
-    _FEED_NAME = 'watchlater'
 
+class YoutubeWatchLaterIE(InfoExtractor):
+    IE_NAME = 'youtube:watchlater'
+    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
+    _VALID_URL = r':ytwatchlater'
     _TESTS = [{
-        'url': 'https://www.youtube.com/feed/watch_later',
-        'only_matching': True,
-    }, {
         'url': ':ytwatchlater',
         'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        return self.url_result('WL', ie=YoutubePlaylistIE.ie_key())
-
-
-class YoutubeFavouritesIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = 'YouTube.com liked videos, ":ytfav" or "LL" for short (requires authentication)'
-    _VALID_URL = r':ytfav(?:ou?rite)s?'
-    _FEED_NAME = 'favourites'
-
-    _TESTS = [{
-        'url': ':ytfav',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        return self.url_result('LL', ie=YoutubePlaylistIE.ie_key())
+        return self.url_result(
+            'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
 
 
 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
     IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
-    _VALID_URL = r'https?://(?:www\.)?youtube\.com(?:/feed/recommended|/?[?#]|/?$)|:ytrec(?:ommended)?'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
     _FEED_NAME = 'recommended'
-    _PLAYLIST_TITLE = 'Youtube Recommended videos'
+    _TESTS = [{
+        'url': ':ytrec',
+        'only_matching': True,
+    }, {
+        'url': ':ytrecommended',
+        'only_matching': True,
+    }, {
+        'url': 'https://youtube.com',
+        'only_matching': True,
+    }]
 
 
 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
-    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsub(?:scription)?s?'
+    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
+    _VALID_URL = r':ytsub(?:scription)?s?'
     _FEED_NAME = 'subscriptions'
-    _PLAYLIST_TITLE = 'Youtube Subscriptions'
+    _TESTS = [{
+        'url': ':ytsubs',
+        'only_matching': True,
+    }, {
+        'url': ':ytsubscriptions',
+        'only_matching': True,
+    }]
 
 
 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
     IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
-    _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
+    _VALID_URL = r':ythistory'
     _FEED_NAME = 'history'
-    _PLAYLIST_TITLE = 'Youtube History'
+    _TESTS = [{
+        'url': ':ythistory',
+        'only_matching': True,
+    }]
 
 
 class YoutubeTruncatedURLIE(InfoExtractor):