Merge pull request #187 from pukkandan/break-on-existing

[yt-dlp.git] / youtube_dlc / extractor / youtube.py
diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py

index 8946b7df866cfc353c429702819d4251190a4d89..97cc793f9a20a2b864091f55007be2805aaa01f3 100644 (file)
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@@ -36,9 +36,11 @@
      get_element_by_attribute,
      get_element_by_id,
      int_or_none,
+    js_to_json,
      mimetype2ext,
      orderedSet,
      parse_codecs,
+    parse_count,
      parse_duration,
      remove_quotes,
      remove_start,
@@ -69,6 +71,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
      _LOGIN_REQUIRED = False
  
      _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
+    _INITIAL_DATA_RE = r'(?:window\["ytInitialData"\]|ytInitialData)\W?=\W?({.*?});'
+    _YTCFG_DATA_RE = r"ytcfg.set\(({.*?})\)"
  
      _YOUTUBE_CLIENT_HEADERS = {
          'x-youtube-client-name': '1',
@@ -99,6 +103,8 @@ def _login(self):
          if username is None:
              if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
                  raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+            if self._downloader.params.get('cookiefile') and False:  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
+                self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
              return True
  
          login_page = self._download_webpage(
@@ -271,11 +277,19 @@ def warn(message):
  
      def _download_webpage_handle(self, *args, **kwargs):
          query = kwargs.get('query', {}).copy()
-        query['disable_polymer'] = 'true'
          kwargs['query'] = query
          return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
              *args, **compat_kwargs(kwargs))
  
+    def _get_yt_initial_data(self, video_id, webpage):
+        config = self._search_regex(
+            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
+             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
+            webpage, 'ytInitialData', default=None)
+        if config:
+            return self._parse_json(
+                uppercase_escape(config), video_id, fatal=False)
+
      def _real_initialize(self):
          if self._downloader is None:
              return
@@ -285,15 +299,61 @@ def _real_initialize(self):
  
  
  class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
-    # Extract entries from page with "Load more" button
-    def _entries(self, page, playlist_id):
-        more_widget_html = content_html = page
-        for page_num in itertools.count(1):
-            for entry in self._process_page(content_html):
+
+    def _find_entries_in_json(self, extracted):
+        entries = []
+        c = {}
+
+        def _real_find(obj):
+            if obj is None or isinstance(obj, str):
+                return
+
+            if type(obj) is list:
+                for elem in obj:
+                    _real_find(elem)
+
+            if type(obj) is dict:
+                if self._is_entry(obj):
+                    entries.append(obj)
+                    return
+
+                if 'continuationCommand' in obj:
+                    c['continuation'] = obj
+                    return
+
+                for _, o in obj.items():
+                    _real_find(o)
+
+        _real_find(extracted)
+
+        return entries, try_get(c, lambda x: x["continuation"])
+
+    def _entries(self, page, playlist_id, max_pages=None):
+        seen = []
+
+        yt_conf = {}
+        for m in re.finditer(self._YTCFG_DATA_RE, page):
+            parsed = self._parse_json(m.group(1), playlist_id,
+                                      transform_source=js_to_json, fatal=False)
+            if parsed:
+                yt_conf.update(parsed)
+
+        data_json = self._parse_json(self._search_regex(self._INITIAL_DATA_RE, page, 'ytInitialData'), None)
+
+        for page_num in range(1, max_pages + 1) if max_pages is not None else itertools.count(1):
+            entries, continuation = self._find_entries_in_json(data_json)
+            processed = self._process_entries(entries, seen)
+
+            if not processed:
+                break
+            for entry in processed:
                  yield entry
  
-            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
-            if not mobj:
+            if not continuation or not yt_conf:
+                break
+            continuation_token = try_get(continuation, lambda x: x['continuationCommand']['token'])
+            continuation_url = try_get(continuation, lambda x: x['commandMetadata']['webCommandMetadata']['apiUrl'])
+            if not continuation_token or not continuation_url:
                  break
  
              count = 0
@@ -302,12 +362,23 @@ def _entries(self, page, playlist_id):
                  try:
                      # Downloading page may result in intermittent 5xx HTTP error
                      # that is usually worked around with a retry
-                    more = self._download_json(
-                        'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
-                        'Downloading page #%s%s'
-                        % (page_num, ' (retry #%d)' % count if count else ''),
+                    data_json = self._download_json(
+                        'https://www.youtube.com%s' % continuation_url,
+                        playlist_id,
+                        'Downloading continuation page #%s%s' % (page_num, ' (retry #%d)' % count if count else ''),
+
                          transform_source=uppercase_escape,
-                        headers=self._YOUTUBE_CLIENT_HEADERS)
+                        query={
+                            'key': try_get(yt_conf, lambda x: x['INNERTUBE_API_KEY'])
+                        },
+                        data=str(json.dumps({
+                            'context': try_get(yt_conf, lambda x: x['INNERTUBE_CONTEXT']),
+                            'continuation': continuation_token
+                        })).encode(encoding='UTF-8', errors='strict'),
+                        headers={
+                            'Content-Type': 'application/json'
+                        }
+                    )
                      break
                  except ExtractorError as e:
                      if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
@@ -316,31 +387,30 @@ def _entries(self, page, playlist_id):
                              continue
                      raise
  
-            content_html = more['content_html']
-            if not content_html.strip():
-                # Some webpages show a "Load more" button but they don't
-                # have more videos
-                break
-            more_widget_html = more['load_more_widget_html']
+    def _extract_title(self, renderer):
+        title = try_get(renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
+        if title:
+            return title
+        return try_get(renderer, lambda x: x['title']['simpleText'], compat_str)
  
  
  class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
-    def _process_page(self, content):
-        for video_id, video_title in self.extract_videos_from_page(content):
-            yield self.url_result(video_id, 'Youtube', video_id, video_title)
+    def _is_entry(self, obj):
+        return 'videoId' in obj
  
-    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
-        for mobj in re.finditer(video_re, page):
-            # The link with index 0 is not the first video of the playlist (not sure if still actual)
-            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
+    def _process_entries(self, entries, seen):
+        ids_in_page = []
+        titles_in_page = []
+        for renderer in entries:
+            video_id = try_get(renderer, lambda x: x['videoId'])
+            video_title = self._extract_title(renderer)
+
+            if video_id is None or video_title is None:
+                # we do not have a videoRenderer or title extraction broke
                  continue
-            video_id = mobj.group('id')
-            video_title = unescapeHTML(
-                mobj.group('title')) if 'title' in mobj.groupdict() else None
-            if video_title:
-                video_title = video_title.strip()
-            if video_title == '► Play all':
-                video_title = None
+
+            video_title = video_title.strip()
+
              try:
                  idx = ids_in_page.index(video_id)
                  if video_title and not titles_in_page[idx]:
@@ -349,19 +419,17 @@ def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_p
                  ids_in_page.append(video_id)
                  titles_in_page.append(video_title)
  
-    def extract_videos_from_page(self, page):
-        ids_in_page = []
-        titles_in_page = []
-        self.extract_videos_from_page_impl(
-            self._VIDEO_RE, page, ids_in_page, titles_in_page)
-        return zip(ids_in_page, titles_in_page)
+        for video_id, video_title in zip(ids_in_page, titles_in_page):
+            yield self.url_result(video_id, 'Youtube', video_id, video_title)
  
  
  class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
-    def _process_page(self, content):
-        for playlist_id in orderedSet(re.findall(
-                r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
-                content)):
+    def _is_entry(self, obj):
+        return 'playlistId' in obj
+
+    def _process_entries(self, entries, seen):
+        for playlist_id in orderedSet(try_get(r, lambda x: x['playlistId']) for r in entries):
+
              yield self.url_result(
                  'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
  
@@ -578,48 +646,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'end_time': 9,
              }
          },
-        {
-            'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
-            'note': 'Test generic use_cipher_signature video (#897)',
-            'info_dict': {
-                'id': 'UxxajLWwzqY',
-                'ext': 'mp4',
-                'upload_date': '20120506',
-                'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
-                'alt_title': 'I Love It (feat. Charli XCX)',
-                'description': 'md5:19a2f98d9032b9311e686ed039564f63',
-                'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
-                         'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
-                         'iconic ep', 'iconic', 'love', 'it'],
-                'duration': 180,
-                'uploader': 'Icona Pop',
-                'uploader_id': 'IconaPop',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
-                'creator': 'Icona Pop',
-                'track': 'I Love It (feat. Charli XCX)',
-                'artist': 'Icona Pop',
-            }
-        },
-        {
-            'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
-            'note': 'Test VEVO video with age protection (#956)',
-            'info_dict': {
-                'id': '07FYdnEawAQ',
-                'ext': 'mp4',
-                'upload_date': '20130703',
-                'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
-                'alt_title': 'Tunnel Vision',
-                'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
-                'duration': 419,
-                'uploader': 'justintimberlakeVEVO',
-                'uploader_id': 'justintimberlakeVEVO',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
-                'creator': 'Justin Timberlake',
-                'track': 'Tunnel Vision',
-                'artist': 'Justin Timberlake',
-                'age_limit': 18,
-            }
-        },
          {
              'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
              'note': 'Embed-only video (#1746)',
@@ -677,42 +703,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              },
              'skip': 'format 141 not served anymore',
          },
-        # DASH manifest with encrypted signature
-        {
-            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
-            'info_dict': {
-                'id': 'IB3lcPjvWLA',
-                'ext': 'm4a',
-                'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
-                'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
-                'duration': 244,
-                'uploader': 'AfrojackVEVO',
-                'uploader_id': 'AfrojackVEVO',
-                'upload_date': '20131011',
-            },
-            'params': {
-                'youtube_include_dash_manifest': True,
-                'format': '141/bestaudio[ext=m4a]',
-            },
-        },
-        # JS player signature function name containing $
-        {
-            'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
-            'info_dict': {
-                'id': 'nfWlot6h_JM',
-                'ext': 'm4a',
-                'title': 'Taylor Swift - Shake It Off',
-                'description': 'md5:307195cd21ff7fa352270fe884570ef0',
-                'duration': 242,
-                'uploader': 'TaylorSwiftVEVO',
-                'uploader_id': 'TaylorSwiftVEVO',
-                'upload_date': '20140818',
-            },
-            'params': {
-                'youtube_include_dash_manifest': True,
-                'format': '141/bestaudio[ext=m4a]',
-            },
-        },
          # Controversy video
          {
              'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
@@ -728,7 +718,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
              }
          },
-        # Normal age-gate video (No vevo, embed allowed)
+        # Normal age-gate video (embed allowed)
          {
              'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
              'info_dict': {
@@ -744,43 +734,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'age_limit': 18,
              },
          },
-        # Age-gate video with encrypted signature
-        {
-            'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
-            'info_dict': {
-                'id': '6kLq3WMV1nU',
-                'ext': 'mp4',
-                'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
-                'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
-                'duration': 246,
-                'uploader': 'LloydVEVO',
-                'uploader_id': 'LloydVEVO',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
-                'upload_date': '20110629',
-                'age_limit': 18,
-            },
-        },
-        # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
-        # YouTube Red ad is not captured for creator
-        {
-            'url': '__2ABJjxzNo',
-            'info_dict': {
-                'id': '__2ABJjxzNo',
-                'ext': 'mp4',
-                'duration': 266,
-                'upload_date': '20100430',
-                'uploader_id': 'deadmau5',
-                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
-                'creator': 'Dada Life, deadmau5',
-                'description': 'md5:12c56784b8032162bb936a5f76d55360',
-                'uploader': 'deadmau5',
-                'title': 'Deadmau5 - Some Chords (HD)',
-                'alt_title': 'This Machine Kills Some Chords',
-            },
-            'expected_warnings': [
-                'DASH manifest missing',
-            ]
-        },
          # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
          {
              'url': 'lqQg6PlCWgI',
@@ -1502,6 +1455,7 @@ def _get_ytplayer_config(self, video_id, webpage):
              # https://github.com/ytdl-org/youtube-dl/pull/7599)
              r';ytplayer\.config\s*=\s*({.+?});ytplayer',
              r';ytplayer\.config\s*=\s*({.+?});',
+            r'ytInitialPlayerResponse\s*=\s*({.+?});var meta'
          )
          config = self._search_regex(
              patterns, webpage, 'ytplayer.config', default=None)
@@ -1509,14 +1463,43 @@ def _get_ytplayer_config(self, video_id, webpage):
              return self._parse_json(
                  uppercase_escape(config), video_id, fatal=False)
  
-    def _get_yt_initial_data(self, video_id, webpage):
-        config = self._search_regex(
-            (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
-             r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
-            webpage, 'ytInitialData', default=None)
-        if config:
-            return self._parse_json(
-                uppercase_escape(config), video_id, fatal=False)
+    def _get_music_metadata_from_yt_initial(self, yt_initial):
+        music_metadata = []
+        key_map = {
+            'Album': 'album',
+            'Artist': 'artist',
+            'Song': 'track'
+        }
+        contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
+        if type(contents) is list:
+            for content in contents:
+                music_track = {}
+                if type(content) is not dict:
+                    continue
+                videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
+                if type(videoSecondaryInfoRenderer) is not dict:
+                    continue
+                rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
+                if type(rows) is not list:
+                    continue
+                for row in rows:
+                    metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
+                    if type(metadataRowRenderer) is not dict:
+                        continue
+                    key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
+                    value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
+                        try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
+                    if type(key) is not str or type(value) is not str:
+                        continue
+                    if key in key_map:
+                        if key_map[key] in music_track:
+                            # we've started on a new track
+                            music_metadata.append(music_track)
+                            music_track = {}
+                        music_track[key_map[key]] = value
+                if len(music_track.keys()):
+                    music_metadata.append(music_track)
+        return music_metadata
  
      def _get_automatic_captions(self, video_id, webpage):
          """We need the webpage for getting the captions url, pass it as an
@@ -1528,10 +1511,11 @@ def _get_automatic_captions(self, video_id, webpage):
              self._downloader.report_warning(err_msg)
              return {}
          try:
-            args = player_config['args']
-            caption_url = args.get('ttsurl')
-            if caption_url:
+            if "args" in player_config and "ttsurl" in player_config["args"]:
+                args = player_config['args']
+                caption_url = args['ttsurl']
                  timestamp = args['timestamp']
+
                  # We get the available subtitles
                  list_params = compat_urllib_parse_urlencode({
                      'type': 'list',
@@ -1587,40 +1571,50 @@ def make_captions(sub_url, sub_langs):
                  return captions
  
              # New captions format as of 22.06.2017
-            player_response = args.get('player_response')
-            if player_response and isinstance(player_response, compat_str):
-                player_response = self._parse_json(
-                    player_response, video_id, fatal=False)
-                if player_response:
-                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
-                    caption_tracks = renderer['captionTracks']
-                    for caption_track in caption_tracks:
-                        if 'kind' not in caption_track:
-                            # not an automatic transcription
-                            continue
-                        base_url = caption_track['baseUrl']
-                        sub_lang_list = []
-                        for lang in renderer['translationLanguages']:
-                            lang_code = lang.get('languageCode')
-                            if lang_code:
-                                sub_lang_list.append(lang_code)
-                        return make_captions(base_url, sub_lang_list)
-
-                    self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
-                    return {}
-            # Some videos don't provide ttsurl but rather caption_tracks and
-            # caption_translation_languages (e.g. 20LmZk1hakA)
-            # Does not used anymore as of 22.06.2017
-            caption_tracks = args['caption_tracks']
-            caption_translation_languages = args['caption_translation_languages']
-            caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
-            sub_lang_list = []
-            for lang in caption_translation_languages.split(','):
-                lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
-                sub_lang = lang_qs.get('lc', [None])[0]
-                if sub_lang:
-                    sub_lang_list.append(sub_lang)
-            return make_captions(caption_url, sub_lang_list)
+            if "args" in player_config:
+                player_response = player_config["args"].get('player_response')
+            else:
+                # New player system (ytInitialPlayerResponse) as of October 2020
+                player_response = player_config
+
+            if player_response:
+                if isinstance(player_response, compat_str):
+                    player_response = self._parse_json(
+                        player_response, video_id, fatal=False)
+
+                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
+                caption_tracks = renderer['captionTracks']
+                for caption_track in caption_tracks:
+                    if 'kind' not in caption_track:
+                        # not an automatic transcription
+                        continue
+                    base_url = caption_track['baseUrl']
+                    sub_lang_list = []
+                    for lang in renderer['translationLanguages']:
+                        lang_code = lang.get('languageCode')
+                        if lang_code:
+                            sub_lang_list.append(lang_code)
+                    return make_captions(base_url, sub_lang_list)
+
+                self._downloader.report_warning("Couldn't find automatic captions for %s" % video_id)
+                return {}
+
+            if "args" in player_config:
+                args = player_config["args"]
+
+                # Some videos don't provide ttsurl but rather caption_tracks and
+                # caption_translation_languages (e.g. 20LmZk1hakA)
+                # Does not used anymore as of 22.06.2017
+                caption_tracks = args['caption_tracks']
+                caption_translation_languages = args['caption_translation_languages']
+                caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
+                sub_lang_list = []
+                for lang in caption_translation_languages.split(','):
+                    lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
+                    sub_lang = lang_qs.get('lc', [None])[0]
+                    if sub_lang:
+                        sub_lang_list.append(sub_lang)
+                return make_captions(caption_url, sub_lang_list)
          # An extractor error can be raise by the download process if there are
          # no automatic captions but there are subtitles
          except (KeyError, IndexError, ExtractorError):
@@ -1854,6 +1848,13 @@ def extract_player_response(player_response, video_id):
                  add_dash_mpd_pr(pl_response)
                  return pl_response
  
+        def extract_embedded_config(embed_webpage, video_id):
+            embedded_config = self._search_regex(
+                r'setConfig\(({.*})\);',
+                embed_webpage, 'ytInitialData', default=None)
+            if embedded_config:
+                return embedded_config
+
          player_response = {}
  
          # Get video info
@@ -1867,15 +1868,30 @@ def extract_player_response(player_response, video_id):
              # this can be viewed without login into Youtube
              url = proto + '://www.youtube.com/embed/%s' % video_id
              embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
-            # check if video is only playable on youtube - if so it requires auth (cookies)
-            if re.search(r'player-unavailable">', embed_webpage) is not None:
+            ext = extract_embedded_config(embed_webpage, video_id)
+            # playabilityStatus = re.search(r'{\\\"status\\\":\\\"(?P<playabilityStatus>[^\"]+)\\\"', ext)
+            playable_in_embed = re.search(r'{\\\"playableInEmbed\\\":(?P<playableinEmbed>[^\,]+)', ext)
+            if not playable_in_embed:
+                self.to_screen('Could not determine whether playabale in embed for video %s' % video_id)
+                playable_in_embed = ''
+            else:
+                playable_in_embed = playable_in_embed.group('playableinEmbed')
+            # check if video is only playable on youtube in other words not playable in embed - if so it requires auth (cookies)
+            # if re.search(r'player-unavailable">', embed_webpage) is not None:
+            if playable_in_embed == 'false':
+                '''
+                # TODO apply this patch when Support for Python 2.6(!) and above drops
                  if ({'VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID'} <= cookie_keys
-                    or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
-                        age_gate = False
-                        # Try looking directly into the video webpage
-                        ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
-                        if ytplayer_config:
-                            args = ytplayer_config['args']
+                        or {'VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO'} <= cookie_keys):
+                '''
+                if (set(('VISITOR_INFO1_LIVE', 'HSID', 'SSID', 'SID')) <= set(cookie_keys)
+                        or set(('VISITOR_INFO1_LIVE', '__Secure-3PSID', 'LOGIN_INFO')) <= set(cookie_keys)):
+                    age_gate = False
+                    # Try looking directly into the video webpage
+                    ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
+                    if ytplayer_config:
+                        args = ytplayer_config.get("args")
+                        if args is not None:
                              if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
                                  # Convert to the same format returned by compat_parse_qs
                                  video_info = dict((k, [v]) for k, v in args.items())
@@ -1890,8 +1906,10 @@ def extract_player_response(player_response, video_id):
                                  is_live = True
                              if not player_response:
                                  player_response = extract_player_response(args.get('player_response'), video_id)
-                        if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
-                            add_dash_mpd_pr(player_response)
+                        elif not player_response:
+                            player_response = ytplayer_config
+                    if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
+                        add_dash_mpd_pr(player_response)
                  else:
                      raise ExtractorError('Video is age restricted and only playable on Youtube. Requires cookies!', expected=True)
              else:
@@ -1919,8 +1937,8 @@ def extract_player_response(player_response, video_id):
              age_gate = False
              # Try looking directly into the video webpage
              ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
-            if ytplayer_config:
-                args = ytplayer_config['args']
+            args = ytplayer_config.get("args")
+            if args is not None:
                  if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
                      # Convert to the same format returned by compat_parse_qs
                      video_info = dict((k, [v]) for k, v in args.items())
@@ -1935,6 +1953,8 @@ def extract_player_response(player_response, video_id):
                      is_live = True
                  if not player_response:
                      player_response = extract_player_response(args.get('player_response'), video_id)
+            elif not player_response:
+                player_response = ytplayer_config
              if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
                  add_dash_mpd_pr(player_response)
  
@@ -2142,7 +2162,7 @@ def _extract_filesize(media_url):
  
                  if cipher:
                      if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
-                        ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
+                        ASSETS_RE = r'(?:"assets":.+?"js":\s*("[^"]+"))|(?:"jsUrl":\s*("[^"]+"))'
                          jsplayer_url_json = self._search_regex(
                              ASSETS_RE,
                              embed_webpage if age_gate else video_webpage,
@@ -2419,6 +2439,14 @@ def extract_meta(field):
                  if release_year:
                      release_year = int(release_year)
  
+        yt_initial = self._get_yt_initial_data(video_id, video_webpage)
+        if yt_initial:
+            music_metadata = self._get_music_metadata_from_yt_initial(yt_initial)
+            if len(music_metadata):
+                album = music_metadata[0].get('album')
+                artist = music_metadata[0].get('artist')
+                track = music_metadata[0].get('track')
+
          m_episode = re.search(
              r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
              video_webpage)
@@ -2450,7 +2478,7 @@ def extract_meta(field):
  
          def _extract_count(count_name):
              return str_to_int(self._search_regex(
-                r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
+                r'"accessibilityData":\{"label":"([\d,\w]+) %ss"\}'
                  % re.escape(count_name),
                  video_webpage, count_name, default=None))
  
@@ -2479,6 +2507,14 @@ def _extract_count(count_name):
              video_duration = parse_duration(self._html_search_meta(
                  'duration', video_webpage, 'video duration'))
  
+        # Get Subscriber Count of channel
+        subscriber_count = parse_count(self._search_regex(
+            r'"text":"([\d\.]+\w?) subscribers"',
+            video_webpage,
+            'subscriber count',
+            default=None
+        ))
+
          # annotations
          video_annotations = None
          if self._downloader.params.get('writeannotations', False):
@@ -2616,6 +2652,7 @@ def decrypt_sig(mobj):
              'album': album,
              'release_date': release_date,
              'release_year': release_year,
+            'subscriber_count': subscriber_count,
          }
  
  
@@ -2650,6 +2687,12 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
      _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
      _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
      IE_NAME = 'youtube:playlist'
+    _YTM_PLAYLIST_PREFIX = 'RDCLAK5uy_'
+    _YTM_CHANNEL_INFO = {
+        'uploader': 'Youtube Music',
+        'uploader_id': 'music',  # or "UC-9-kyTW8ZkZNDHQJ6FgpwQ"
+        'uploader_url': 'https://www.youtube.com/music'
+    }
      _TESTS = [{
          'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
          'info_dict': {
@@ -2847,10 +2890,21 @@ def extract_videos_from_page(self, page):
  
          return zip(ids_in_page, titles_in_page)
  
+    def _extract_mix_ids_from_yt_initial(self, yt_initial):
+        ids = []
+        playlist_contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['contents'], list)
+        if playlist_contents:
+            for item in playlist_contents:
+                videoId = try_get(item, lambda x: x['playlistPanelVideoRenderer']['videoId'], compat_str)
+                if videoId:
+                    ids.append(videoId)
+        return ids
+
      def _extract_mix(self, playlist_id):
          # The mixes are generated from a single video
          # the id of the playlist is just 'RD' + video_id
          ids = []
+        yt_initial = None
          last_id = playlist_id[-11:]
          for n in itertools.count(1):
              url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
@@ -2860,6 +2914,13 @@ def _extract_mix(self, playlist_id):
                  r'''(?xs)data-video-username=".*?".*?
                             href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
                  webpage))
+
+            # if no ids in html of page, try using embedded json
+            if (len(new_ids) == 0):
+                yt_initial = self._get_yt_initial_data(playlist_id, webpage)
+                if yt_initial:
+                    new_ids = self._extract_mix_ids_from_yt_initial(yt_initial)
+
              # Fetch new pages until all the videos are repeated, it seems that
              # there are always 51 unique videos.
              new_ids = [_id for _id in new_ids if _id not in ids]
@@ -2877,6 +2938,9 @@ def _extract_mix(self, playlist_id):
              or search_title('title'))
          title = clean_html(title_span)
  
+        if not title:
+            title = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist']['title'], compat_str)
+
          return self.playlist_result(url_results, playlist_id, title)
  
      def _extract_playlist(self, playlist_id):
@@ -2938,6 +3002,8 @@ def _extract_playlist(self, playlist_id):
              'uploader_id': uploader_id,
              'uploader_url': uploader_url,
          })
+        if playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
+            playlist.update(self._YTM_CHANNEL_INFO)
  
          return has_videos, playlist
  
@@ -2968,8 +3034,10 @@ def _real_extract(self, url):
              return video
  
          if playlist_id.startswith(('RD', 'UL', 'PU')):
-            # Mixes require a custom extraction process
-            return self._extract_mix(playlist_id)
+            if not playlist_id.startswith(self._YTM_PLAYLIST_PREFIX):
+                # Mixes require a custom extraction process,
+                # Youtube Music playlists act like normal playlists (with randomized order)
+                return self._extract_mix(playlist_id)
  
          has_videos, playlist = self._extract_playlist(playlist_id)
          if has_videos or not video_id:
@@ -3228,68 +3296,104 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
      }]
  
  
-class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
-    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
-
-
-class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
+class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistBaseInfoExtractor):
      IE_DESC = 'YouTube.com searches'
      # there doesn't appear to be a real limit, for example if you search for
      # 'python' you get more than 8.000.000 results
      _MAX_RESULTS = float('inf')
      IE_NAME = 'youtube:search'
      _SEARCH_KEY = 'ytsearch'
-    _EXTRA_QUERY_ARGS = {}
+    _SEARCH_PARAMS = None
      _TESTS = []
  
-    def _get_n_results(self, query, n):
-        """Get a specified number of results for a query"""
-
-        videos = []
-        limit = n
-
-        url_query = {
-            'search_query': query.encode('utf-8'),
+    def _entries(self, query, n):
+        data = {
+            'context': {
+                'client': {
+                    'clientName': 'WEB',
+                    'clientVersion': '2.20201021.03.00',
+                }
+            },
+            'query': query,
          }
-        url_query.update(self._EXTRA_QUERY_ARGS)
-        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
-
-        for pagenum in itertools.count(1):
-            data = self._download_json(
-                result_url, video_id='query "%s"' % query,
-                note='Downloading page %s' % pagenum,
-                errnote='Unable to download API page',
-                query={'spf': 'navigate'})
-            html_content = data[1]['body']['content']
-
-            if 'class="search-message' in html_content:
-                raise ExtractorError(
-                    '[youtube] No video results', expected=True)
-
-            new_videos = list(self._process_page(html_content))
-            videos += new_videos
-            if not new_videos or len(videos) > limit:
+        if self._SEARCH_PARAMS:
+            data['params'] = self._SEARCH_PARAMS
+        total = 0
+        for page_num in itertools.count(1):
+            search = self._download_json(
+                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+                video_id='query "%s"' % query,
+                note='Downloading page %s' % page_num,
+                errnote='Unable to download API page', fatal=False,
+                data=json.dumps(data).encode('utf8'),
+                headers={'content-type': 'application/json'})
+            if not search:
                  break
-            next_link = self._html_search_regex(
-                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
-                html_content, 'next link', default=None)
-            if next_link is None:
+            slr_contents = try_get(
+                search,
+                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
+                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
+                list)
+            if not slr_contents:
                  break
-            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
+            isr_contents = try_get(
+                slr_contents,
+                lambda x: x[0]['itemSectionRenderer']['contents'],
+                list)
+            if not isr_contents:
+                break
+            for content in isr_contents:
+                if not isinstance(content, dict):
+                    continue
+                video = content.get('videoRenderer')
+                if not isinstance(video, dict):
+                    continue
+                video_id = video.get('videoId')
+                if not video_id:
+                    continue
+                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
+                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
+                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
+                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
+                view_count = int_or_none(self._search_regex(
+                    r'^(\d+)', re.sub(r'\s', '', view_count_text),
+                    'view count', default=None))
+                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
+                total += 1
+                yield {
+                    '_type': 'url_transparent',
+                    'ie_key': YoutubeIE.ie_key(),
+                    'id': video_id,
+                    'url': video_id,
+                    'title': title,
+                    'description': description,
+                    'duration': duration,
+                    'view_count': view_count,
+                    'uploader': uploader,
+                }
+                if total == n:
+                    return
+            token = try_get(
+                slr_contents,
+                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
+                compat_str)
+            if not token:
+                break
+            data['continuation'] = token
  
-        if len(videos) > n:
-            videos = videos[:n]
-        return self.playlist_result(videos, query)
+    def _get_n_results(self, query, n):
+        """Get a specified number of results for a query"""
+        return self.playlist_result(self._entries(query, n), query)
  
  
  class YoutubeSearchDateIE(YoutubeSearchIE):
      IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
      _SEARCH_KEY = 'ytsearchdate'
      IE_DESC = 'YouTube.com searches, newest videos first'
-    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
+    _SEARCH_PARAMS = 'CAI%3D'
  
  
-class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
+class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor):
      IE_DESC = 'YouTube.com search URLs'
      IE_NAME = 'youtube:search_url'
      _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
@@ -3304,11 +3408,20 @@ class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
          'only_matching': True,
      }]
  
+    def _process_json_dict(self, obj, videos, c):
+        if "videoId" in obj:
+            videos.append(obj)
+            return
+
+        if "nextContinuationData" in obj:
+            c["continuation"] = obj["nextContinuationData"]
+            return
+
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          query = compat_urllib_parse_unquote_plus(mobj.group('query'))
          webpage = self._download_webpage(url, query)
-        return self.playlist_result(self._process_page(webpage), playlist_title=query)
+        return self.playlist_result(self._entries(webpage, query, max_pages=5), playlist_title=query)
  
  
  class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
@@ -3330,7 +3443,7 @@ def _real_extract(self, url):
              'https://www.youtube.com/show/%s/playlists' % playlist_id)
  
  
-class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
+class YoutubeFeedsInfoExtractor(YoutubePlaylistBaseInfoExtractor):
      """
      Base class for feed extractors
      Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
@@ -3344,44 +3457,35 @@ def IE_NAME(self):
      def _real_initialize(self):
          self._login()
  
-    def _entries(self, page):
-        # The extraction process is the same as for playlists, but the regex
-        # for the video ids doesn't contain an index
-        ids = []
-        more_widget_html = content_html = page
-        for page_num in itertools.count(1):
-            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
-
-            # 'recommended' feed has infinite 'load more' and each new portion spins
-            # the same videos in (sometimes) slightly different order, so we'll check
-            # for unicity and break when portion has no new videos
-            new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
-            if not new_ids:
-                break
+    def _process_entries(self, entries, seen):
+        new_info = []
+        for v in entries:
+            v_id = try_get(v, lambda x: x['videoId'])
+            if not v_id:
+                continue
  
-            ids.extend(new_ids)
+            have_video = False
+            for old in seen:
+                if old['videoId'] == v_id:
+                    have_video = True
+                    break
  
-            for entry in self._ids_to_results(new_ids):
-                yield entry
+            if not have_video:
+                new_info.append(v)
  
-            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
-            if not mobj:
-                break
+        if not new_info:
+            return
  
-            more = self._download_json(
-                'https://www.youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
-                'Downloading page #%s' % page_num,
-                transform_source=uppercase_escape,
-                headers=self._YOUTUBE_CLIENT_HEADERS)
-            content_html = more['content_html']
-            more_widget_html = more['load_more_widget_html']
+        seen.extend(new_info)
+        for video in new_info:
+            yield self.url_result(try_get(video, lambda x: x['videoId']), YoutubeIE.ie_key(), video_title=self._extract_title(video))
  
      def _real_extract(self, url):
          page = self._download_webpage(
              'https://www.youtube.com/feed/%s' % self._FEED_NAME,
              self._PLAYLIST_TITLE)
-        return self.playlist_result(
-            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
+        return self.playlist_result(self._entries(page, self._PLAYLIST_TITLE),
+                                    playlist_title=self._PLAYLIST_TITLE)
  
  
  class YoutubeWatchLaterIE(YoutubePlaylistIE):