# Source: yt_dlp/extractor/youtube.py (yt-dlp)
# [youtube] Fix controversial videos when requested via API (#533)
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bool_or_none,
32 bytes_to_intlist,
33 clean_html,
34 dict_get,
35 datetime_from_str,
36 error_to_compat_str,
37 ExtractorError,
38 format_field,
39 float_or_none,
40 int_or_none,
41 intlist_to_bytes,
42 mimetype2ext,
43 parse_codecs,
44 parse_count,
45 parse_duration,
46 qualities,
47 remove_start,
48 smuggle_url,
49 str_or_none,
50 str_to_int,
51 traverse_obj,
52 try_get,
53 unescapeHTML,
54 unified_strdate,
55 unsmuggle_url,
56 update_url_query,
57 url_or_none,
58 urlencode_postdata,
59 urljoin,
60 variadic,
61 )
62
63
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    query_string = compat_urlparse.urlparse(url).query
    return compat_urlparse.parse_qs(query_string)
66
67
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Google account login endpoints (legacy username/password flow, broken)
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # {0} is the "TL" token extracted from the challenge response
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Path segments on youtube.com that can never be a channel/user name;
    # used in URL patterns to avoid matching reserved paths
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Known playlist ID prefixes and aliases (e.g. PL..., WL = Watch Later)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
88 def _login(self):
89 """
90 Attempt to log in to YouTube.
91 True is returned if successful or skipped.
92 False is returned if login failed.
93
94 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
95 """
96
97 def warn(message):
98 self.report_warning(message)
99
100 # username+password login is broken
101 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
102 self.raise_login_required(
103 'Login details are needed to download this content', method='cookies')
104 username, password = self._get_login_info()
105 if username:
106 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
107 return
108
109 # Everything below this is broken!
110 r'''
111 # No authentication to be performed
112 if username is None:
113 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
114 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
115 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
116 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
117 return True
118
119 login_page = self._download_webpage(
120 self._LOGIN_URL, None,
121 note='Downloading login page',
122 errnote='unable to fetch login page', fatal=False)
123 if login_page is False:
124 return
125
126 login_form = self._hidden_inputs(login_page)
127
128 def req(url, f_req, note, errnote):
129 data = login_form.copy()
130 data.update({
131 'pstMsg': 1,
132 'checkConnection': 'youtube',
133 'checkedDomains': 'youtube',
134 'hl': 'en',
135 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
136 'f.req': json.dumps(f_req),
137 'flowName': 'GlifWebSignIn',
138 'flowEntry': 'ServiceLogin',
139 # TODO: reverse actual botguard identifier generation algo
140 'bgRequest': '["identifier",""]',
141 })
142 return self._download_json(
143 url, None, note=note, errnote=errnote,
144 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
145 fatal=False,
146 data=urlencode_postdata(data), headers={
147 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
148 'Google-Accounts-XSRF': 1,
149 })
150
151 lookup_req = [
152 username,
153 None, [], None, 'US', None, None, 2, False, True,
154 [
155 None, None,
156 [2, 1, None, 1,
157 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
158 None, [], 4],
159 1, [None, None, []], None, None, None, True
160 ],
161 username,
162 ]
163
164 lookup_results = req(
165 self._LOOKUP_URL, lookup_req,
166 'Looking up account info', 'Unable to look up account info')
167
168 if lookup_results is False:
169 return False
170
171 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
172 if not user_hash:
173 warn('Unable to extract user hash')
174 return False
175
176 challenge_req = [
177 user_hash,
178 None, 1, None, [1, None, None, None, [password, None, True]],
179 [
180 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
181 1, [None, None, []], None, None, None, True
182 ]]
183
184 challenge_results = req(
185 self._CHALLENGE_URL, challenge_req,
186 'Logging in', 'Unable to log in')
187
188 if challenge_results is False:
189 return
190
191 login_res = try_get(challenge_results, lambda x: x[0][5], list)
192 if login_res:
193 login_msg = try_get(login_res, lambda x: x[5], compat_str)
194 warn(
195 'Unable to login: %s' % 'Invalid password'
196 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
197 return False
198
199 res = try_get(challenge_results, lambda x: x[0][-1], list)
200 if not res:
201 warn('Unable to extract result entry')
202 return False
203
204 login_challenge = try_get(res, lambda x: x[0][0], list)
205 if login_challenge:
206 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
207 if challenge_str == 'TWO_STEP_VERIFICATION':
208 # SEND_SUCCESS - TFA code has been successfully sent to phone
209 # QUOTA_EXCEEDED - reached the limit of TFA codes
210 status = try_get(login_challenge, lambda x: x[5], compat_str)
211 if status == 'QUOTA_EXCEEDED':
212 warn('Exceeded the limit of TFA codes, try later')
213 return False
214
215 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
216 if not tl:
217 warn('Unable to extract TL')
218 return False
219
220 tfa_code = self._get_tfa_info('2-step verification code')
221
222 if not tfa_code:
223 warn(
224 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
225 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
226 return False
227
228 tfa_code = remove_start(tfa_code, 'G-')
229
230 tfa_req = [
231 user_hash, None, 2, None,
232 [
233 9, None, None, None, None, None, None, None,
234 [None, tfa_code, True, 2]
235 ]]
236
237 tfa_results = req(
238 self._TFA_URL.format(tl), tfa_req,
239 'Submitting TFA code', 'Unable to submit TFA code')
240
241 if tfa_results is False:
242 return False
243
244 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
245 if tfa_res:
246 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
247 warn(
248 'Unable to finish TFA: %s' % 'Invalid TFA code'
249 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
250 return False
251
252 check_cookie_url = try_get(
253 tfa_results, lambda x: x[0][-1][2], compat_str)
254 else:
255 CHALLENGES = {
256 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
257 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
258 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
259 }
260 challenge = CHALLENGES.get(
261 challenge_str,
262 '%s returned error %s.' % (self.IE_NAME, challenge_str))
263 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
264 return False
265 else:
266 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
267
268 if not check_cookie_url:
269 warn('Unable to extract CheckCookie URL')
270 return False
271
272 check_cookie_results = self._download_webpage(
273 check_cookie_url, None, 'Checking cookie', fatal=False)
274
275 if check_cookie_results is False:
276 return False
277
278 if 'https://myaccount.google.com/' not in check_cookie_results:
279 warn('Unable to log in')
280 return False
281
282 return True
283 '''
284
285 def _initialize_consent(self):
286 cookies = self._get_cookies('https://www.youtube.com/')
287 if cookies.get('__Secure-3PSID'):
288 return
289 consent_id = None
290 consent = cookies.get('CONSENT')
291 if consent:
292 if 'YES' in consent.value:
293 return
294 consent_id = self._search_regex(
295 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
296 if not consent_id:
297 consent_id = random.randint(100, 999)
298 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
299
300 def _real_initialize(self):
301 self._initialize_consent()
302 if self._downloader is None:
303 return
304 if not self._login():
305 return
306
    # Regexes locating the JSON blobs YouTube embeds in watch/browse pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Terminator marking the end of an inline JSON assignment
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Built-in innertube client configurations, keyed by INNERTUBE_CLIENT_NAME.
    # Used as a fallback whenever a page ytcfg is unavailable or incomplete.
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 3
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 55
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 21
        }
    }

    # API hostname per innertube client; clients not listed use 'WEB'
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }
404
405 def _get_default_ytcfg(self, client='WEB'):
406 if client in self._YT_DEFAULT_YTCFGS:
407 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
408 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
409 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
410
411 def _get_innertube_host(self, client='WEB'):
412 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
413
414 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
415 # try_get but with fallback to default ytcfg client values when present
416 _func = lambda y: try_get(y, getter, expected_type)
417 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
418
419 def _extract_client_name(self, ytcfg, default_client='WEB'):
420 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
421
422 @staticmethod
423 def _extract_session_index(ytcfg):
424 return int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
425
426 def _extract_client_version(self, ytcfg, default_client='WEB'):
427 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
428
429 def _extract_api_key(self, ytcfg=None, default_client='WEB'):
430 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
431
432 def _extract_context(self, ytcfg=None, default_client='WEB'):
433 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
434 context = _get_context(ytcfg)
435 if context:
436 return context
437
438 context = _get_context(self._get_default_ytcfg(default_client))
439 if not ytcfg:
440 return context
441
442 # Recreate the client context (required)
443 context['client'].update({
444 'clientVersion': self._extract_client_version(ytcfg, default_client),
445 'clientName': self._extract_client_name(ytcfg, default_client),
446 })
447 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
448 if visitor_data:
449 context['client']['visitorData'] = visitor_data
450 return context
451
452 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
453 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
454 # See: https://github.com/yt-dlp/yt-dlp/issues/393
455 yt_cookies = self._get_cookies('https://www.youtube.com')
456 sapisid_cookie = dict_get(
457 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
458 if sapisid_cookie is None:
459 return
460 time_now = round(time.time())
461 # SAPISID cookie is required if not already present
462 if not yt_cookies.get('SAPISID'):
463 self._set_cookie(
464 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
465 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
466 sapisidhash = hashlib.sha1(
467 f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
468 return f'SAPISIDHASH {time_now}_{sapisidhash}'
469
470 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
471 note='Downloading API JSON', errnote='Unable to download API page',
472 context=None, api_key=None, api_hostname=None, default_client='WEB'):
473
474 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
475 data.update(query)
476 real_headers = self._generate_api_headers(client=default_client)
477 real_headers.update({'content-type': 'application/json'})
478 if headers:
479 real_headers.update(headers)
480 return self._download_json(
481 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
482 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
483 data=json.dumps(data).encode('utf8'), headers=real_headers,
484 query={'key': api_key or self._extract_api_key()})
485
486 def _extract_yt_initial_data(self, video_id, webpage):
487 return self._parse_json(
488 self._search_regex(
489 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
490 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
491 video_id)
492
493 def _extract_identity_token(self, webpage, item_id):
494 ytcfg = self._extract_ytcfg(item_id, webpage)
495 if ytcfg:
496 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
497 if token:
498 return token
499 return self._search_regex(
500 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
501 'identity token', default=None)
502
503 @staticmethod
504 def _extract_account_syncid(*args):
505 """
506 Extract syncId required to download private playlists of secondary channels
507 @params response and/or ytcfg
508 """
509 for data in args:
510 # ytcfg includes channel_syncid if on secondary channel
511 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
512 if delegated_sid:
513 return delegated_sid
514 sync_ids = (try_get(
515 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
516 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
517 if len(sync_ids) >= 2 and sync_ids[1]:
518 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
519 # and just "user_syncid||" for primary channel. We only want the channel_syncid
520 return sync_ids[0]
521
522 def _extract_ytcfg(self, video_id, webpage):
523 if not webpage:
524 return {}
525 return self._parse_json(
526 self._search_regex(
527 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
528 default='{}'), video_id, fatal=False) or {}
529
    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
                              visitor_data=None, api_hostname=None, client='WEB', session_index=None):
        # Build the HTTP headers for an innertube API request, mixing values
        # from the page ytcfg (when given) with the per-client defaults.
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
            'Origin': origin
        }
        # Fall back to the visitorData embedded in the ytcfg context
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
        if session_index is None and ytcfg:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            # Multi-login sessions select the active account via X-Goog-AuthUser
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            # Cookie-based SAPISIDHASH auth; X-Origin must match the hashed origin
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers
557
558 @staticmethod
559 def _build_api_continuation_query(continuation, ctp=None):
560 query = {
561 'continuation': continuation
562 }
563 # TODO: Inconsistency with clickTrackingParams.
564 # Currently we have a fixed ctp contained within context (from ytcfg)
565 # and a ctp in root query for continuation.
566 if ctp:
567 query['clickTracking'] = {'clickTrackingParams': ctp}
568 return query
569
570 @classmethod
571 def _extract_next_continuation_data(cls, renderer):
572 next_continuation = try_get(
573 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
574 lambda x: x['continuation']['reloadContinuationData']), dict)
575 if not next_continuation:
576 return
577 continuation = next_continuation.get('continuation')
578 if not continuation:
579 return
580 ctp = next_continuation.get('clickTrackingParams')
581 return cls._build_api_continuation_query(continuation, ctp)
582
583 @classmethod
584 def _extract_continuation_ep_data(cls, continuation_ep: dict):
585 if isinstance(continuation_ep, dict):
586 continuation = try_get(
587 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
588 if not continuation:
589 return
590 ctp = continuation_ep.get('clickTrackingParams')
591 return cls._build_api_continuation_query(continuation, ctp)
592
593 @classmethod
594 def _extract_continuation(cls, renderer):
595 next_continuation = cls._extract_next_continuation_data(renderer)
596 if next_continuation:
597 return next_continuation
598
599 contents = []
600 for key in ('contents', 'items'):
601 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
602
603 for content in contents:
604 if not isinstance(content, dict):
605 continue
606 continuation_ep = try_get(
607 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
608 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
609 dict)
610 continuation = cls._extract_continuation_ep_data(continuation_ep)
611 if continuation:
612 return continuation
613
614 @classmethod
615 def _extract_alerts(cls, data):
616 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
617 if not isinstance(alert_dict, dict):
618 continue
619 for alert in alert_dict.values():
620 alert_type = alert.get('type')
621 if not alert_type:
622 continue
623 message = cls._get_text(alert.get('text'))
624 if message:
625 yield alert_type, message
626
627 def _report_alerts(self, alerts, expected=True):
628 errors = []
629 warnings = []
630 for alert_type, alert_message in alerts:
631 if alert_type.lower() == 'error':
632 errors.append([alert_type, alert_message])
633 else:
634 warnings.append([alert_type, alert_message])
635
636 for alert_type, alert_message in (warnings + errors[:-1]):
637 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
638 if errors:
639 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
640
641 def _extract_and_report_alerts(self, data, *args, **kwargs):
642 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
643
644 def _extract_badges(self, renderer: dict):
645 badges = set()
646 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
647 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
648 if label:
649 badges.add(label.lower())
650 return badges
651
    @staticmethod
    def _get_text(data, getter=None, max_runs=None):
        # Extract plain text from YouTube's text objects, which are either
        # {'simpleText': ...} or {'runs': [{'text': ...}, ...]}.
        # *getter* is a callable (or list of callables, tried in order) that
        # navigates from *data* to the text object; None means *data* itself.
        # *max_runs* caps how many runs are joined.
        for get in variadic(getter):
            d = try_get(data, get) if get is not None else data
            text = try_get(d, lambda x: x['simpleText'], compat_str)
            if text:
                return text
            runs = try_get(d, lambda x: x['runs'], list) or []
            if not runs and isinstance(d, list):
                # A bare list is treated as the runs themselves
                runs = d

            def get_runs(runs):
                for run in runs[:min(len(runs), max_runs or len(runs))]:
                    yield try_get(run, lambda x: x['text'], compat_str) or ''

            text = ''.join(get_runs(runs))
            if text:
                return text
670
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        # Call the innertube API with retries. Retries on intermittent HTTP
        # 500/503/404 errors and on structurally incomplete responses (those
        # missing every key in *check_get_keys*). Returns the parsed response,
        # or None when non-fatal and all retries are exhausted.
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
726
727 @staticmethod
728 def is_music_url(url):
729 return re.match(r'https?://music\.youtube\.com/', url) is not None
730
731 def _extract_video(self, renderer):
732 video_id = renderer.get('videoId')
733 title = self._get_text(renderer.get('title'))
734 description = self._get_text(renderer.get('descriptionSnippet'))
735 duration = parse_duration(self._get_text(renderer.get('lengthText')))
736 view_count_text = self._get_text(renderer.get('viewCountText')) or ''
737 view_count = str_to_int(self._search_regex(
738 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
739 'view count', default=None))
740
741 uploader = self._get_text(renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText']))
742
743 return {
744 '_type': 'url',
745 'ie_key': YoutubeIE.ie_key(),
746 'id': video_id,
747 'url': video_id,
748 'title': title,
749 'description': description,
750 'duration': duration,
751 'view_count': view_count,
752 'uploader': uploader,
753 }
754
755
756 class YoutubeIE(YoutubeBaseInfoExtractor):
757 IE_DESC = 'YouTube.com'
758 _INVIDIOUS_SITES = (
759 # invidious-redirect websites
760 r'(?:www\.)?redirect\.invidious\.io',
761 r'(?:(?:www|dev)\.)?invidio\.us',
762 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
763 r'(?:www\.)?invidious\.pussthecat\.org',
764 r'(?:www\.)?invidious\.zee\.li',
765 r'(?:www\.)?invidious\.ethibox\.fr',
766 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
767 # youtube-dl invidious instances list
768 r'(?:(?:www|no)\.)?invidiou\.sh',
769 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
770 r'(?:www\.)?invidious\.kabi\.tk',
771 r'(?:www\.)?invidious\.mastodon\.host',
772 r'(?:www\.)?invidious\.zapashcanon\.fr',
773 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
774 r'(?:www\.)?invidious\.tinfoil-hat\.net',
775 r'(?:www\.)?invidious\.himiko\.cloud',
776 r'(?:www\.)?invidious\.reallyancient\.tech',
777 r'(?:www\.)?invidious\.tube',
778 r'(?:www\.)?invidiou\.site',
779 r'(?:www\.)?invidious\.site',
780 r'(?:www\.)?invidious\.xyz',
781 r'(?:www\.)?invidious\.nixnet\.xyz',
782 r'(?:www\.)?invidious\.048596\.xyz',
783 r'(?:www\.)?invidious\.drycat\.fr',
784 r'(?:www\.)?inv\.skyn3t\.in',
785 r'(?:www\.)?tube\.poal\.co',
786 r'(?:www\.)?tube\.connect\.cafe',
787 r'(?:www\.)?vid\.wxzm\.sx',
788 r'(?:www\.)?vid\.mint\.lgbt',
789 r'(?:www\.)?vid\.puffyan\.us',
790 r'(?:www\.)?yewtu\.be',
791 r'(?:www\.)?yt\.elukerio\.org',
792 r'(?:www\.)?yt\.lelux\.fi',
793 r'(?:www\.)?invidious\.ggc-project\.de',
794 r'(?:www\.)?yt\.maisputain\.ovh',
795 r'(?:www\.)?ytprivate\.com',
796 r'(?:www\.)?invidious\.13ad\.de',
797 r'(?:www\.)?invidious\.toot\.koeln',
798 r'(?:www\.)?invidious\.fdn\.fr',
799 r'(?:www\.)?watch\.nettohikari\.com',
800 r'(?:www\.)?invidious\.namazso\.eu',
801 r'(?:www\.)?invidious\.silkky\.cloud',
802 r'(?:www\.)?invidious\.exonip\.de',
803 r'(?:www\.)?invidious\.riverside\.rocks',
804 r'(?:www\.)?invidious\.blamefran\.net',
805 r'(?:www\.)?invidious\.moomoo\.de',
806 r'(?:www\.)?ytb\.trom\.tf',
807 r'(?:www\.)?yt\.cyberhost\.uk',
808 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
809 r'(?:www\.)?qklhadlycap4cnod\.onion',
810 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
811 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
812 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
813 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
814 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
815 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
816 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
817 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
818 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
819 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
820 )
821 _VALID_URL = r"""(?x)^
822 (
823 (?:https?://|//) # http(s):// or protocol-independent URL
824 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
825 (?:www\.)?deturl\.com/www\.youtube\.com|
826 (?:www\.)?pwnyoutube\.com|
827 (?:www\.)?hooktube\.com|
828 (?:www\.)?yourepeat\.com|
829 tube\.majestyc\.net|
830 %(invidious)s|
831 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
832 (?:.*?\#/)? # handle anchor (#/) redirect urls
833 (?: # the various things that can precede the ID:
834 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
835 |(?: # or the v= param in all its forms
836 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
837 (?:\?|\#!?) # the params delimiter ? or # or #!
838 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
839 v=
840 )
841 ))
842 |(?:
843 youtu\.be| # just youtu.be/xxxx
844 vid\.plus| # or vid.plus/xxxx
845 zwearz\.com/watch| # or zwearz.com/watch/xxxx
846 %(invidious)s
847 )/
848 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
849 )
850 )? # all until now is optional -> you can pass the naked ID
851 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
852 (?(1).+)? # if we found the ID, everything can follow
853 (?:\#|$)""" % {
854 'invidious': '|'.join(_INVIDIOUS_SITES),
855 }
856 _PLAYER_INFO_RE = (
857 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
858 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
859 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
860 )
    # Static itag -> format metadata table.
    # NOTE(review): presumably merged into the format dicts built during
    # extraction to fill in fields the stream URLs don't carry — the code that
    # consumes this table is outside this chunk; confirm against it.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }
    # Subtitle formats requested for captions
    # NOTE(review): the consuming caption code is outside this chunk — confirm
    # whether order expresses a preference
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Playability-status messages that (presumably) identify an age-gated
    # video — verify against the playability-status handling code
    _AGE_GATE_REASONS = (
        'Sign in to confirm your age',
        'This video may be inappropriate for some users.',
        'Sorry, this content is age-restricted.')

    # NOTE(review): disables InfoExtractor's generic geo-bypass machinery
    # for this extractor — confirm intent against the base class
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
    # Test-suite fixtures: each entry pairs a URL with the metadata the
    # extractor is expected to produce ('only_matching' entries only check
    # that _VALID_URL matches)
    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
                'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
                'upload_date': '20121002',
                'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'tags': ['youtube-dl'],
                'duration': 10,
                'view_count': int,
                'like_count': int,
                'dislike_count': int,
                'start_time': 1,
                'end_time': 9,
            }
        },
        {
            'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
            'note': 'Embed-only video (#1746)',
            'info_dict': {
                'id': 'yZIXLfi8CZQ',
                'ext': 'mp4',
                'upload_date': '20120608',
                'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
                'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
                'uploader': 'SET India',
                'uploader_id': 'setindia',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
                'age_limit': 18,
            },
            'skip': 'Private video',
        },
        {
            'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
            'note': 'Use the first video ID in the URL',
            'info_dict': {
                'id': 'BaW_jenozKc',
                'ext': 'mp4',
                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
                'uploader': 'Philipp Hagemeister',
                'uploader_id': 'phihag',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
                'upload_date': '20121002',
                'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                'categories': ['Science & Technology'],
                'tags': ['youtube-dl'],
                'duration': 10,
                'view_count': int,
                'like_count': int,
                'dislike_count': int,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
            'note': '256k DASH audio (format 141) via DASH manifest',
            'info_dict': {
                'id': 'a9LDPn-MO4I',
                'ext': 'm4a',
                'upload_date': '20121002',
                'uploader_id': '8KVIDEO',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
                'description': '',
                'uploader': '8KVIDEO',
                'title': 'UHDTV TEST 8K VIDEO.mp4'
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141',
            },
            'skip': 'format 141 not served anymore',
        },
        # DASH manifest with encrypted signature
        {
            'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
            'info_dict': {
                'id': 'IB3lcPjvWLA',
                'ext': 'm4a',
                'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
                'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
                'duration': 244,
                'uploader': 'AfrojackVEVO',
                'uploader_id': 'AfrojackVEVO',
                'upload_date': '20131011',
                'abr': 129.495,
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '141/bestaudio[ext=m4a]',
            },
        },
        # Normal age-gate video (embed allowed)
        {
            'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
            'info_dict': {
                'id': 'HtVdAasjOgU',
                'ext': 'mp4',
                'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
                'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
                'duration': 142,
                'uploader': 'The Witcher',
                'uploader_id': 'WitcherGame',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
                'upload_date': '20140605',
                'age_limit': 18,
            },
        },
        # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
        # YouTube Red ad is not captured for creator
        {
            'url': '__2ABJjxzNo',
            'info_dict': {
                'id': '__2ABJjxzNo',
                'ext': 'mp4',
                'duration': 266,
                'upload_date': '20100430',
                'uploader_id': 'deadmau5',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
                'creator': 'deadmau5',
                'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
                'uploader': 'deadmau5',
                'title': 'Deadmau5 - Some Chords (HD)',
                'alt_title': 'Some Chords',
            },
            'expected_warnings': [
                'DASH manifest missing',
            ]
        },
        # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
        {
            'url': 'lqQg6PlCWgI',
            'info_dict': {
                'id': 'lqQg6PlCWgI',
                'ext': 'mp4',
                'duration': 6085,
                'upload_date': '20150827',
                'uploader_id': 'olympic',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
                'description': 'HO09 - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
                'uploader': 'Olympic',
                'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
            },
            'params': {
                'skip_download': 'requires avconv',
            }
        },
        # Non-square pixels
        {
            'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
            'info_dict': {
                'id': '_b-2C3KPAM0',
                'ext': 'mp4',
                'stretched_ratio': 16 / 9.,
                'duration': 85,
                'upload_date': '20110310',
                'uploader_id': 'AllenMeow',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
                'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
                'uploader': '孫ᄋᄅ',
                'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
            },
        },
        # url_encoded_fmt_stream_map is empty string
        {
            'url': 'qEJwOuvDf7I',
            'info_dict': {
                'id': 'qEJwOuvDf7I',
                'ext': 'webm',
                'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
                'description': '',
                'upload_date': '20150404',
                'uploader_id': 'spbelect',
                'uploader': 'Наблюдатели Петербурга',
            },
            'params': {
                'skip_download': 'requires avconv',
            },
            'skip': 'This live event has ended.',
        },
        # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
        {
            'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
            'info_dict': {
                'id': 'FIl7x6_3R5Y',
                'ext': 'webm',
                'title': 'md5:7b81415841e02ecd4313668cde88737a',
                'description': 'md5:116377fd2963b81ec4ce64b542173306',
                'duration': 220,
                'upload_date': '20150625',
                'uploader_id': 'dorappi2000',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
                'uploader': 'dorappi2000',
                'formats': 'mincount:31',
            },
            'skip': 'not actual anymore',
        },
        # DASH manifest with segment_list
        {
            'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
            'md5': '8ce563a1d667b599d21064e982ab9e31',
            'info_dict': {
                'id': 'CsmdDsKjzN8',
                'ext': 'mp4',
                'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
                'uploader': 'Airtek',
                'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
                'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
                'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
            },
            'params': {
                'youtube_include_dash_manifest': True,
                'format': '135',  # bestvideo
            },
            'skip': 'This live event has ended.',
        },
        {
            # Multifeed videos (multiple cameras), URL is for Main Camera
            'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
            'info_dict': {
                'id': 'jvGDaLqkpTg',
                'title': 'Tom Clancy Free Weekend Rainbow Whatever',
                'description': 'md5:e03b909557865076822aa169218d6a5d',
            },
            'playlist': [{
                'info_dict': {
                    'id': 'jvGDaLqkpTg',
                    'ext': 'mp4',
                    'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
                    'description': 'md5:e03b909557865076822aa169218d6a5d',
                    'duration': 10643,
                    'upload_date': '20161111',
                    'uploader': 'Team PGP',
                    'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
                },
            }, {
                'info_dict': {
                    'id': '3AKt1R1aDnw',
                    'ext': 'mp4',
                    'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
                    'description': 'md5:e03b909557865076822aa169218d6a5d',
                    'duration': 10991,
                    'upload_date': '20161111',
                    'uploader': 'Team PGP',
                    'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
                },
            }, {
                'info_dict': {
                    'id': 'RtAMM00gpVc',
                    'ext': 'mp4',
                    'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
                    'description': 'md5:e03b909557865076822aa169218d6a5d',
                    'duration': 10995,
                    'upload_date': '20161111',
                    'uploader': 'Team PGP',
                    'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
                },
            }, {
                'info_dict': {
                    'id': '6N2fdlP3C5U',
                    'ext': 'mp4',
                    'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
                    'description': 'md5:e03b909557865076822aa169218d6a5d',
                    'duration': 10990,
                    'upload_date': '20161111',
                    'uploader': 'Team PGP',
                    'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
                    'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
                },
            }],
            'params': {
                'skip_download': True,
            },
        },
        {
            # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
            'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
            'info_dict': {
                'id': 'gVfLd0zydlo',
                'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
            },
            'playlist_count': 2,
            'skip': 'Not multifeed anymore',
        },
        {
            'url': 'https://vid.plus/FlRa-iH7PGw',
            'only_matching': True,
        },
        {
            'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
            'only_matching': True,
        },
        {
            # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
            # Also tests cut-off URL expansion in video description (see
            # https://github.com/ytdl-org/youtube-dl/issues/1892,
            # https://github.com/ytdl-org/youtube-dl/issues/8164)
            'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
            'info_dict': {
                'id': 'lsguqyKfVQg',
                'ext': 'mp4',
                'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
                'alt_title': 'Dark Walk - Position Music',
                'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
                'duration': 133,
                'upload_date': '20151119',
                'uploader_id': 'IronSoulElf',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
                'uploader': 'IronSoulElf',
                'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
                'track': 'Dark Walk - Position Music',
                'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
                'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
            'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
            'only_matching': True,
        },
        {
            # Video with yt:stretch=17:0
            'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
            'info_dict': {
                'id': 'Q39EVAstoRM',
                'ext': 'mp4',
                'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
                'description': 'md5:ee18a25c350637c8faff806845bddee9',
                'upload_date': '20151107',
                'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
                'uploader': 'CH GAMER DROID',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'This video does not exist.',
        },
        {
            # Video with incomplete 'yt:stretch=16:'
            'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
            'only_matching': True,
        },
        {
            # Video licensed under Creative Commons
            'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
            'info_dict': {
                'id': 'M4gD1WSo5mA',
                'ext': 'mp4',
                'title': 'md5:e41008789470fc2533a3252216f1c1d1',
                'description': 'md5:a677553cf0840649b731a3024aeff4cc',
                'duration': 721,
                'upload_date': '20150127',
                'uploader_id': 'BerkmanCenter',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
                'uploader': 'The Berkman Klein Center for Internet & Society',
                'license': 'Creative Commons Attribution license (reuse allowed)',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # Channel-like uploader_url
            'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
            'info_dict': {
                'id': 'eQcmzGIKrzg',
                'ext': 'mp4',
                'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
                'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
                'duration': 4060,
                'upload_date': '20151119',
                'uploader': 'Bernie Sanders',
                'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
                'license': 'Creative Commons Attribution license (reuse allowed)',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
            'only_matching': True,
        },
        {
            # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
            'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
            'only_matching': True,
        },
        {
            # Rental video preview
            'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
            'info_dict': {
                'id': 'uGpuVWrhIzE',
                'ext': 'mp4',
                'title': 'Piku - Trailer',
                'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
                'upload_date': '20150811',
                'uploader': 'FlixMatrix',
                'uploader_id': 'FlixMatrixKaravan',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
                'license': 'Standard YouTube License',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'This video is not available.',
        },
        {
            # YouTube Red video with episode data
            'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
            'info_dict': {
                'id': 'iqKdEhx-dD4',
                'ext': 'mp4',
                'title': 'Isolation - Mind Field (Ep 1)',
                'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
                'duration': 2085,
                'upload_date': '20170118',
                'uploader': 'Vsauce',
                'uploader_id': 'Vsauce',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
                'series': 'Mind Field',
                'season_number': 1,
                'episode_number': 1,
            },
            'params': {
                'skip_download': True,
            },
            'expected_warnings': [
                'Skipping DASH manifest',
            ],
        },
        {
            # The following content has been identified by the YouTube community
            # as inappropriate or offensive to some audiences.
            'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
            'info_dict': {
                'id': '6SJNVb0GnPI',
                'ext': 'mp4',
                'title': 'Race Differences in Intelligence',
                'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
                'duration': 965,
                'upload_date': '20140124',
                'uploader': 'New Century Foundation',
                'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
        },
        {
            # itag 212
            'url': '1t24XAntNCY',
            'only_matching': True,
        },
        {
            # geo restricted to JP
            'url': 'sJL6WA-aGkQ',
            'only_matching': True,
        },
        {
            'url': 'https://invidio.us/watch?v=BaW_jenozKc',
            'only_matching': True,
        },
        {
            'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
            'only_matching': True,
        },
        {
            # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
            'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
            'only_matching': True,
        },
        {
            # DRM protected
            'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
            'only_matching': True,
        },
        {
            # Video with unsupported adaptive stream type formats
            'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
            'info_dict': {
                'id': 'Z4Vy8R84T1U',
                'ext': 'mp4',
                'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                'duration': 433,
                'upload_date': '20130923',
                'uploader': 'Amelia Putri Harwita',
                'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
                'formats': 'maxcount:10',
            },
            'params': {
                'skip_download': True,
                'youtube_include_dash_manifest': False,
            },
            'skip': 'not actual anymore',
        },
        {
            # Youtube Music Auto-generated description
            'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
            'info_dict': {
                'id': 'MgNrAu2pzNs',
                'ext': 'mp4',
                'title': 'Voyeur Girl',
                'description': 'md5:7ae382a65843d6df2685993e90a8628f',
                'upload_date': '20190312',
                'uploader': 'Stephen - Topic',
                'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
                'artist': 'Stephen',
                'track': 'Voyeur Girl',
                'album': 'it\'s too much love to know my dear',
                'release_date': '20190313',
                'release_year': 2019,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
            'only_matching': True,
        },
        {
            # invalid -> valid video id redirection
            'url': 'DJztXj2GPfl',
            'info_dict': {
                'id': 'DJztXj2GPfk',
                'ext': 'mp4',
                'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
                'description': 'md5:bf577a41da97918e94fa9798d9228825',
                'upload_date': '20090125',
                'uploader': 'Prochorowka',
                'uploader_id': 'Prochorowka',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
                'artist': 'Panjabi MC',
                'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
                'album': 'Beware of the Boys (Mundian To Bach Ke)',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'Video unavailable',
        },
        {
            # empty description results in an empty string
            'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
            'info_dict': {
                'id': 'x41yOUIvK2k',
                'ext': 'mp4',
                'title': 'IMG 3456',
                'description': '',
                'upload_date': '20170613',
                'uploader_id': 'ElevageOrVert',
                'uploader': 'ElevageOrVert',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # with '};' inside yt initial data (see [1])
            # see [2] for an example with '};' inside ytInitialPlayerResponse
            # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
            # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
            'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
            'info_dict': {
                'id': 'CHqg6qOn4no',
                'ext': 'mp4',
                'title': 'Part 77   Sort a list of simple types in c#',
                'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
                'upload_date': '20130831',
                'uploader_id': 'kudvenkat',
                'uploader': 'kudvenkat',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # another example of '};' in ytInitialData
            'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
            'only_matching': True,
        },
        {
            # https://github.com/ytdl-org/youtube-dl/pull/28094
            'url': 'OtqTfy26tG0',
            'info_dict': {
                'id': 'OtqTfy26tG0',
                'ext': 'mp4',
                'title': 'Burn Out',
                'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
                'upload_date': '20141120',
                'uploader': 'The Cinematic Orchestra - Topic',
                'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
                'artist': 'The Cinematic Orchestra',
                'track': 'Burn Out',
                'album': 'Every Day',
                # NOTE(review): 'release_data' looks like a typo for
                # 'release_date' — confirm against the extractor's output keys
                'release_data': None,
                'release_year': None,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            # controversial video, only works with bpctr when authenticated with cookies
            'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
            'only_matching': True,
        },
        {
            # controversial video, requires bpctr/contentCheckOk
            'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
            'info_dict': {
                'id': 'SZJvDhaSDnc',
                'ext': 'mp4',
                'title': 'San Diego teen commits suicide after bullying over embarrassing video',
                'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
                'uploader': 'CBS This Morning',
                'upload_date': '20140716',
                'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
            }
        },
        {
            # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
            'url': 'cBvYw8_A0vQ',
            'info_dict': {
                'id': 'cBvYw8_A0vQ',
                'ext': 'mp4',
                'title': '4K Ueno Okachimachi  Street  Scenes  上野御徒町歩き',
                'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
                'upload_date': '20201120',
                'uploader': 'Walk around Japan',
                'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
            },
            'params': {
                'skip_download': True,
            },
        }, {
            # Has multiple audio streams
            'url': 'WaOKSUlf4TM',
            'only_matching': True
        }, {
            # Requires Premium: has format 141 when requested using YTM url
            'url': 'https://music.youtube.com/watch?v=XclachpHxis',
            'only_matching': True
        }, {
            # multiple subtitles with same lang_code
            'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
            'only_matching': True,
        }, {
            # Force use android client fallback
            'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
            'info_dict': {
                'id': 'YOelRv7fMxY',
                'title': 'Digging a Secret Tunnel from my Workshop',
                'ext': '3gp',
                'upload_date': '20210624',
                'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
                'uploader': 'colinfurze',
                'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
                'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
            },
            'params': {
                'format': '17',  # 3gp format available on android
                'extractor_args': {'youtube': {'player_client': ['android']}},
            },
        },
        {
            # Skip download of additional client configs (remix client config in this case)
            'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
            'only_matching': True,
            'params': {
                'extractor_args': {'youtube': {'player_skip': ['configs']}},
            },
        }
    ]
1677
1678 @classmethod
1679 def suitable(cls, url):
1680 # Hack for lazy extractors until more generic solution is implemented
1681 # (see #28780)
1682 from .youtube import parse_qs
1683 qs = parse_qs(url)
1684 if qs.get('list', [None])[0]:
1685 return False
1686 return super(YoutubeIE, cls).suitable(url)
1687
1688 def __init__(self, *args, **kwargs):
1689 super(YoutubeIE, self).__init__(*args, **kwargs)
1690 self._code_cache = {}
1691 self._player_cache = {}
1692
1693 def _extract_player_url(self, ytcfg=None, webpage=None):
1694 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1695 if not player_url:
1696 player_url = self._search_regex(
1697 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1698 webpage, 'player URL', fatal=False)
1699 if player_url.startswith('//'):
1700 player_url = 'https:' + player_url
1701 elif not re.match(r'https?://', player_url):
1702 player_url = compat_urlparse.urljoin(
1703 'https://www.youtube.com', player_url)
1704 return player_url
1705
1706 def _signature_cache_id(self, example_sig):
1707 """ Return a string representation of a signature """
1708 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1709
1710 @classmethod
1711 def _extract_player_info(cls, player_url):
1712 for player_re in cls._PLAYER_INFO_RE:
1713 id_m = re.search(player_re, player_url)
1714 if id_m:
1715 break
1716 else:
1717 raise ExtractorError('Cannot identify player %r' % player_url)
1718 return id_m.group('id')
1719
1720 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1721 player_id = self._extract_player_info(player_url)
1722 if player_id not in self._code_cache:
1723 self._code_cache[player_id] = self._download_webpage(
1724 player_url, video_id, fatal=fatal,
1725 note='Downloading player ' + player_id,
1726 errnote='Download of %s failed' % player_url)
1727 return player_id in self._code_cache
1728
1729 def _extract_signature_function(self, video_id, player_url, example_sig):
1730 player_id = self._extract_player_info(player_url)
1731
1732 # Read from filesystem cache
1733 func_id = 'js_%s_%s' % (
1734 player_id, self._signature_cache_id(example_sig))
1735 assert os.path.basename(func_id) == func_id
1736
1737 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1738 if cache_spec is not None:
1739 return lambda s: ''.join(s[i] for i in cache_spec)
1740
1741 if self._load_player(video_id, player_url):
1742 code = self._code_cache[player_id]
1743 res = self._parse_sig_js(code)
1744
1745 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1746 cache_res = res(test_string)
1747 cache_spec = [ord(c) for c in cache_res]
1748
1749 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1750 return res
1751
1752 def _print_sig_code(self, func, example_sig):
1753 def gen_sig_code(idxs):
1754 def _genslice(start, end, step):
1755 starts = '' if start == 0 else str(start)
1756 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1757 steps = '' if step == 1 else (':%d' % step)
1758 return 's[%s%s%s]' % (starts, ends, steps)
1759
1760 step = None
1761 # Quelch pyflakes warnings - start will be set when step is set
1762 start = '(Never used)'
1763 for i, prev in zip(idxs[1:], idxs[:-1]):
1764 if step is not None:
1765 if i - prev == step:
1766 continue
1767 yield _genslice(start, prev, step)
1768 step = None
1769 continue
1770 if i - prev in [-1, 1]:
1771 step = i - prev
1772 start = prev
1773 continue
1774 else:
1775 yield 's[%d]' % prev
1776 if step is None:
1777 yield 's[%d]' % i
1778 else:
1779 yield _genslice(start, i, step)
1780
1781 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1782 cache_res = func(test_string)
1783 cache_spec = [ord(c) for c in cache_res]
1784 expr_code = ' + '.join(gen_sig_code(cache_spec))
1785 signature_id_tuple = '(%s)' % (
1786 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1787 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1788 ' return %s\n') % (signature_id_tuple, expr_code)
1789 self.to_screen('Extracted signature function:\n' + code)
1790
1791 def _parse_sig_js(self, jscode):
1792 funcname = self._search_regex(
1793 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1794 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1795 r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
1796 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
1797 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1798 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1799 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1800 # Obsolete patterns
1801 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1802 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1803 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1804 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1805 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1806 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1807 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1808 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1809 jscode, 'Initial JS player signature function name', group='sig')
1810
1811 jsi = JSInterpreter(jscode)
1812 initial_function = jsi.extract_function(funcname)
1813 return lambda s: initial_function([s])
1814
1815 def _decrypt_signature(self, s, video_id, player_url):
1816 """Turn the encrypted s field into a working signature"""
1817
1818 if player_url is None:
1819 raise ExtractorError('Cannot decrypt signature without player_url')
1820
1821 try:
1822 player_id = (player_url, self._signature_cache_id(s))
1823 if player_id not in self._player_cache:
1824 func = self._extract_signature_function(
1825 video_id, player_url, s
1826 )
1827 self._player_cache[player_id] = func
1828 func = self._player_cache[player_id]
1829 if self.get_param('youtube_print_sig_code'):
1830 self._print_sig_code(func, s)
1831 return func(s)
1832 except Exception as e:
1833 tb = traceback.format_exc()
1834 raise ExtractorError(
1835 'Signature extraction failed: ' + tb, cause=e)
1836
1837 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1838 """
1839 Extract signatureTimestamp (sts)
1840 Required to tell API what sig/player version is in use.
1841 """
1842 sts = None
1843 if isinstance(ytcfg, dict):
1844 sts = int_or_none(ytcfg.get('STS'))
1845
1846 if not sts:
1847 # Attempt to extract from player
1848 if player_url is None:
1849 error_msg = 'Cannot extract signature timestamp without player_url.'
1850 if fatal:
1851 raise ExtractorError(error_msg)
1852 self.report_warning(error_msg)
1853 return
1854 if self._load_player(video_id, player_url, fatal=fatal):
1855 player_id = self._extract_player_info(player_url)
1856 code = self._code_cache[player_id]
1857 sts = int_or_none(self._search_regex(
1858 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1859 'JS player signature timestamp', group='sts', fatal=fatal))
1860 return sts
1861
1862 def _mark_watched(self, video_id, player_response):
1863 playback_url = url_or_none(try_get(
1864 player_response,
1865 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
1866 if not playback_url:
1867 return
1868 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1869 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1870
1871 # cpn generation algorithm is reverse engineered from base.js.
1872 # In fact it works even with dummy cpn.
1873 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1874 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1875
1876 qs.update({
1877 'ver': ['2'],
1878 'cpn': [cpn],
1879 })
1880 playback_url = compat_urlparse.urlunparse(
1881 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1882
1883 self._download_webpage(
1884 playback_url, video_id, 'Marking watched',
1885 'Unable to mark watched', fatal=False)
1886
1887 @staticmethod
1888 def _extract_urls(webpage):
1889 # Embedded YouTube player
1890 entries = [
1891 unescapeHTML(mobj.group('url'))
1892 for mobj in re.finditer(r'''(?x)
1893 (?:
1894 <iframe[^>]+?src=|
1895 data-video-url=|
1896 <embed[^>]+?src=|
1897 embedSWF\(?:\s*|
1898 <object[^>]+data=|
1899 new\s+SWFObject\(
1900 )
1901 (["\'])
1902 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1903 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1904 \1''', webpage)]
1905
1906 # lazyYT YouTube embed
1907 entries.extend(list(map(
1908 unescapeHTML,
1909 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1910
1911 # Wordpress "YouTube Video Importer" plugin
1912 matches = re.findall(r'''(?x)<div[^>]+
1913 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1914 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1915 entries.extend(m[-1] for m in matches)
1916
1917 return entries
1918
1919 @staticmethod
1920 def _extract_url(webpage):
1921 urls = YoutubeIE._extract_urls(webpage)
1922 return urls[0] if urls else None
1923
1924 @classmethod
1925 def extract_id(cls, url):
1926 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1927 if mobj is None:
1928 raise ExtractorError('Invalid URL: %s' % url)
1929 video_id = mobj.group(2)
1930 return video_id
1931
1932 def _extract_chapters_from_json(self, data, duration):
1933 chapter_list = traverse_obj(
1934 data, (
1935 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
1936 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
1937 ), expected_type=list)
1938
1939 return self._extract_chapters(
1940 chapter_list,
1941 chapter_time=lambda chapter: float_or_none(
1942 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
1943 chapter_title=lambda chapter: traverse_obj(
1944 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
1945 duration=duration)
1946
1947 def _extract_chapters_from_engagement_panel(self, data, duration):
1948 content_list = traverse_obj(
1949 data,
1950 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
1951 expected_type=list, default=[])
1952 chapter_time = lambda chapter: parse_duration(self._get_text(chapter.get('timeDescription')))
1953 chapter_title = lambda chapter: self._get_text(chapter.get('title'))
1954
1955 return next((
1956 filter(None, (
1957 self._extract_chapters(
1958 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
1959 chapter_time, chapter_title, duration)
1960 for contents in content_list
1961 ))), [])
1962
1963 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
1964 chapters = []
1965 last_chapter = {'start_time': 0}
1966 for idx, chapter in enumerate(chapter_list or []):
1967 title = chapter_title(chapter)
1968 start_time = chapter_time(chapter)
1969 if start_time is None:
1970 continue
1971 last_chapter['end_time'] = start_time
1972 if start_time < last_chapter['start_time']:
1973 if idx == 1:
1974 chapters.pop()
1975 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
1976 else:
1977 self.report_warning(f'Invalid start time for chapter "{title}"')
1978 continue
1979 last_chapter = {'start_time': start_time, 'title': title}
1980 chapters.append(last_chapter)
1981 last_chapter['end_time'] = duration
1982 return chapters
1983
1984 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1985 return self._parse_json(self._search_regex(
1986 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1987 regex), webpage, name, default='{}'), video_id, fatal=False)
1988
1989 @staticmethod
1990 def parse_time_text(time_text):
1991 """
1992 Parse the comment time text
1993 time_text is in the format 'X units ago (edited)'
1994 """
1995 time_text_split = time_text.split(' ')
1996 if len(time_text_split) >= 3:
1997 try:
1998 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1999 except ValueError:
2000 return None
2001
2002 def _extract_comment(self, comment_renderer, parent=None):
2003 comment_id = comment_renderer.get('commentId')
2004 if not comment_id:
2005 return
2006
2007 text = self._get_text(comment_renderer.get('contentText'))
2008
2009 # note: timestamp is an estimate calculated from the current time and time_text
2010 time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
2011 time_text_dt = self.parse_time_text(time_text)
2012 if isinstance(time_text_dt, datetime.datetime):
2013 timestamp = calendar.timegm(time_text_dt.timetuple())
2014 author = self._get_text(comment_renderer.get('authorText'))
2015 author_id = try_get(comment_renderer,
2016 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2017
2018 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2019 lambda x: x['likeCount']), compat_str)) or 0
2020 author_thumbnail = try_get(comment_renderer,
2021 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2022
2023 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2024 is_favorited = 'creatorHeart' in (try_get(
2025 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2026 return {
2027 'id': comment_id,
2028 'text': text,
2029 'timestamp': timestamp,
2030 'time_text': time_text,
2031 'like_count': votes,
2032 'is_favorited': is_favorited,
2033 'author': author,
2034 'author_id': author_id,
2035 'author_thumbnail': author_thumbnail,
2036 'author_is_uploader': author_is_uploader,
2037 'parent': parent or 'root'
2038 }
2039
2040 def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
2041 ytcfg, video_id, parent=None, comment_counts=None):
2042
2043 def extract_header(contents):
2044 _total_comments = 0
2045 _continuation = None
2046 for content in contents:
2047 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
2048 expected_comment_count = parse_count(self._get_text(
2049 comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))
2050
2051 if expected_comment_count:
2052 comment_counts[1] = expected_comment_count
2053 self.to_screen('Downloading ~%d comments' % expected_comment_count)
2054 _total_comments = comment_counts[1]
2055 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2056 comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top
2057
2058 sort_menu_item = try_get(
2059 comments_header_renderer,
2060 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2061 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2062
2063 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2064 if not _continuation:
2065 continue
2066
2067 sort_text = sort_menu_item.get('title')
2068 if isinstance(sort_text, compat_str):
2069 sort_text = sort_text.lower()
2070 else:
2071 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2072 self.to_screen('Sorting comments by %s' % sort_text)
2073 break
2074 return _total_comments, _continuation
2075
2076 def extract_thread(contents):
2077 if not parent:
2078 comment_counts[2] = 0
2079 for content in contents:
2080 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2081 comment_renderer = try_get(
2082 comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
2083 content, (lambda x: x['commentRenderer'], dict))
2084
2085 if not comment_renderer:
2086 continue
2087 comment = self._extract_comment(comment_renderer, parent)
2088 if not comment:
2089 continue
2090 comment_counts[0] += 1
2091 yield comment
2092 # Attempt to get the replies
2093 comment_replies_renderer = try_get(
2094 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2095
2096 if comment_replies_renderer:
2097 comment_counts[2] += 1
2098 comment_entries_iter = self._comment_entries(
2099 comment_replies_renderer, identity_token, account_syncid, ytcfg,
2100 video_id, parent=comment.get('id'), comment_counts=comment_counts)
2101
2102 for reply_comment in comment_entries_iter:
2103 yield reply_comment
2104
2105 # YouTube comments have a max depth of 2
2106 max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
2107 if max_depth == 1 and parent:
2108 return
2109 if not comment_counts:
2110 # comment so far, est. total comments, current comment thread #
2111 comment_counts = [0, 0, 0]
2112
2113 continuation = self._extract_continuation(root_continuation_data)
2114 if continuation and len(continuation['continuation']) < 27:
2115 self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2116 continuation_token = self._generate_comment_continuation(video_id)
2117 continuation = self._build_api_continuation_query(continuation_token, None)
2118
2119 visitor_data = None
2120 is_first_continuation = parent is None
2121
2122 for page_num in itertools.count(0):
2123 if not continuation:
2124 break
2125 headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
2126 comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2127 if page_num == 0:
2128 if is_first_continuation:
2129 note_prefix = 'Downloading comment section API JSON'
2130 else:
2131 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
2132 comment_counts[2], comment_prog_str)
2133 else:
2134 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2135 ' ' if parent else '', ' replies' if parent else '',
2136 page_num, comment_prog_str)
2137
2138 response = self._extract_response(
2139 item_id=None, query=continuation,
2140 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2141 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
2142 if not response:
2143 break
2144 visitor_data = try_get(
2145 response,
2146 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2147 compat_str) or visitor_data
2148
2149 continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
2150
2151 continuation = None
2152 if isinstance(continuation_contents, list):
2153 for continuation_section in continuation_contents:
2154 if not isinstance(continuation_section, dict):
2155 continue
2156 continuation_items = try_get(
2157 continuation_section,
2158 (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2159 lambda x: x['appendContinuationItemsAction']['continuationItems']),
2160 list) or []
2161 if is_first_continuation:
2162 total_comments, continuation = extract_header(continuation_items)
2163 if total_comments:
2164 yield total_comments
2165 is_first_continuation = False
2166 if continuation:
2167 break
2168 continue
2169 count = 0
2170 for count, entry in enumerate(extract_thread(continuation_items)):
2171 yield entry
2172 continuation = self._extract_continuation({'contents': continuation_items})
2173 if continuation:
2174 # Sometimes YouTube provides a continuation without any comments
2175 # In most cases we end up just downloading these with very little comments to come.
2176 if count == 0:
2177 if not parent:
2178 self.report_warning('No comments received - assuming end of comments')
2179 continuation = None
2180 break
2181
2182 # Deprecated response structure
2183 elif isinstance(continuation_contents, dict):
2184 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2185 for key, continuation_renderer in continuation_contents.items():
2186 if key not in known_continuation_renderers:
2187 continue
2188 if not isinstance(continuation_renderer, dict):
2189 continue
2190 if is_first_continuation:
2191 header_continuation_items = [continuation_renderer.get('header') or {}]
2192 total_comments, continuation = extract_header(header_continuation_items)
2193 if total_comments:
2194 yield total_comments
2195 is_first_continuation = False
2196 if continuation:
2197 break
2198
2199 # Sometimes YouTube provides a continuation without any comments
2200 # In most cases we end up just downloading these with very little comments to come.
2201 count = 0
2202 for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2203 yield entry
2204 continuation = self._extract_continuation(continuation_renderer)
2205 if count == 0:
2206 if not parent:
2207 self.report_warning('No comments received - assuming end of comments')
2208 continuation = None
2209 break
2210
2211 @staticmethod
2212 def _generate_comment_continuation(video_id):
2213 """
2214 Generates initial comment section continuation token from given video id
2215 """
2216 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2217 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2218 new_continuation_intlist = list(itertools.chain.from_iterable(
2219 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2220 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2221
2222 def _extract_comments(self, ytcfg, video_id, contents, webpage):
2223 """Entry for comment extraction"""
2224 def _real_comment_extract(contents):
2225 if isinstance(contents, list):
2226 for entry in contents:
2227 for key, renderer in entry.items():
2228 if key not in known_entry_comment_renderers:
2229 continue
2230 yield from self._comment_entries(
2231 renderer, video_id=video_id, ytcfg=ytcfg,
2232 identity_token=self._extract_identity_token(webpage, item_id=video_id),
2233 account_syncid=self._extract_account_syncid(ytcfg))
2234 break
2235 comments = []
2236 known_entry_comment_renderers = ('itemSectionRenderer',)
2237 estimated_total = 0
2238 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2239
2240 try:
2241 for comment in _real_comment_extract(contents):
2242 if len(comments) >= max_comments:
2243 break
2244 if isinstance(comment, int):
2245 estimated_total = comment
2246 continue
2247 comments.append(comment)
2248 except KeyboardInterrupt:
2249 self.to_screen('Interrupted by user')
2250 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2251 return {
2252 'comments': comments,
2253 'comment_count': len(comments),
2254 }
2255
2256 @staticmethod
2257 def _generate_player_context(sts=None):
2258 context = {
2259 'html5Preference': 'HTML5_PREF_WANTS',
2260 }
2261 if sts is not None:
2262 context['signatureTimestamp'] = sts
2263 return {
2264 'playbackContext': {
2265 'contentPlaybackContext': context
2266 },
2267 'contentCheckOk': True
2268 }
2269
2270 @staticmethod
2271 def _get_video_info_params(video_id, client='TVHTML5'):
2272 GVI_CLIENTS = {
2273 'ANDROID': {
2274 'c': 'ANDROID',
2275 'cver': '16.20',
2276 },
2277 'TVHTML5': {
2278 'c': 'TVHTML5',
2279 'cver': '6.20180913',
2280 }
2281 }
2282 query = {
2283 'video_id': video_id,
2284 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2285 'html5': '1'
2286 }
2287 query.update(GVI_CLIENTS.get(client))
2288 return query
2289
2290 def _real_extract(self, url):
2291 url, smuggled_data = unsmuggle_url(url, {})
2292 video_id = self._match_id(url)
2293
2294 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2295
2296 base_url = self.http_scheme() + '//www.youtube.com/'
2297 webpage_url = base_url + 'watch?v=' + video_id
2298 webpage = self._download_webpage(
2299 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2300
2301 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2302 identity_token = self._extract_identity_token(webpage, video_id)
2303 session_index = self._extract_session_index(ytcfg)
2304 player_url = self._extract_player_url(ytcfg, webpage)
2305
2306 player_client = self._configuration_arg('player_client', [''])[0]
2307 if player_client not in ('web', 'android', ''):
2308 self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
2309 force_mobile_client = player_client != 'web'
2310 player_skip = self._configuration_arg('player_skip')
2311 player_response = None
2312 if webpage:
2313 player_response = self._extract_yt_initial_variable(
2314 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2315 video_id, 'initial player response')
2316
2317 syncid = self._extract_account_syncid(ytcfg, player_response)
2318 headers = self._generate_api_headers(ytcfg, identity_token, syncid, session_index=session_index)
2319
2320 ytm_streaming_data = {}
2321 if is_music_url:
2322 ytm_webpage = None
2323 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2324 if sts and not force_mobile_client and 'configs' not in player_skip:
2325 ytm_webpage = self._download_webpage(
2326 'https://music.youtube.com',
2327 video_id, fatal=False, note='Downloading remix client config')
2328
2329 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2330 ytm_client = 'WEB_REMIX'
2331 if not sts or force_mobile_client:
2332 # Android client already has signature descrambled
2333 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2334 if not sts:
2335 self.report_warning('Falling back to android remix client for player API.')
2336 ytm_client = 'ANDROID_MUSIC'
2337 ytm_cfg = {}
2338
2339 ytm_headers = self._generate_api_headers(
2340 ytm_cfg, identity_token, syncid,
2341 client=ytm_client, session_index=session_index)
2342 ytm_query = {'videoId': video_id}
2343 ytm_query.update(self._generate_player_context(sts))
2344
2345 ytm_player_response = self._extract_response(
2346 item_id=video_id, ep='player', query=ytm_query,
2347 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2348 default_client=ytm_client,
2349 note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
2350 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
2351
2352 if not player_response or force_mobile_client:
2353 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2354 yt_client = 'WEB'
2355 ytpcfg = ytcfg
2356 ytp_headers = headers
2357 if not sts or force_mobile_client:
2358 # Android client already has signature descrambled
2359 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2360 if not sts:
2361 self.report_warning('Falling back to android client for player API.')
2362 yt_client = 'ANDROID'
2363 ytpcfg = {}
2364 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid,
2365 client=yt_client, session_index=session_index)
2366
2367 yt_query = {'videoId': video_id}
2368 yt_query.update(self._generate_player_context(sts))
2369 player_response = self._extract_response(
2370 item_id=video_id, ep='player', query=yt_query,
2371 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2372 default_client=yt_client,
2373 note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
2374 ) or player_response
2375
2376 # Age-gate workarounds
2377 playability_status = player_response.get('playabilityStatus') or {}
2378 if playability_status.get('reason') in self._AGE_GATE_REASONS:
2379 gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
2380 for gvi_client in gvi_clients:
2381 pr = self._parse_json(try_get(compat_parse_qs(
2382 self._download_webpage(
2383 base_url + 'get_video_info', video_id,
2384 'Refetching age-gated %s info webpage' % gvi_client.lower(),
2385 'unable to download video info webpage', fatal=False,
2386 query=self._get_video_info_params(video_id, client=gvi_client))),
2387 lambda x: x['player_response'][0],
2388 compat_str) or '{}', video_id)
2389 if pr:
2390 break
2391 if not pr:
2392 self.report_warning('Falling back to embedded-only age-gate workaround.')
2393 embed_webpage = None
2394 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2395 if sts and not force_mobile_client and 'configs' not in player_skip:
2396 embed_webpage = self._download_webpage(
2397 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2398 video_id=video_id, note='Downloading age-gated embed config')
2399
2400 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2401 # If we extracted the embed webpage, it'll tell us if we can view the video
2402 embedded_pr = self._parse_json(
2403 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2404 video_id=video_id)
2405 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2406 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2407 yt_client = 'WEB_EMBEDDED_PLAYER'
2408 if not sts or force_mobile_client:
2409 # Android client already has signature descrambled
2410 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2411 if not sts:
2412 self.report_warning(
2413 'Falling back to android embedded client for player API (note: some formats may be missing).')
2414 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2415 ytcfg_age = {}
2416
2417 ytage_headers = self._generate_api_headers(
2418 ytcfg_age, identity_token, syncid,
2419 client=yt_client, session_index=session_index)
2420 yt_age_query = {'videoId': video_id}
2421 yt_age_query.update(self._generate_player_context(sts))
2422 pr = self._extract_response(
2423 item_id=video_id, ep='player', query=yt_age_query,
2424 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2425 default_client=yt_client,
2426 note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
2427 ) or {}
2428
2429 if pr:
2430 player_response = pr
2431
2432 trailer_video_id = try_get(
2433 playability_status,
2434 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2435 compat_str)
2436 if trailer_video_id:
2437 return self.url_result(
2438 trailer_video_id, self.ie_key(), trailer_video_id)
2439
2440 search_meta = (
2441 lambda x: self._html_search_meta(x, webpage, default=None)) \
2442 if webpage else lambda x: None
2443
2444 video_details = player_response.get('videoDetails') or {}
2445 microformat = try_get(
2446 player_response,
2447 lambda x: x['microformat']['playerMicroformatRenderer'],
2448 dict) or {}
2449 video_title = video_details.get('title') \
2450 or self._get_text(microformat.get('title')) \
2451 or search_meta(['og:title', 'twitter:title', 'title'])
2452 video_description = video_details.get('shortDescription')
2453
2454 if not smuggled_data.get('force_singlefeed', False):
2455 if not self.get_param('noplaylist'):
2456 multifeed_metadata_list = try_get(
2457 player_response,
2458 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2459 compat_str)
2460 if multifeed_metadata_list:
2461 entries = []
2462 feed_ids = []
2463 for feed in multifeed_metadata_list.split(','):
2464 # Unquote should take place before split on comma (,) since textual
2465 # fields may contain comma as well (see
2466 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2467 feed_data = compat_parse_qs(
2468 compat_urllib_parse_unquote_plus(feed))
2469
2470 def feed_entry(name):
2471 return try_get(
2472 feed_data, lambda x: x[name][0], compat_str)
2473
2474 feed_id = feed_entry('id')
2475 if not feed_id:
2476 continue
2477 feed_title = feed_entry('title')
2478 title = video_title
2479 if feed_title:
2480 title += ' (%s)' % feed_title
2481 entries.append({
2482 '_type': 'url_transparent',
2483 'ie_key': 'Youtube',
2484 'url': smuggle_url(
2485 base_url + 'watch?v=' + feed_data['id'][0],
2486 {'force_singlefeed': True}),
2487 'title': title,
2488 })
2489 feed_ids.append(feed_id)
2490 self.to_screen(
2491 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2492 % (', '.join(feed_ids), video_id))
2493 return self.playlist_result(
2494 entries, video_id, video_title, video_description)
2495 else:
2496 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2497
2498 formats, itags, stream_ids = [], [], []
2499 itag_qualities = {}
2500 q = qualities([
2501 # "tiny" is the smallest video-only format. But some audio-only formats
2502 # was also labeled "tiny". It is not clear if such formats still exist
2503 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2504 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2505 ])
2506
2507 streaming_data = player_response.get('streamingData') or {}
2508 streaming_formats = streaming_data.get('formats') or []
2509 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
2510 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2511 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2512
2513 for fmt in streaming_formats:
2514 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2515 continue
2516
2517 itag = str_or_none(fmt.get('itag'))
2518 audio_track = fmt.get('audioTrack') or {}
2519 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2520 if stream_id in stream_ids:
2521 continue
2522
2523 quality = fmt.get('quality')
2524 if quality == 'tiny' or not quality:
2525 quality = fmt.get('audioQuality', '').lower() or quality
2526 if itag and quality:
2527 itag_qualities[itag] = quality
2528 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2529 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2530 # number of fragment that would subsequently requested with (`&sq=N`)
2531 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2532 continue
2533
2534 fmt_url = fmt.get('url')
2535 if not fmt_url:
2536 sc = compat_parse_qs(fmt.get('signatureCipher'))
2537 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2538 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2539 if not (sc and fmt_url and encrypted_sig):
2540 continue
2541 if not player_url:
2542 continue
2543 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2544 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2545 fmt_url += '&' + sp + '=' + signature
2546
2547 if itag:
2548 itags.append(itag)
2549 stream_ids.append(stream_id)
2550
2551 tbr = float_or_none(
2552 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2553 dct = {
2554 'asr': int_or_none(fmt.get('audioSampleRate')),
2555 'filesize': int_or_none(fmt.get('contentLength')),
2556 'format_id': itag,
2557 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
2558 'fps': int_or_none(fmt.get('fps')),
2559 'height': int_or_none(fmt.get('height')),
2560 'quality': q(quality),
2561 'tbr': tbr,
2562 'url': fmt_url,
2563 'width': fmt.get('width'),
2564 'language': audio_track.get('id', '').split('.')[0],
2565 }
2566 mime_mobj = re.match(
2567 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2568 if mime_mobj:
2569 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2570 dct.update(parse_codecs(mime_mobj.group(2)))
2571 # The 3gp format in android client has a quality of "small",
2572 # but is actually worse than all other formats
2573 if dct['ext'] == '3gp':
2574 dct['quality'] = q('tiny')
2575 no_audio = dct.get('acodec') == 'none'
2576 no_video = dct.get('vcodec') == 'none'
2577 if no_audio:
2578 dct['vbr'] = tbr
2579 if no_video:
2580 dct['abr'] = tbr
2581 if no_audio or no_video:
2582 dct['downloader_options'] = {
2583 # Youtube throttles chunks >~10M
2584 'http_chunk_size': 10485760,
2585 }
2586 if dct.get('ext'):
2587 dct['container'] = dct['ext'] + '_dash'
2588 formats.append(dct)
2589
2590 skip_manifests = self._configuration_arg('skip')
2591 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2592 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2593
2594 for sd in (streaming_data, ytm_streaming_data):
2595 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2596 if hls_manifest_url:
2597 for f in self._extract_m3u8_formats(
2598 hls_manifest_url, video_id, 'mp4', fatal=False):
2599 itag = self._search_regex(
2600 r'/itag/(\d+)', f['url'], 'itag', default=None)
2601 if itag:
2602 f['format_id'] = itag
2603 formats.append(f)
2604
2605 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2606 if dash_manifest_url:
2607 for f in self._extract_mpd_formats(
2608 dash_manifest_url, video_id, fatal=False):
2609 itag = f['format_id']
2610 if itag in itags:
2611 continue
2612 if itag in itag_qualities:
2613 f['quality'] = q(itag_qualities[itag])
2614 filesize = int_or_none(self._search_regex(
2615 r'/clen/(\d+)', f.get('fragment_base_url')
2616 or f['url'], 'file size', default=None))
2617 if filesize:
2618 f['filesize'] = filesize
2619 formats.append(f)
2620
2621 if not formats:
2622 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
2623 self.raise_no_formats(
2624 'This video is DRM protected.', expected=True)
2625 pemr = try_get(
2626 playability_status,
2627 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2628 dict) or {}
2629 reason = self._get_text(pemr.get('reason')) or playability_status.get('reason')
2630 subreason = pemr.get('subreason')
2631 if subreason:
2632 subreason = clean_html(self._get_text(subreason))
2633 if subreason == 'The uploader has not made this video available in your country.':
2634 countries = microformat.get('availableCountries')
2635 if not countries:
2636 regions_allowed = search_meta('regionsAllowed')
2637 countries = regions_allowed.split(',') if regions_allowed else None
2638 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2639 reason += '\n' + subreason
2640 if reason:
2641 self.raise_no_formats(reason, expected=True)
2642
2643 self._sort_formats(formats)
2644
2645 keywords = video_details.get('keywords') or []
2646 if not keywords and webpage:
2647 keywords = [
2648 unescapeHTML(m.group('content'))
2649 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2650 for keyword in keywords:
2651 if keyword.startswith('yt:stretch='):
2652 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2653 if mobj:
2654 # NB: float is intentional for forcing float division
2655 w, h = (float(v) for v in mobj.groups())
2656 if w > 0 and h > 0:
2657 ratio = w / h
2658 for f in formats:
2659 if f.get('vcodec') != 'none':
2660 f['stretched_ratio'] = ratio
2661 break
2662
2663 category = microformat.get('category') or search_meta('genre')
2664 channel_id = video_details.get('channelId') \
2665 or microformat.get('externalChannelId') \
2666 or search_meta('channelId')
2667 duration = int_or_none(
2668 video_details.get('lengthSeconds')
2669 or microformat.get('lengthSeconds')) \
2670 or parse_duration(search_meta('duration'))
2671 is_live = video_details.get('isLive')
2672 is_upcoming = video_details.get('isUpcoming')
2673 owner_profile_url = microformat.get('ownerProfileUrl')
2674
2675 thumbnails = []
2676 for container in (video_details, microformat):
2677 for thumbnail in (try_get(
2678 container,
2679 lambda x: x['thumbnail']['thumbnails'], list) or []):
2680 thumbnail_url = thumbnail.get('url')
2681 if not thumbnail_url:
2682 continue
2683 # Sometimes youtube gives a wrong thumbnail URL. See:
2684 # https://github.com/yt-dlp/yt-dlp/issues/233
2685 # https://github.com/ytdl-org/youtube-dl/issues/28023
2686 if 'maxresdefault' in thumbnail_url:
2687 thumbnail_url = thumbnail_url.split('?')[0]
2688 thumbnails.append({
2689 'url': thumbnail_url,
2690 'height': int_or_none(thumbnail.get('height')),
2691 'width': int_or_none(thumbnail.get('width')),
2692 })
2693 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2694 if thumbnail_url:
2695 thumbnails.append({
2696 'url': thumbnail_url,
2697 })
2698 # The best resolution thumbnails sometimes does not appear in the webpage
2699 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2700 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2701 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2702 guaranteed_thumbnail_names = [
2703 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2704 'mqdefault', 'mq1', 'mq2', 'mq3',
2705 'default', '1', '2', '3'
2706 ]
2707 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2708 n_thumbnail_names = len(thumbnail_names)
2709
2710 thumbnails.extend({
2711 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2712 video_id=video_id, name=name, ext=ext,
2713 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2714 '_test_url': name in hq_thumbnail_names,
2715 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2716 for thumb in thumbnails:
2717 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2718 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2719 self._remove_duplicate_formats(thumbnails)
2720
2721 info = {
2722 'id': video_id,
2723 'title': self._live_title(video_title) if is_live else video_title,
2724 'formats': formats,
2725 'thumbnails': thumbnails,
2726 'description': video_description,
2727 'upload_date': unified_strdate(
2728 microformat.get('uploadDate')
2729 or search_meta('uploadDate')),
2730 'uploader': video_details['author'],
2731 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2732 'uploader_url': owner_profile_url,
2733 'channel_id': channel_id,
2734 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2735 'duration': duration,
2736 'view_count': int_or_none(
2737 video_details.get('viewCount')
2738 or microformat.get('viewCount')
2739 or search_meta('interactionCount')),
2740 'average_rating': float_or_none(video_details.get('averageRating')),
2741 'age_limit': 18 if (
2742 microformat.get('isFamilySafe') is False
2743 or search_meta('isFamilyFriendly') == 'false'
2744 or search_meta('og:restrictions:age') == '18+') else 0,
2745 'webpage_url': webpage_url,
2746 'categories': [category] if category else None,
2747 'tags': keywords,
2748 'is_live': is_live,
2749 'playable_in_embed': playability_status.get('playableInEmbed'),
2750 'was_live': video_details.get('isLiveContent'),
2751 }
2752
2753 pctr = try_get(
2754 player_response,
2755 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2756 subtitles = {}
2757 if pctr:
2758 def process_language(container, base_url, lang_code, sub_name, query):
2759 lang_subs = container.setdefault(lang_code, [])
2760 for fmt in self._SUBTITLE_FORMATS:
2761 query.update({
2762 'fmt': fmt,
2763 })
2764 lang_subs.append({
2765 'ext': fmt,
2766 'url': update_url_query(base_url, query),
2767 'name': sub_name,
2768 })
2769
2770 for caption_track in (pctr.get('captionTracks') or []):
2771 base_url = caption_track.get('baseUrl')
2772 if not base_url:
2773 continue
2774 if caption_track.get('kind') != 'asr':
2775 lang_code = (
2776 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2777 or caption_track.get('languageCode'))
2778 if not lang_code:
2779 continue
2780 process_language(
2781 subtitles, base_url, lang_code,
2782 try_get(caption_track, lambda x: x['name']['simpleText']),
2783 {})
2784 continue
2785 automatic_captions = {}
2786 for translation_language in (pctr.get('translationLanguages') or []):
2787 translation_language_code = translation_language.get('languageCode')
2788 if not translation_language_code:
2789 continue
2790 process_language(
2791 automatic_captions, base_url, translation_language_code,
2792 self._get_text(translation_language.get('languageName'), max_runs=1),
2793 {'tlang': translation_language_code})
2794 info['automatic_captions'] = automatic_captions
2795 info['subtitles'] = subtitles
2796
2797 parsed_url = compat_urllib_parse_urlparse(url)
2798 for component in [parsed_url.fragment, parsed_url.query]:
2799 query = compat_parse_qs(component)
2800 for k, v in query.items():
2801 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2802 d_k += '_time'
2803 if d_k not in info and k in s_ks:
2804 info[d_k] = parse_duration(query[k][0])
2805
2806 # Youtube Music Auto-generated description
2807 if video_description:
2808 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2809 if mobj:
2810 release_year = mobj.group('release_year')
2811 release_date = mobj.group('release_date')
2812 if release_date:
2813 release_date = release_date.replace('-', '')
2814 if not release_year:
2815 release_year = release_date[:4]
2816 info.update({
2817 'album': mobj.group('album'.strip()),
2818 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2819 'track': mobj.group('track').strip(),
2820 'release_date': release_date,
2821 'release_year': int_or_none(release_year),
2822 })
2823
2824 initial_data = None
2825 if webpage:
2826 initial_data = self._extract_yt_initial_variable(
2827 webpage, self._YT_INITIAL_DATA_RE, video_id,
2828 'yt initial data')
2829 if not initial_data:
2830 initial_data = self._extract_response(
2831 item_id=video_id, ep='next', fatal=False,
2832 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2833 note='Downloading initial data API JSON')
2834
2835 try:
2836 # This will error if there is no livechat
2837 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2838 info['subtitles']['live_chat'] = [{
2839 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2840 'video_id': video_id,
2841 'ext': 'json',
2842 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2843 }]
2844 except (KeyError, IndexError, TypeError):
2845 pass
2846
2847 if initial_data:
2848 info['chapters'] = (
2849 self._extract_chapters_from_json(initial_data, duration)
2850 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2851 or None)
2852
2853 contents = try_get(
2854 initial_data,
2855 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2856 list) or []
2857 for content in contents:
2858 vpir = content.get('videoPrimaryInfoRenderer')
2859 if vpir:
2860 stl = vpir.get('superTitleLink')
2861 if stl:
2862 stl = self._get_text(stl)
2863 if try_get(
2864 vpir,
2865 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2866 info['location'] = stl
2867 else:
2868 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2869 if mobj:
2870 info.update({
2871 'series': mobj.group(1),
2872 'season_number': int(mobj.group(2)),
2873 'episode_number': int(mobj.group(3)),
2874 })
2875 for tlb in (try_get(
2876 vpir,
2877 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2878 list) or []):
2879 tbr = tlb.get('toggleButtonRenderer') or {}
2880 for getter, regex in [(
2881 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2882 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2883 lambda x: x['accessibility'],
2884 lambda x: x['accessibilityData']['accessibilityData'],
2885 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2886 label = (try_get(tbr, getter, dict) or {}).get('label')
2887 if label:
2888 mobj = re.match(regex, label)
2889 if mobj:
2890 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2891 break
2892 sbr_tooltip = try_get(
2893 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2894 if sbr_tooltip:
2895 like_count, dislike_count = sbr_tooltip.split(' / ')
2896 info.update({
2897 'like_count': str_to_int(like_count),
2898 'dislike_count': str_to_int(dislike_count),
2899 })
2900 vsir = content.get('videoSecondaryInfoRenderer')
2901 if vsir:
2902 info['channel'] = self._get_text(try_get(
2903 vsir,
2904 lambda x: x['owner']['videoOwnerRenderer']['title'],
2905 dict))
2906 rows = try_get(
2907 vsir,
2908 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2909 list) or []
2910 multiple_songs = False
2911 for row in rows:
2912 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2913 multiple_songs = True
2914 break
2915 for row in rows:
2916 mrr = row.get('metadataRowRenderer') or {}
2917 mrr_title = mrr.get('title')
2918 if not mrr_title:
2919 continue
2920 mrr_title = self._get_text(mrr['title'])
2921 mrr_contents_text = self._get_text(mrr['contents'][0])
2922 if mrr_title == 'License':
2923 info['license'] = mrr_contents_text
2924 elif not multiple_songs:
2925 if mrr_title == 'Album':
2926 info['album'] = mrr_contents_text
2927 elif mrr_title == 'Artist':
2928 info['artist'] = mrr_contents_text
2929 elif mrr_title == 'Song':
2930 info['track'] = mrr_contents_text
2931
2932 fallbacks = {
2933 'channel': 'uploader',
2934 'channel_id': 'uploader_id',
2935 'channel_url': 'uploader_url',
2936 }
2937 for to, frm in fallbacks.items():
2938 if not info.get(to):
2939 info[to] = info.get(frm)
2940
2941 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2942 v = info.get(s_k)
2943 if v:
2944 info[d_k] = v
2945
2946 is_private = bool_or_none(video_details.get('isPrivate'))
2947 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2948 is_membersonly = None
2949 is_premium = None
2950 if initial_data and is_private is not None:
2951 is_membersonly = False
2952 is_premium = False
2953 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2954 badge_labels = set()
2955 for content in contents:
2956 if not isinstance(content, dict):
2957 continue
2958 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
2959 for badge_label in badge_labels:
2960 if badge_label.lower() == 'members only':
2961 is_membersonly = True
2962 elif badge_label.lower() == 'premium':
2963 is_premium = True
2964 elif badge_label.lower() == 'unlisted':
2965 is_unlisted = True
2966
2967 info['availability'] = self._availability(
2968 is_private=is_private,
2969 needs_premium=is_premium,
2970 needs_subscription=is_membersonly,
2971 needs_auth=info['age_limit'] >= 18,
2972 is_unlisted=None if is_private is None else is_unlisted)
2973
2974 # get xsrf for annotations or comments
2975 get_annotations = self.get_param('writeannotations', False)
2976 get_comments = self.get_param('getcomments', False)
2977 if get_annotations or get_comments:
2978 xsrf_token = None
2979 ytcfg = self._extract_ytcfg(video_id, webpage)
2980 if ytcfg:
2981 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2982 if not xsrf_token:
2983 xsrf_token = self._search_regex(
2984 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2985 webpage, 'xsrf token', group='xsrf_token', fatal=False)
2986
2987 # annotations
2988 if get_annotations:
2989 invideo_url = try_get(
2990 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2991 if xsrf_token and invideo_url:
2992 xsrf_field_name = None
2993 if ytcfg:
2994 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2995 if not xsrf_field_name:
2996 xsrf_field_name = self._search_regex(
2997 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2998 webpage, 'xsrf field name',
2999 group='xsrf_field_name', default='session_token')
3000 info['annotations'] = self._download_webpage(
3001 self._proto_relative_url(invideo_url),
3002 video_id, note='Downloading annotations',
3003 errnote='Unable to download video annotations', fatal=False,
3004 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3005
3006 if get_comments:
3007 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
3008
3009 self.mark_watched(video_id, player_response)
3010
3011 return info
3012
3013
3014 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3015 IE_DESC = 'YouTube.com tab'
3016 _VALID_URL = r'''(?x)
3017 https?://
3018 (?:\w+\.)?
3019 (?:
3020 youtube(?:kids)?\.com|
3021 invidio\.us
3022 )/
3023 (?:
3024 (?P<channel_type>channel|c|user|browse)/|
3025 (?P<not_channel>
3026 feed/|hashtag/|
3027 (?:playlist|watch)\?.*?\blist=
3028 )|
3029 (?!(?:%s)\b) # Direct URLs
3030 )
3031 (?P<id>[^/?\#&]+)
3032 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3033 IE_NAME = 'youtube:tab'
3034
3035 _TESTS = [{
3036 'note': 'playlists, multipage',
3037 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3038 'playlist_mincount': 94,
3039 'info_dict': {
3040 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3041 'title': 'Игорь Клейнер - Playlists',
3042 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3043 'uploader': 'Игорь Клейнер',
3044 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3045 },
3046 }, {
3047 'note': 'playlists, multipage, different order',
3048 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3049 'playlist_mincount': 94,
3050 'info_dict': {
3051 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3052 'title': 'Игорь Клейнер - Playlists',
3053 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3054 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3055 'uploader': 'Игорь Клейнер',
3056 },
3057 }, {
3058 'note': 'playlists, series',
3059 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3060 'playlist_mincount': 5,
3061 'info_dict': {
3062 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3063 'title': '3Blue1Brown - Playlists',
3064 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3065 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3066 'uploader': '3Blue1Brown',
3067 },
3068 }, {
3069 'note': 'playlists, singlepage',
3070 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3071 'playlist_mincount': 4,
3072 'info_dict': {
3073 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3074 'title': 'ThirstForScience - Playlists',
3075 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3076 'uploader': 'ThirstForScience',
3077 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3078 }
3079 }, {
3080 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3081 'only_matching': True,
3082 }, {
3083 'note': 'basic, single video playlist',
3084 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3085 'info_dict': {
3086 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3087 'uploader': 'Sergey M.',
3088 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3089 'title': 'youtube-dl public playlist',
3090 },
3091 'playlist_count': 1,
3092 }, {
3093 'note': 'empty playlist',
3094 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3095 'info_dict': {
3096 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3097 'uploader': 'Sergey M.',
3098 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3099 'title': 'youtube-dl empty playlist',
3100 },
3101 'playlist_count': 0,
3102 }, {
3103 'note': 'Home tab',
3104 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3105 'info_dict': {
3106 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3107 'title': 'lex will - Home',
3108 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3109 'uploader': 'lex will',
3110 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3111 },
3112 'playlist_mincount': 2,
3113 }, {
3114 'note': 'Videos tab',
3115 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3116 'info_dict': {
3117 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3118 'title': 'lex will - Videos',
3119 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3120 'uploader': 'lex will',
3121 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3122 },
3123 'playlist_mincount': 975,
3124 }, {
3125 'note': 'Videos tab, sorted by popular',
3126 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3127 'info_dict': {
3128 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3129 'title': 'lex will - Videos',
3130 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3131 'uploader': 'lex will',
3132 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3133 },
3134 'playlist_mincount': 199,
3135 }, {
3136 'note': 'Playlists tab',
3137 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3138 'info_dict': {
3139 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3140 'title': 'lex will - Playlists',
3141 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3142 'uploader': 'lex will',
3143 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3144 },
3145 'playlist_mincount': 17,
3146 }, {
3147 'note': 'Community tab',
3148 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3149 'info_dict': {
3150 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3151 'title': 'lex will - Community',
3152 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3153 'uploader': 'lex will',
3154 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3155 },
3156 'playlist_mincount': 18,
3157 }, {
3158 'note': 'Channels tab',
3159 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3160 'info_dict': {
3161 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3162 'title': 'lex will - Channels',
3163 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3164 'uploader': 'lex will',
3165 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3166 },
3167 'playlist_mincount': 12,
3168 }, {
3169 'note': 'Search tab',
3170 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3171 'playlist_mincount': 40,
3172 'info_dict': {
3173 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3174 'title': '3Blue1Brown - Search - linear algebra',
3175 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3176 'uploader': '3Blue1Brown',
3177 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3178 },
3179 }, {
3180 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3181 'only_matching': True,
3182 }, {
3183 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3184 'only_matching': True,
3185 }, {
3186 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3187 'only_matching': True,
3188 }, {
3189 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3190 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3191 'info_dict': {
3192 'title': '29C3: Not my department',
3193 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3194 'uploader': 'Christiaan008',
3195 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3196 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3197 },
3198 'playlist_count': 96,
3199 }, {
3200 'note': 'Large playlist',
3201 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3202 'info_dict': {
3203 'title': 'Uploads from Cauchemar',
3204 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3205 'uploader': 'Cauchemar',
3206 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3207 },
3208 'playlist_mincount': 1123,
3209 }, {
3210 'note': 'even larger playlist, 8832 videos',
3211 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3212 'only_matching': True,
3213 }, {
3214 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3215 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3216 'info_dict': {
3217 'title': 'Uploads from Interstellar Movie',
3218 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3219 'uploader': 'Interstellar Movie',
3220 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3221 },
3222 'playlist_mincount': 21,
3223 }, {
3224 'note': 'Playlist with "show unavailable videos" button',
3225 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3226 'info_dict': {
3227 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3228 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3229 'uploader': 'Phim Siêu Nhân Nhật Bản',
3230 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3231 },
3232 'playlist_mincount': 200,
3233 }, {
3234 'note': 'Playlist with unavailable videos in page 7',
3235 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3236 'info_dict': {
3237 'title': 'Uploads from BlankTV',
3238 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3239 'uploader': 'BlankTV',
3240 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3241 },
3242 'playlist_mincount': 1000,
3243 }, {
3244 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3245 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3246 'info_dict': {
3247 'title': 'Data Analysis with Dr Mike Pound',
3248 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3249 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3250 'uploader': 'Computerphile',
3251 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3252 },
3253 'playlist_mincount': 11,
3254 }, {
3255 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3256 'only_matching': True,
3257 }, {
3258 'note': 'Playlist URL that does not actually serve a playlist',
3259 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3260 'info_dict': {
3261 'id': 'FqZTN594JQw',
3262 'ext': 'webm',
3263 'title': "Smiley's People 01 detective, Adventure Series, Action",
3264 'uploader': 'STREEM',
3265 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3266 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3267 'upload_date': '20150526',
3268 'license': 'Standard YouTube License',
3269 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3270 'categories': ['People & Blogs'],
3271 'tags': list,
3272 'view_count': int,
3273 'like_count': int,
3274 'dislike_count': int,
3275 },
3276 'params': {
3277 'skip_download': True,
3278 },
3279 'skip': 'This video is not available.',
3280 'add_ie': [YoutubeIE.ie_key()],
3281 }, {
3282 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3283 'only_matching': True,
3284 }, {
3285 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3286 'only_matching': True,
3287 }, {
3288 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3289 'info_dict': {
3290 'id': 'X1whbWASnNQ', # This will keep changing
3291 'ext': 'mp4',
3292 'title': compat_str,
3293 'uploader': 'Sky News',
3294 'uploader_id': 'skynews',
3295 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3296 'upload_date': r're:\d{8}',
3297 'description': compat_str,
3298 'categories': ['News & Politics'],
3299 'tags': list,
3300 'like_count': int,
3301 'dislike_count': int,
3302 },
3303 'params': {
3304 'skip_download': True,
3305 },
3306 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3307 }, {
3308 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3309 'info_dict': {
3310 'id': 'a48o2S1cPoo',
3311 'ext': 'mp4',
3312 'title': 'The Young Turks - Live Main Show',
3313 'uploader': 'The Young Turks',
3314 'uploader_id': 'TheYoungTurks',
3315 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3316 'upload_date': '20150715',
3317 'license': 'Standard YouTube License',
3318 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3319 'categories': ['News & Politics'],
3320 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3321 'like_count': int,
3322 'dislike_count': int,
3323 },
3324 'params': {
3325 'skip_download': True,
3326 },
3327 'only_matching': True,
3328 }, {
3329 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3330 'only_matching': True,
3331 }, {
3332 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3333 'only_matching': True,
3334 }, {
3335 'note': 'A channel that is not live. Should raise error',
3336 'url': 'https://www.youtube.com/user/numberphile/live',
3337 'only_matching': True,
3338 }, {
3339 'url': 'https://www.youtube.com/feed/trending',
3340 'only_matching': True,
3341 }, {
3342 'url': 'https://www.youtube.com/feed/library',
3343 'only_matching': True,
3344 }, {
3345 'url': 'https://www.youtube.com/feed/history',
3346 'only_matching': True,
3347 }, {
3348 'url': 'https://www.youtube.com/feed/subscriptions',
3349 'only_matching': True,
3350 }, {
3351 'url': 'https://www.youtube.com/feed/watch_later',
3352 'only_matching': True,
3353 }, {
3354 'note': 'Recommended - redirects to home page',
3355 'url': 'https://www.youtube.com/feed/recommended',
3356 'only_matching': True,
3357 }, {
3358 'note': 'inline playlist with not always working continuations',
3359 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3360 'only_matching': True,
3361 }, {
3362 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3363 'only_matching': True,
3364 }, {
3365 'url': 'https://www.youtube.com/course',
3366 'only_matching': True,
3367 }, {
3368 'url': 'https://www.youtube.com/zsecurity',
3369 'only_matching': True,
3370 }, {
3371 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3372 'only_matching': True,
3373 }, {
3374 'url': 'https://www.youtube.com/TheYoungTurks/live',
3375 'only_matching': True,
3376 }, {
3377 'url': 'https://www.youtube.com/hashtag/cctv9',
3378 'info_dict': {
3379 'id': 'cctv9',
3380 'title': '#cctv9',
3381 },
3382 'playlist_mincount': 350,
3383 }, {
3384 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3385 'only_matching': True,
3386 }, {
3387 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3388 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3389 'only_matching': True
3390 }, {
3391 'note': '/browse/ should redirect to /channel/',
3392 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3393 'only_matching': True
3394 }, {
3395 'note': 'VLPL, should redirect to playlist?list=PL...',
3396 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3397 'info_dict': {
3398 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3399 'uploader': 'NoCopyrightSounds',
3400 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3401 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3402 'title': 'NCS Releases',
3403 },
3404 'playlist_mincount': 166,
3405 }, {
3406 'note': 'Topic, should redirect to playlist?list=UU...',
3407 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3408 'info_dict': {
3409 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3410 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3411 'title': 'Uploads from Royalty Free Music - Topic',
3412 'uploader': 'Royalty Free Music - Topic',
3413 },
3414 'expected_warnings': [
3415 'A channel/user page was given',
3416 'The URL does not have a videos tab',
3417 ],
3418 'playlist_mincount': 101,
3419 }, {
3420 'note': 'Topic without a UU playlist',
3421 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3422 'info_dict': {
3423 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3424 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3425 },
3426 'expected_warnings': [
3427 'A channel/user page was given',
3428 'The URL does not have a videos tab',
3429 'Falling back to channel URL',
3430 ],
3431 'playlist_mincount': 9,
3432 }, {
3433 'note': 'Youtube music Album',
3434 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3435 'info_dict': {
3436 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3437 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3438 },
3439 'playlist_count': 50,
3440 }, {
3441 'note': 'unlisted single video playlist',
3442 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3443 'info_dict': {
3444 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3445 'uploader': 'colethedj',
3446 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3447 'title': 'yt-dlp unlisted playlist test',
3448 'availability': 'unlisted'
3449 },
3450 'playlist_count': 1,
3451 }]
3452
3453 @classmethod
3454 def suitable(cls, url):
3455 return False if YoutubeIE.suitable(url) else super(
3456 YoutubeTabIE, cls).suitable(url)
3457
3458 def _extract_channel_id(self, webpage):
3459 channel_id = self._html_search_meta(
3460 'channelId', webpage, 'channel id', default=None)
3461 if channel_id:
3462 return channel_id
3463 channel_url = self._html_search_meta(
3464 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3465 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3466 'twitter:app:url:googleplay'), webpage, 'channel url')
3467 return self._search_regex(
3468 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3469 channel_url, 'channel id')
3470
3471 @staticmethod
3472 def _extract_basic_item_renderer(item):
3473 # Modified from _extract_grid_item_renderer
3474 known_basic_renderers = (
3475 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3476 )
3477 for key, renderer in item.items():
3478 if not isinstance(renderer, dict):
3479 continue
3480 elif key in known_basic_renderers:
3481 return renderer
3482 elif key.startswith('grid') and key.endswith('Renderer'):
3483 return renderer
3484
3485 def _grid_entries(self, grid_renderer):
3486 for item in grid_renderer['items']:
3487 if not isinstance(item, dict):
3488 continue
3489 renderer = self._extract_basic_item_renderer(item)
3490 if not isinstance(renderer, dict):
3491 continue
3492 title = self._get_text(renderer.get('title'))
3493
3494 # playlist
3495 playlist_id = renderer.get('playlistId')
3496 if playlist_id:
3497 yield self.url_result(
3498 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3499 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3500 video_title=title)
3501 continue
3502 # video
3503 video_id = renderer.get('videoId')
3504 if video_id:
3505 yield self._extract_video(renderer)
3506 continue
3507 # channel
3508 channel_id = renderer.get('channelId')
3509 if channel_id:
3510 yield self.url_result(
3511 'https://www.youtube.com/channel/%s' % channel_id,
3512 ie=YoutubeTabIE.ie_key(), video_title=title)
3513 continue
3514 # generic endpoint URL support
3515 ep_url = urljoin('https://www.youtube.com/', try_get(
3516 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3517 compat_str))
3518 if ep_url:
3519 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3520 if ie.suitable(ep_url):
3521 yield self.url_result(
3522 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3523 break
3524
3525 def _shelf_entries_from_content(self, shelf_renderer):
3526 content = shelf_renderer.get('content')
3527 if not isinstance(content, dict):
3528 return
3529 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3530 if renderer:
3531 # TODO: add support for nested playlists so each shelf is processed
3532 # as separate playlist
3533 # TODO: this includes only first N items
3534 for entry in self._grid_entries(renderer):
3535 yield entry
3536 renderer = content.get('horizontalListRenderer')
3537 if renderer:
3538 # TODO
3539 pass
3540
3541 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3542 ep = try_get(
3543 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3544 compat_str)
3545 shelf_url = urljoin('https://www.youtube.com', ep)
3546 if shelf_url:
3547 # Skipping links to another channels, note that checking for
3548 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3549 # will not work
3550 if skip_channels and '/channels?' in shelf_url:
3551 return
3552 title = self._get_text(shelf_renderer, lambda x: x['title'])
3553 yield self.url_result(shelf_url, video_title=title)
3554 # Shelf may not contain shelf URL, fallback to extraction from content
3555 for entry in self._shelf_entries_from_content(shelf_renderer):
3556 yield entry
3557
3558 def _playlist_entries(self, video_list_renderer):
3559 for content in video_list_renderer['contents']:
3560 if not isinstance(content, dict):
3561 continue
3562 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3563 if not isinstance(renderer, dict):
3564 continue
3565 video_id = renderer.get('videoId')
3566 if not video_id:
3567 continue
3568 yield self._extract_video(renderer)
3569
3570 def _rich_entries(self, rich_grid_renderer):
3571 renderer = try_get(
3572 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3573 video_id = renderer.get('videoId')
3574 if not video_id:
3575 return
3576 yield self._extract_video(renderer)
3577
3578 def _video_entry(self, video_renderer):
3579 video_id = video_renderer.get('videoId')
3580 if video_id:
3581 return self._extract_video(video_renderer)
3582
3583 def _post_thread_entries(self, post_thread_renderer):
3584 post_renderer = try_get(
3585 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3586 if not post_renderer:
3587 return
3588 # video attachment
3589 video_renderer = try_get(
3590 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3591 video_id = video_renderer.get('videoId')
3592 if video_id:
3593 entry = self._extract_video(video_renderer)
3594 if entry:
3595 yield entry
3596 # playlist attachment
3597 playlist_id = try_get(
3598 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3599 if playlist_id:
3600 yield self.url_result(
3601 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3602 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3603 # inline video links
3604 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3605 for run in runs:
3606 if not isinstance(run, dict):
3607 continue
3608 ep_url = try_get(
3609 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3610 if not ep_url:
3611 continue
3612 if not YoutubeIE.suitable(ep_url):
3613 continue
3614 ep_video_id = YoutubeIE._match_id(ep_url)
3615 if video_id == ep_video_id:
3616 continue
3617 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3618
3619 def _post_thread_continuation_entries(self, post_thread_continuation):
3620 contents = post_thread_continuation.get('contents')
3621 if not isinstance(contents, list):
3622 return
3623 for content in contents:
3624 renderer = content.get('backstagePostThreadRenderer')
3625 if not isinstance(renderer, dict):
3626 continue
3627 for entry in self._post_thread_entries(renderer):
3628 yield entry
3629
3630 r''' # unused
3631 def _rich_grid_entries(self, contents):
3632 for content in contents:
3633 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3634 if video_renderer:
3635 entry = self._video_entry(video_renderer)
3636 if entry:
3637 yield entry
3638 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Generate all entries of the selected tab, following continuations.

        @param tab              selected tab renderer (dict)
        @param item_id          id used when naming the paged API requests
        @param identity_token   forwarded to _generate_api_headers (may be None)
        @param account_syncid   forwarded to _generate_api_headers (may be None)
        @param ytcfg            page ytcfg, forwarded to the API requests (may be None)
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # not an item section; rich items are emitted directly
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # dispatch table: renderer key -> generator of entries
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # single-item list as a mutable cell; Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        # page through continuations until none is found or the API stops responding
        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # remember the visitorData of the response for subsequent request headers
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # responses may instead use onResponseReceived* with continuation items
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # re-wrap the items under the expected key so the
                # corresponding extractor can consume them
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3754
3755 @staticmethod
3756 def _extract_selected_tab(tabs):
3757 for tab in tabs:
3758 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3759 if renderer.get('selected') is True:
3760 return renderer
3761 else:
3762 raise ExtractorError('Unable to find selected tab')
3763
3764 @classmethod
3765 def _extract_uploader(cls, data):
3766 uploader = {}
3767 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3768 owner = try_get(
3769 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3770 if owner:
3771 uploader['uploader'] = owner.get('text')
3772 uploader['uploader_id'] = try_get(
3773 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3774 uploader['uploader_url'] = urljoin(
3775 'https://www.youtube.com/',
3776 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3777 return {k: v for k, v in uploader.items() if v is not None}
3778
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build a playlist result for the selected tab of a channel/playlist page.

        Collects title/description/uploader/thumbnail/availability metadata from
        channelMetadataRenderer or playlistMetadataRenderer (plus the sidebar)
        and wraps the tab's entries generator in a playlist_result.
        """
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            # not a channel page - try playlist metadata instead
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            # for channel pages this is the channel id; None for playlists
            # (filled with item_id below)
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        # normalize thumbnails, dropping entries without a valid URL
        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # hashtag pages carry their title in the header instead
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # playlist pages: pull the uploader from the sidebar instead
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        ytcfg = self._extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
3853
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Generate entries of a mix playlist, paging via the 'next' API.

        Stops when a page yields no new videos or when the first video of the
        mix comes around again (mixes loop endlessly otherwise).
        """
        first_id = last_id = None
        ytcfg = self._extract_ytcfg(playlist_id, webpage)
        headers = self._generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # resume right after the last video yielded from the previous page
            # (pages overlap); -1 + 1 == 0 when last_id is not in this page
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            # NOTE(review): assumes the last playlist entry always carries a
            # watchEndpoint; the .get calls below would raise AttributeError if
            # try_get returned None - confirm against live responses
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query, ep='next', headers=headers, ytcfg=ytcfg,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3889
3890 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
3891 title = playlist.get('title') or try_get(
3892 data, lambda x: x['titleText']['simpleText'], compat_str)
3893 playlist_id = playlist.get('playlistId') or item_id
3894
3895 # Delegating everything except mix playlists to regular tab-based playlist URL
3896 playlist_url = urljoin(url, try_get(
3897 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3898 compat_str))
3899 if playlist_url and playlist_url != url:
3900 return self.url_result(
3901 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3902 video_title=title)
3903
3904 return self.playlist_result(
3905 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
3906 playlist_id=playlist_id, playlist_title=title)
3907
3908 def _extract_availability(self, data):
3909 """
3910 Gets the availability of a given playlist/tab.
3911 Note: Unless YouTube tells us explicitly, we do not assume it is public
3912 @param data: response
3913 """
3914 is_private = is_unlisted = None
3915 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3916 badge_labels = self._extract_badges(renderer)
3917
3918 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3919 privacy_dropdown_entries = try_get(
3920 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3921 for renderer_dict in privacy_dropdown_entries:
3922 is_selected = try_get(
3923 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3924 if not is_selected:
3925 continue
3926 label = self._get_text(
3927 try_get(renderer_dict, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
3928 if label:
3929 badge_labels.add(label.lower())
3930 break
3931
3932 for badge_label in badge_labels:
3933 if badge_label == 'unlisted':
3934 is_unlisted = True
3935 elif badge_label == 'private':
3936 is_private = True
3937 elif badge_label == 'public':
3938 is_unlisted = is_private = False
3939 return self._availability(is_private, False, False, False, is_unlisted)
3940
3941 @staticmethod
3942 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
3943 sidebar_renderer = try_get(
3944 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
3945 for item in sidebar_renderer:
3946 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
3947 if renderer:
3948 return renderer
3949
3950 def _reload_with_unavailable_videos(self, item_id, data, webpage):
3951 """
3952 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
3953 """
3954 browse_id = params = None
3955 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
3956 if not renderer:
3957 return
3958 menu_renderer = try_get(
3959 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
3960 for menu_item in menu_renderer:
3961 if not isinstance(menu_item, dict):
3962 continue
3963 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
3964 text = try_get(
3965 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
3966 if not text or text.lower() != 'show unavailable videos':
3967 continue
3968 browse_endpoint = try_get(
3969 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
3970 browse_id = browse_endpoint.get('browseId')
3971 params = browse_endpoint.get('params')
3972 break
3973
3974 ytcfg = self._extract_ytcfg(item_id, webpage)
3975 headers = self._generate_api_headers(
3976 ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3977 identity_token=self._extract_identity_token(webpage, item_id=item_id),
3978 visitor_data=try_get(
3979 self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
3980 query = {
3981 'params': params or 'wgYCCAA=',
3982 'browseId': browse_id or 'VL%s' % item_id
3983 }
3984 return self._extract_response(
3985 item_id=item_id, headers=headers, query=query,
3986 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
3987 note='Downloading API JSON with unavailable videos')
3988
3989 def _extract_webpage(self, url, item_id):
3990 retries = self.get_param('extractor_retries', 3)
3991 count = -1
3992 last_error = 'Incomplete yt initial data recieved'
3993 while count < retries:
3994 count += 1
3995 # Sometimes youtube returns a webpage with incomplete ytInitialData
3996 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3997 if count:
3998 self.report_warning('%s. Retrying ...' % last_error)
3999 webpage = self._download_webpage(
4000 url, item_id,
4001 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4002 data = self._extract_yt_initial_data(item_id, webpage)
4003 if data.get('contents') or data.get('currentVideoEndpoint'):
4004 break
4005 # Extract alerts here only when there is error
4006 self._extract_and_report_alerts(data)
4007 if count >= retries:
4008 raise ExtractorError(last_error)
4009 return webpage, data
4010
4011 @staticmethod
4012 def _smuggle_data(entries, data):
4013 for entry in entries:
4014 if data:
4015 entry['url'] = smuggle_url(entry['url'], data)
4016 yield entry
4017
4018 def _real_extract(self, url):
4019 url, smuggled_data = unsmuggle_url(url, {})
4020 if self.is_music_url(url):
4021 smuggled_data['is_music_url'] = True
4022 info_dict = self.__real_extract(url, smuggled_data)
4023 if info_dict.get('entries'):
4024 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4025 return info_dict
4026
    # Splits a URL into pre (everything _VALID_URL matches), an optional tab
    # segment, and the trailing post part; the (?(channel_type)...) conditional
    # only allows the tab group when the 'channel_type' group matched
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4028
    def __real_extract(self, url, smuggled_data):
        """Core tab/playlist extraction: normalize the URL, apply redirects and
        compat options, then dispatch to tabs / playlist / single-video handling.
        """
        item_id = self._match_id(url)
        # force canonical host so the markup is predictable
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # groupdict of _url_re with None values replaced by ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # re-assemble and re-parse the possibly rewritten URL
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        # no tabs - maybe a watch-page playlist (mix)
        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        # last resort: a bare video endpoint
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4143
4144
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to YoutubeTabIE for tab URLs and to the watch extractor
        for playlist URLs that also carry a video id (?v=...)."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Normalize any matched playlist reference (bare id, embed, etc.)
        into a canonical /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url)
        if not query:
            # Bare playlist id was given - synthesize the query string
            query = {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            # Preserve the music.youtube.com origin for downstream handling
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4227
4228
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite a youtu.be share link carrying a list= parameter into the
        canonical watch URL so YoutubeTabIE keeps the playlist context."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4267
4268
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Map "ytuser:NAME" onto the corresponding /user/ channel page."""
        user_id = self._match_id(url)
        channel_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            channel_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4282
4283
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos are exposed as the auto-generated "LL" playlist.
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4301
4302
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional protobuf search filter; subclasses set this to change ordering
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* video results for *query*, following search
        continuations page by page via the innertube 'search' endpoint."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        # Continuation request parameters; empty dict means "first page"
        continuation = {}
        for page_num in itertools.count(1):
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page nests results differently from continuation pages,
            # so try both layouts
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    # Keep the first continuation token found among the entries
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    # Skip anything that is not a plain video renderer
                    # (ads, channel renderers, shelf items, ...)
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token on this page means we reached the end
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
4370
4371
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # URL-encoded search filter blob ('CAI=') that sorts results by upload
    # date, newest first (see IE_DESC)
    _SEARCH_PARAMS = 'CAI%3D'
4377
4378
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        """Pull the search query (and optional 'sp' filter blob) out of a
        /results URL and run it as a regular ytsearch query."""
        params = parse_qs(url)
        query = (params.get('search_query') or params.get('q'))[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4404
4405
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the extractor name from the concrete feed, e.g. 'youtube:history'
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Every feed lives under /feed/<name>; delegate to the tab extractor
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4422
4423
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch-later is the auto-generated "WL" playlist.
        watchlater_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watchlater_url, ie=YoutubeTabIE.ie_key())
4436
4437
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com homepage, which shows recommendations
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Unlike the other feeds, recommendations are served without login
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4453
4454
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Redirects to https://www.youtube.com/feed/subscriptions (see base class)
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4466
4467
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    # Redirects to https://www.youtube.com/feed/history (see base class)
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4476
4477
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
        attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: a watch URL without ?v= usually means the user's
        shell ate everything after an unquoted '&'.

        Fix: the hint previously told the user to run 'youtube-dl', but this
        is the yt-dlp fork, so name the correct executable.
        """
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
4525
4526
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: the matched id is shorter than the 11 characters a
        valid YouTube video id requires, so the URL was probably cut off."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)