yt_dlp/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import base64
   6 import calendar
   7 import copy
   8 import datetime
   9 import hashlib
  10 import itertools
  11 import json
  12 import os.path
  13 import random
  14 import re
  15 import time
  16 import traceback
  17
  18 from .common import InfoExtractor, SearchInfoExtractor
  19 from ..compat import (
  20     compat_chr,
  21     compat_HTTPError,
  22     compat_parse_qs,
  23     compat_str,
  24     compat_urllib_parse_unquote_plus,
  25     compat_urllib_parse_urlencode,
  26     compat_urllib_parse_urlparse,
  27     compat_urlparse,
  28 )
  29 from ..jsinterp import JSInterpreter
  30 from ..utils import (
  31     bytes_to_intlist,
  32     clean_html,
  33     datetime_from_str,
  34     dict_get,
  35     error_to_compat_str,
  36     ExtractorError,
  37     float_or_none,
  38     format_field,
  39     int_or_none,
  40     intlist_to_bytes,
  41     is_html,
  42     mimetype2ext,
  43     network_exceptions,
  44     orderedSet,
  45     parse_codecs,
  46     parse_count,
  47     parse_duration,
  48     parse_iso8601,
  49     parse_qs,
  50     qualities,
  51     remove_end,
  52     remove_start,
  53     smuggle_url,
  54     str_or_none,
  55     str_to_int,
  56     traverse_obj,
  57     try_get,
  58     unescapeHTML,
  59     unified_strdate,
  60     unsmuggle_url,
  61     update_url_query,
  62     url_or_none,
  63     urljoin,
  64     variadic,
  65 )
  66
  67
  68 # any clients starting with _ cannot be explicity requested by the user
  69 INNERTUBE_CLIENTS = {
  70     'web': {
  71         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
  72         'INNERTUBE_CONTEXT': {
  73             'client': {
  74                 'clientName': 'WEB',
  75                 'clientVersion': '2.20210622.10.00',
  76             }
  77         },
  78         'INNERTUBE_CONTEXT_CLIENT_NAME': 1
  79     },
  80     'web_embedded': {
  81         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
  82         'INNERTUBE_CONTEXT': {
  83             'client': {
  84                 'clientName': 'WEB_EMBEDDED_PLAYER',
  85                 'clientVersion': '1.20210620.0.1',
  86             },
  87         },
  88         'INNERTUBE_CONTEXT_CLIENT_NAME': 56
  89     },
  90     'web_music': {
  91         'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
  92         'INNERTUBE_HOST': 'music.youtube.com',
  93         'INNERTUBE_CONTEXT': {
  94             'client': {
  95                 'clientName': 'WEB_REMIX',
  96                 'clientVersion': '1.20210621.00.00',
  97             }
  98         },
  99         'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
 100     },
 101     'web_creator': {
 102         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 103         'INNERTUBE_CONTEXT': {
 104             'client': {
 105                 'clientName': 'WEB_CREATOR',
 106                 'clientVersion': '1.20210621.00.00',
 107             }
 108         },
 109         'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
 110     },
 111     'android': {
 112         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 113         'INNERTUBE_CONTEXT': {
 114             'client': {
 115                 'clientName': 'ANDROID',
 116                 'clientVersion': '16.20',
 117             }
 118         },
 119         'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
 120         'REQUIRE_JS_PLAYER': False
 121     },
 122     'android_embedded': {
 123         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 124         'INNERTUBE_CONTEXT': {
 125             'client': {
 126                 'clientName': 'ANDROID_EMBEDDED_PLAYER',
 127                 'clientVersion': '16.20',
 128             },
 129         },
 130         'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
 131         'REQUIRE_JS_PLAYER': False
 132     },
 133     'android_music': {
 134         'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
 135         'INNERTUBE_HOST': 'music.youtube.com',
 136         'INNERTUBE_CONTEXT': {
 137             'client': {
 138                 'clientName': 'ANDROID_MUSIC',
 139                 'clientVersion': '4.32',
 140             }
 141         },
 142         'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
 143         'REQUIRE_JS_PLAYER': False
 144     },
 145     'android_creator': {
 146         'INNERTUBE_CONTEXT': {
 147             'client': {
 148                 'clientName': 'ANDROID_CREATOR',
 149                 'clientVersion': '21.24.100',
 150             },
 151         },
 152         'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
 153         'REQUIRE_JS_PLAYER': False
 154     },
 155     # ios has HLS live streams
 156     # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
 157     'ios': {
 158         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 159         'INNERTUBE_CONTEXT': {
 160             'client': {
 161                 'clientName': 'IOS',
 162                 'clientVersion': '16.20',
 163             }
 164         },
 165         'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
 166         'REQUIRE_JS_PLAYER': False
 167     },
 168     'ios_embedded': {
 169         'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
 170         'INNERTUBE_CONTEXT': {
 171             'client': {
 172                 'clientName': 'IOS_MESSAGES_EXTENSION',
 173                 'clientVersion': '16.20',
 174             },
 175         },
 176         'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
 177         'REQUIRE_JS_PLAYER': False
 178     },
 179     'ios_music': {
 180         'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
 181         'INNERTUBE_HOST': 'music.youtube.com',
 182         'INNERTUBE_CONTEXT': {
 183             'client': {
 184                 'clientName': 'IOS_MUSIC',
 185                 'clientVersion': '4.32',
 186             },
 187         },
 188         'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
 189         'REQUIRE_JS_PLAYER': False
 190     },
 191     'ios_creator': {
 192         'INNERTUBE_CONTEXT': {
 193             'client': {
 194                 'clientName': 'IOS_CREATOR',
 195                 'clientVersion': '21.24.100',
 196             },
 197         },
 198         'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
 199         'REQUIRE_JS_PLAYER': False
 200     },
 201     # mweb has 'ultralow' formats
 202     # See: https://github.com/yt-dlp/yt-dlp/pull/557
 203     'mweb': {
 204         'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
 205         'INNERTUBE_CONTEXT': {
 206             'client': {
 207                 'clientName': 'MWEB',
 208                 'clientVersion': '2.20210721.07.00',
 209             }
 210         },
 211         'INNERTUBE_CONTEXT_CLIENT_NAME': 2
 212     },
 213 }
 214
 215
 216 def build_innertube_clients():
 217     third_party = {
 218         'embedUrl': 'https://google.com',  # Can be any valid URL
 219     }
 220     base_clients = ('android', 'web', 'ios', 'mweb')
 221     priority = qualities(base_clients[::-1])
 222
 223     for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
 224         ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
 225         ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
 226         ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
 227         ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
 228         ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
 229
 230         if client in base_clients:
 231             INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
 232             agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
 233             agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
 234             agegate_ytcfg['priority'] -= 1
 235         elif client.endswith('_embedded'):
 236             ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
 237             ytcfg['priority'] -= 2
 238         else:
 239             ytcfg['priority'] -= 3
 240
 241
 242 build_innertube_clients()
 243
 244
 245 class YoutubeBaseInfoExtractor(InfoExtractor):
 246     """Provide base functions for Youtube extractors"""
 247
 248     _RESERVED_NAMES = (
 249         r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
 250         r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
 251         r'browse|oembed|get_video_info|iframe_api|s/player|'
 252         r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
 253
 254     _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
 255
 256     _NETRC_MACHINE = 'youtube'
 257
 258     # If True it will raise an error if no login info is provided
 259     _LOGIN_REQUIRED = False
 260
 261     r'''  # Unused since login is broken
 262     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
 263     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
 264
 265     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
 266     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
 267     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
 268     '''
 269
 270     def _login(self):
 271         """
 272         Attempt to log in to YouTube.
 273         True is returned if successful or skipped.
 274         False is returned if login failed.
 275
 276         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
 277         """
 278
 279         def warn(message):
 280             self.report_warning(message)
 281
 282         # username+password login is broken
 283         if (self._LOGIN_REQUIRED
 284                 and self.get_param('cookiefile') is None
 285                 and self.get_param('cookiesfrombrowser') is None):
 286             self.raise_login_required(
 287                 'Login details are needed to download this content', method='cookies')
 288         username, password = self._get_login_info()
 289         if username:
 290             warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
 291         return
 292
 293         # Everything below this is broken!
 294         r'''
 295         # No authentication to be performed
 296         if username is None:
 297             if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
 298                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 299             # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
 300             #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
 301             return True
 302
 303         login_page = self._download_webpage(
 304             self._LOGIN_URL, None,
 305             note='Downloading login page',
 306             errnote='unable to fetch login page', fatal=False)
 307         if login_page is False:
 308             return
 309
 310         login_form = self._hidden_inputs(login_page)
 311
 312         def req(url, f_req, note, errnote):
 313             data = login_form.copy()
 314             data.update({
 315                 'pstMsg': 1,
 316                 'checkConnection': 'youtube',
 317                 'checkedDomains': 'youtube',
 318                 'hl': 'en',
 319                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 320                 'f.req': json.dumps(f_req),
 321                 'flowName': 'GlifWebSignIn',
 322                 'flowEntry': 'ServiceLogin',
 323                 # TODO: reverse actual botguard identifier generation algo
 324                 'bgRequest': '["identifier",""]',
 325             })
 326             return self._download_json(
 327                 url, None, note=note, errnote=errnote,
 328                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 329                 fatal=False,
 330                 data=urlencode_postdata(data), headers={
 331                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 332                     'Google-Accounts-XSRF': 1,
 333                 })
 334
 335         lookup_req = [
 336             username,
 337             None, [], None, 'US', None, None, 2, False, True,
 338             [
 339                 None, None,
 340                 [2, 1, None, 1,
 341                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 342                  None, [], 4],
 343                 1, [None, None, []], None, None, None, True
 344             ],
 345             username,
 346         ]
 347
 348         lookup_results = req(
 349             self._LOOKUP_URL, lookup_req,
 350             'Looking up account info', 'Unable to look up account info')
 351
 352         if lookup_results is False:
 353             return False
 354
 355         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 356         if not user_hash:
 357             warn('Unable to extract user hash')
 358             return False
 359
 360         challenge_req = [
 361             user_hash,
 362             None, 1, None, [1, None, None, None, [password, None, True]],
 363             [
 364                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 365                 1, [None, None, []], None, None, None, True
 366             ]]
 367
 368         challenge_results = req(
 369             self._CHALLENGE_URL, challenge_req,
 370             'Logging in', 'Unable to log in')
 371
 372         if challenge_results is False:
 373             return
 374
 375         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 376         if login_res:
 377             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 378             warn(
 379                 'Unable to login: %s' % 'Invalid password'
 380                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 381             return False
 382
 383         res = try_get(challenge_results, lambda x: x[0][-1], list)
 384         if not res:
 385             warn('Unable to extract result entry')
 386             return False
 387
 388         login_challenge = try_get(res, lambda x: x[0][0], list)
 389         if login_challenge:
 390             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 391             if challenge_str == 'TWO_STEP_VERIFICATION':
 392                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 393                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 394                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 395                 if status == 'QUOTA_EXCEEDED':
 396                     warn('Exceeded the limit of TFA codes, try later')
 397                     return False
 398
 399                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 400                 if not tl:
 401                     warn('Unable to extract TL')
 402                     return False
 403
 404                 tfa_code = self._get_tfa_info('2-step verification code')
 405
 406                 if not tfa_code:
 407                     warn(
 408                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 409                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 410                     return False
 411
 412                 tfa_code = remove_start(tfa_code, 'G-')
 413
 414                 tfa_req = [
 415                     user_hash, None, 2, None,
 416                     [
 417                         9, None, None, None, None, None, None, None,
 418                         [None, tfa_code, True, 2]
 419                     ]]
 420
 421                 tfa_results = req(
 422                     self._TFA_URL.format(tl), tfa_req,
 423                     'Submitting TFA code', 'Unable to submit TFA code')
 424
 425                 if tfa_results is False:
 426                     return False
 427
 428                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 429                 if tfa_res:
 430                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 431                     warn(
 432                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 433                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 434                     return False
 435
 436                 check_cookie_url = try_get(
 437                     tfa_results, lambda x: x[0][-1][2], compat_str)
 438             else:
 439                 CHALLENGES = {
 440                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 441                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 442                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 443                 }
 444                 challenge = CHALLENGES.get(
 445                     challenge_str,
 446                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 447                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 448                 return False
 449         else:
 450             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 451
 452         if not check_cookie_url:
 453             warn('Unable to extract CheckCookie URL')
 454             return False
 455
 456         check_cookie_results = self._download_webpage(
 457             check_cookie_url, None, 'Checking cookie', fatal=False)
 458
 459         if check_cookie_results is False:
 460             return False
 461
 462         if 'https://myaccount.google.com/' not in check_cookie_results:
 463             warn('Unable to log in')
 464             return False
 465
 466         return True
 467         '''
 468
 469     def _initialize_consent(self):
 470         cookies = self._get_cookies('https://www.youtube.com/')
 471         if cookies.get('__Secure-3PSID'):
 472             return
 473         consent_id = None
 474         consent = cookies.get('CONSENT')
 475         if consent:
 476             if 'YES' in consent.value:
 477                 return
 478             consent_id = self._search_regex(
 479                 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
 480         if not consent_id:
 481             consent_id = random.randint(100, 999)
 482         self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
 483
 484     def _real_initialize(self):
 485         self._initialize_consent()
 486         if self._downloader is None:
 487             return
 488         if not self._login():
 489             return
 490
 491     _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
 492     _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
 493     _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
 494
 495     def _get_default_ytcfg(self, client='web'):
 496         return copy.deepcopy(INNERTUBE_CLIENTS[client])
 497
 498     def _get_innertube_host(self, client='web'):
 499         return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
 500
 501     def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
 502         # try_get but with fallback to default ytcfg client values when present
 503         _func = lambda y: try_get(y, getter, expected_type)
 504         return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
 505
 506     def _extract_client_name(self, ytcfg, default_client='web'):
 507         return self._ytcfg_get_safe(
 508             ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
 509                     lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
 510
 511     @staticmethod
 512     def _extract_session_index(*data):
 513         for ytcfg in data:
 514             session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
 515             if session_index is not None:
 516                 return session_index
 517
 518     def _extract_client_version(self, ytcfg, default_client='web'):
 519         return self._ytcfg_get_safe(
 520             ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
 521                     lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
 522
 523     def _extract_api_key(self, ytcfg=None, default_client='web'):
 524         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
 525
 526     def _extract_context(self, ytcfg=None, default_client='web'):
 527         _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
 528         context = _get_context(ytcfg)
 529         if context:
 530             return context
 531
 532         context = _get_context(self._get_default_ytcfg(default_client))
 533         if not ytcfg:
 534             return context
 535
 536         # Recreate the client context (required)
 537         context['client'].update({
 538             'clientVersion': self._extract_client_version(ytcfg, default_client),
 539             'clientName': self._extract_client_name(ytcfg, default_client),
 540         })
 541         visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
 542         if visitor_data:
 543             context['client']['visitorData'] = visitor_data
 544         return context
 545
 546     _SAPISID = None
 547
 548     def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
 549         time_now = round(time.time())
 550         if self._SAPISID is None:
 551             yt_cookies = self._get_cookies('https://www.youtube.com')
 552             # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
 553             # See: https://github.com/yt-dlp/yt-dlp/issues/393
 554             sapisid_cookie = dict_get(
 555                 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
 556             if sapisid_cookie and sapisid_cookie.value:
 557                 self._SAPISID = sapisid_cookie.value
 558                 self.write_debug('Extracted SAPISID cookie')
 559                 # SAPISID cookie is required if not already present
 560                 if not yt_cookies.get('SAPISID'):
 561                     self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
 562                     self._set_cookie(
 563                         '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
 564             else:
 565                 self._SAPISID = False
 566         if not self._SAPISID:
 567             return None
 568         # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
 569         sapisidhash = hashlib.sha1(
 570             f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
 571         return f'SAPISIDHASH {time_now}_{sapisidhash}'
 572
 573     def _call_api(self, ep, query, video_id, fatal=True, headers=None,
 574                   note='Downloading API JSON', errnote='Unable to download API page',
 575                   context=None, api_key=None, api_hostname=None, default_client='web'):
 576
 577         data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
 578         data.update(query)
 579         real_headers = self.generate_api_headers(default_client=default_client)
 580         real_headers.update({'content-type': 'application/json'})
 581         if headers:
 582             real_headers.update(headers)
 583         return self._download_json(
 584             'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
 585             video_id=video_id, fatal=fatal, note=note, errnote=errnote,
 586             data=json.dumps(data).encode('utf8'), headers=real_headers,
 587             query={'key': api_key or self._extract_api_key()})
 588
 589     def extract_yt_initial_data(self, video_id, webpage):
 590         return self._parse_json(
 591             self._search_regex(
 592                 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
 593                  self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
 594             video_id)
 595
 596     def _extract_identity_token(self, webpage, item_id):
 597         if not webpage:
 598             return None
 599         ytcfg = self.extract_ytcfg(item_id, webpage)
 600         if ytcfg:
 601             token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
 602             if token:
 603                 return token
 604         return self._search_regex(
 605             r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
 606             'identity token', default=None)
 607
 608     @staticmethod
 609     def _extract_account_syncid(*args):
 610         """
 611         Extract syncId required to download private playlists of secondary channels
 612         @params response and/or ytcfg
 613         """
 614         for data in args:
 615             # ytcfg includes channel_syncid if on secondary channel
 616             delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
 617             if delegated_sid:
 618                 return delegated_sid
 619             sync_ids = (try_get(
 620                 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
 621                        lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
 622             if len(sync_ids) >= 2 and sync_ids[1]:
 623                 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
 624                 # and just "user_syncid||" for primary channel. We only want the channel_syncid
 625                 return sync_ids[0]
 626
 627     def extract_ytcfg(self, video_id, webpage):
 628         if not webpage:
 629             return {}
 630         return self._parse_json(
 631             self._search_regex(
 632                 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
 633                 default='{}'), video_id, fatal=False) or {}
 634
 635     def generate_api_headers(
 636             self, ytcfg=None, identity_token=None, account_syncid=None,
 637             visitor_data=None, api_hostname=None, default_client='web', session_index=None):
 638         origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
 639         headers = {
 640             'X-YouTube-Client-Name': compat_str(
 641                 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
 642             'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
 643             'Origin': origin
 644         }
 645         if not visitor_data and ytcfg:
 646             visitor_data = try_get(
 647                 self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
 648         if identity_token:
 649             headers['X-Youtube-Identity-Token'] = identity_token
 650         if account_syncid:
 651             headers['X-Goog-PageId'] = account_syncid
 652         if session_index is None and ytcfg:
 653             session_index = self._extract_session_index(ytcfg)
 654         if account_syncid or session_index is not None:
 655             headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
 656         if visitor_data:
 657             headers['X-Goog-Visitor-Id'] = visitor_data
 658         auth = self._generate_sapisidhash_header(origin)
 659         if auth is not None:
 660             headers['Authorization'] = auth
 661             headers['X-Origin'] = origin
 662         return headers
 663
 664     @staticmethod
 665     def _build_api_continuation_query(continuation, ctp=None):
 666         query = {
 667             'continuation': continuation
 668         }
 669         # TODO: Inconsistency with clickTrackingParams.
 670         # Currently we have a fixed ctp contained within context (from ytcfg)
 671         # and a ctp in root query for continuation.
 672         if ctp:
 673             query['clickTracking'] = {'clickTrackingParams': ctp}
 674         return query
 675
 676     @classmethod
 677     def _extract_next_continuation_data(cls, renderer):
 678         next_continuation = try_get(
 679             renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
 680                        lambda x: x['continuation']['reloadContinuationData']), dict)
 681         if not next_continuation:
 682             return
 683         continuation = next_continuation.get('continuation')
 684         if not continuation:
 685             return
 686         ctp = next_continuation.get('clickTrackingParams')
 687         return cls._build_api_continuation_query(continuation, ctp)
 688
 689     @classmethod
 690     def _extract_continuation_ep_data(cls, continuation_ep: dict):
 691         if isinstance(continuation_ep, dict):
 692             continuation = try_get(
 693                 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
 694             if not continuation:
 695                 return
 696             ctp = continuation_ep.get('clickTrackingParams')
 697             return cls._build_api_continuation_query(continuation, ctp)
 698
 699     @classmethod
 700     def _extract_continuation(cls, renderer):
 701         next_continuation = cls._extract_next_continuation_data(renderer)
 702         if next_continuation:
 703             return next_continuation
 704
 705         contents = []
 706         for key in ('contents', 'items'):
 707             contents.extend(try_get(renderer, lambda x: x[key], list) or [])
 708
 709         for content in contents:
 710             if not isinstance(content, dict):
 711                 continue
 712             continuation_ep = try_get(
 713                 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
 714                           lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
 715                 dict)
 716             continuation = cls._extract_continuation_ep_data(continuation_ep)
 717             if continuation:
 718                 return continuation
 719
 720     @classmethod
 721     def _extract_alerts(cls, data):
 722         for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
 723             if not isinstance(alert_dict, dict):
 724                 continue
 725             for alert in alert_dict.values():
 726                 alert_type = alert.get('type')
 727                 if not alert_type:
 728                     continue
 729                 message = cls._get_text(alert, 'text')
 730                 if message:
 731                     yield alert_type, message
 732
 733     def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
 734         errors = []
 735         warnings = []
 736         for alert_type, alert_message in alerts:
 737             if alert_type.lower() == 'error' and fatal:
 738                 errors.append([alert_type, alert_message])
 739             else:
 740                 warnings.append([alert_type, alert_message])
 741
 742         for alert_type, alert_message in (warnings + errors[:-1]):
 743             self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
 744         if errors:
 745             raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
 746
 747     def _extract_and_report_alerts(self, data, *args, **kwargs):
 748         return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
 749
 750     def _extract_badges(self, renderer: dict):
 751         badges = set()
 752         for badge in try_get(renderer, lambda x: x['badges'], list) or []:
 753             label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
 754             if label:
 755                 badges.add(label.lower())
 756         return badges
 757
 758     @staticmethod
 759     def _get_text(data, *path_list, max_runs=None):
 760         for path in path_list or [None]:
 761             if path is None:
 762                 obj = [data]
 763             else:
 764                 obj = traverse_obj(data, path, default=[])
 765                 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
 766                     obj = [obj]
 767             for item in obj:
 768                 text = try_get(item, lambda x: x['simpleText'], compat_str)
 769                 if text:
 770                     return text
 771                 runs = try_get(item, lambda x: x['runs'], list) or []
 772                 if not runs and isinstance(item, list):
 773                     runs = item
 774
 775                 runs = runs[:min(len(runs), max_runs or len(runs))]
 776                 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
 777                 if text:
 778                     return text
 779
 780     def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
 781                           ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
 782                           default_client='web'):
 783         response = None
 784         last_error = None
 785         count = -1
 786         retries = self.get_param('extractor_retries', 3)
 787         if check_get_keys is None:
 788             check_get_keys = []
 789         while count < retries:
 790             count += 1
 791             if last_error:
 792                 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
 793             try:
 794                 response = self._call_api(
 795                     ep=ep, fatal=True, headers=headers,
 796                     video_id=item_id, query=query,
 797                     context=self._extract_context(ytcfg, default_client),
 798                     api_key=self._extract_api_key(ytcfg, default_client),
 799                     api_hostname=api_hostname, default_client=default_client,
 800                     note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
 801             except ExtractorError as e:
 802                 if isinstance(e.cause, network_exceptions):
 803                     if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
 804                         e.cause.seek(0)
 805                         yt_error = try_get(
 806                             self._parse_json(e.cause.read().decode(), item_id, fatal=False),
 807                             lambda x: x['error']['message'], compat_str)
 808                         if yt_error:
 809                             self._report_alerts([('ERROR', yt_error)], fatal=False)
 810                     # Downloading page may result in intermittent 5xx HTTP error
 811                     # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
 812                     # We also want to catch all other network exceptions since errors in later pages can be troublesome
 813                     # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
 814                     if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
 815                         last_error = error_to_compat_str(e.cause or e.msg)
 816                         if count < retries:
 817                             continue
 818                 if fatal:
 819                     raise
 820                 else:
 821                     self.report_warning(error_to_compat_str(e))
 822                     return
 823
 824             else:
 825                 # Youtube may send alerts if there was an issue with the continuation page
 826                 try:
 827                     self._extract_and_report_alerts(response, expected=False, only_once=True)
 828                 except ExtractorError as e:
 829                     # YouTube servers may return errors we want to retry on in a 200 OK response
 830                     # See: https://github.com/yt-dlp/yt-dlp/issues/839
 831                     if 'unknown error' in e.msg.lower():
 832                         last_error = e.msg
 833                         continue
 834                     if fatal:
 835                         raise
 836                     self.report_warning(error_to_compat_str(e))
 837                     return
 838                 if not check_get_keys or dict_get(response, check_get_keys):
 839                     break
 840                 # Youtube sometimes sends incomplete data
 841                 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
 842                 last_error = 'Incomplete data received'
 843                 if count >= retries:
 844                     if fatal:
 845                         raise ExtractorError(last_error)
 846                     else:
 847                         self.report_warning(last_error)
 848                         return
 849         return response
 850
 851     @staticmethod
 852     def is_music_url(url):
 853         return re.match(r'https?://music\.youtube\.com/', url) is not None
 854
 855     def _extract_video(self, renderer):
 856         video_id = renderer.get('videoId')
 857         title = self._get_text(renderer, 'title')
 858         description = self._get_text(renderer, 'descriptionSnippet')
 859         duration = parse_duration(self._get_text(
 860             renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
 861         view_count_text = self._get_text(renderer, 'viewCountText') or ''
 862         view_count = str_to_int(self._search_regex(
 863             r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
 864             'view count', default=None))
 865
 866         uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
 867
 868         return {
 869             '_type': 'url',
 870             'ie_key': YoutubeIE.ie_key(),
 871             'id': video_id,
 872             'url': f'https://www.youtube.com/watch?v={video_id}',
 873             'title': title,
 874             'description': description,
 875             'duration': duration,
 876             'view_count': view_count,
 877             'uploader': uploader,
 878         }
 879
 880
 881 class YoutubeIE(YoutubeBaseInfoExtractor):
 882     IE_DESC = 'YouTube.com'
 883     _INVIDIOUS_SITES = (
 884         # invidious-redirect websites
 885         r'(?:www\.)?redirect\.invidious\.io',
 886         r'(?:(?:www|dev)\.)?invidio\.us',
 887         # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
 888         r'(?:www\.)?invidious\.pussthecat\.org',
 889         r'(?:www\.)?invidious\.zee\.li',
 890         r'(?:www\.)?invidious\.ethibox\.fr',
 891         r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
 892         # youtube-dl invidious instances list
 893         r'(?:(?:www|no)\.)?invidiou\.sh',
 894         r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
 895         r'(?:www\.)?invidious\.kabi\.tk',
 896         r'(?:www\.)?invidious\.mastodon\.host',
 897         r'(?:www\.)?invidious\.zapashcanon\.fr',
 898         r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
 899         r'(?:www\.)?invidious\.tinfoil-hat\.net',
 900         r'(?:www\.)?invidious\.himiko\.cloud',
 901         r'(?:www\.)?invidious\.reallyancient\.tech',
 902         r'(?:www\.)?invidious\.tube',
 903         r'(?:www\.)?invidiou\.site',
 904         r'(?:www\.)?invidious\.site',
 905         r'(?:www\.)?invidious\.xyz',
 906         r'(?:www\.)?invidious\.nixnet\.xyz',
 907         r'(?:www\.)?invidious\.048596\.xyz',
 908         r'(?:www\.)?invidious\.drycat\.fr',
 909         r'(?:www\.)?inv\.skyn3t\.in',
 910         r'(?:www\.)?tube\.poal\.co',
 911         r'(?:www\.)?tube\.connect\.cafe',
 912         r'(?:www\.)?vid\.wxzm\.sx',
 913         r'(?:www\.)?vid\.mint\.lgbt',
 914         r'(?:www\.)?vid\.puffyan\.us',
 915         r'(?:www\.)?yewtu\.be',
 916         r'(?:www\.)?yt\.elukerio\.org',
 917         r'(?:www\.)?yt\.lelux\.fi',
 918         r'(?:www\.)?invidious\.ggc-project\.de',
 919         r'(?:www\.)?yt\.maisputain\.ovh',
 920         r'(?:www\.)?ytprivate\.com',
 921         r'(?:www\.)?invidious\.13ad\.de',
 922         r'(?:www\.)?invidious\.toot\.koeln',
 923         r'(?:www\.)?invidious\.fdn\.fr',
 924         r'(?:www\.)?watch\.nettohikari\.com',
 925         r'(?:www\.)?invidious\.namazso\.eu',
 926         r'(?:www\.)?invidious\.silkky\.cloud',
 927         r'(?:www\.)?invidious\.exonip\.de',
 928         r'(?:www\.)?invidious\.riverside\.rocks',
 929         r'(?:www\.)?invidious\.blamefran\.net',
 930         r'(?:www\.)?invidious\.moomoo\.de',
 931         r'(?:www\.)?ytb\.trom\.tf',
 932         r'(?:www\.)?yt\.cyberhost\.uk',
 933         r'(?:www\.)?kgg2m7yk5aybusll\.onion',
 934         r'(?:www\.)?qklhadlycap4cnod\.onion',
 935         r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
 936         r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
 937         r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
 938         r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
 939         r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
 940         r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
 941         r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
 942         r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
 943         r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
 944         r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
 945     )
 946     _VALID_URL = r"""(?x)^
 947                      (
 948                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 949                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
 950                             (?:www\.)?deturl\.com/www\.youtube\.com|
 951                             (?:www\.)?pwnyoutube\.com|
 952                             (?:www\.)?hooktube\.com|
 953                             (?:www\.)?yourepeat\.com|
 954                             tube\.majestyc\.net|
 955                             %(invidious)s|
 956                             youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
 957                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 958                          (?:                                                  # the various things that can precede the ID:
 959                              (?:(?:v|embed|e|shorts)/(?!videoseries))         # v/ or embed/ or e/ or shorts/
 960                              |(?:                                             # or the v= param in all its forms
 961                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 962                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 963                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 964                                  v=
 965                              )
 966                          ))
 967                          |(?:
 968                             youtu\.be|                                        # just youtu.be/xxxx
 969                             vid\.plus|                                        # or vid.plus/xxxx
 970                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 971                             %(invidious)s
 972                          )/
 973                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 974                          )
 975                      )?                                                       # all until now is optional -> you can pass the naked ID
 976                      (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
 977                      (?(1).+)?                                                # if we found the ID, everything can follow
 978                      (?:\#|$)""" % {
 979         'invidious': '|'.join(_INVIDIOUS_SITES),
 980     }
 981     _PLAYER_INFO_RE = (
 982         r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
 983         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
 984         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
 985     )
 986     _formats = {
 987         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 988         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 989         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 990         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 991         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 992         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 993         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 994         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 995         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 996         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 997         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 998         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 999         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1000         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1001         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1002         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1003         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1004         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1005
1006
1007         # 3D videos
1008         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1009         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1010         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1011         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1012         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1013         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1014         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1015
1016         # Apple HTTP Live Streaming
1017         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1018         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1019         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1020         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1021         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1022         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1023         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1024         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1025
1026         # DASH mp4 video
1027         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1028         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1029         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1030         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1031         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1032         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1033         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1034         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1035         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1036         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1037         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1038         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1039
1040         # Dash mp4 audio
1041         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1042         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1043         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1044         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1045         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1046         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1047         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1048
1049         # Dash webm
1050         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1051         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1052         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1053         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1054         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1055         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1056         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1057         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1058         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1059         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1060         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1061         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1062         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1063         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1064         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1065         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1066         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1067         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1068         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1069         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1070         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1071         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1072
1073         # Dash webm audio
1074         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1075         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1076
1077         # Dash webm audio with opus inside
1078         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1079         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1080         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1081
1082         # RTMP (unnamed)
1083         '_rtmp': {'protocol': 'rtmp'},
1084
1085         # av01 video only formats sometimes served with "unknown" codecs
1086         '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1087         '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1088         '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1089         '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1090         '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1091         '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1092         '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1093         '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1094     }
1095     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1096
1097     _GEO_BYPASS = False
1098
1099     IE_NAME = 'youtube'
1100     _TESTS = [
1101         {
1102             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1103             'info_dict': {
1104                 'id': 'BaW_jenozKc',
1105                 'ext': 'mp4',
1106                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1107                 'uploader': 'Philipp Hagemeister',
1108                 'uploader_id': 'phihag',
1109                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1110                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1111                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1112                 'upload_date': '20121002',
1113                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1114                 'categories': ['Science & Technology'],
1115                 'tags': ['youtube-dl'],
1116                 'duration': 10,
1117                 'view_count': int,
1118                 'like_count': int,
1119                 'dislike_count': int,
1120                 'start_time': 1,
1121                 'end_time': 9,
1122             }
1123         },
1124         {
1125             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1126             'note': 'Embed-only video (#1746)',
1127             'info_dict': {
1128                 'id': 'yZIXLfi8CZQ',
1129                 'ext': 'mp4',
1130                 'upload_date': '20120608',
1131                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1132                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1133                 'uploader': 'SET India',
1134                 'uploader_id': 'setindia',
1135                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1136                 'age_limit': 18,
1137             },
1138             'skip': 'Private video',
1139         },
1140         {
1141             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1142             'note': 'Use the first video ID in the URL',
1143             'info_dict': {
1144                 'id': 'BaW_jenozKc',
1145                 'ext': 'mp4',
1146                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1147                 'uploader': 'Philipp Hagemeister',
1148                 'uploader_id': 'phihag',
1149                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1150                 'upload_date': '20121002',
1151                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1152                 'categories': ['Science & Technology'],
1153                 'tags': ['youtube-dl'],
1154                 'duration': 10,
1155                 'view_count': int,
1156                 'like_count': int,
1157                 'dislike_count': int,
1158             },
1159             'params': {
1160                 'skip_download': True,
1161             },
1162         },
1163         {
1164             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1165             'note': '256k DASH audio (format 141) via DASH manifest',
1166             'info_dict': {
1167                 'id': 'a9LDPn-MO4I',
1168                 'ext': 'm4a',
1169                 'upload_date': '20121002',
1170                 'uploader_id': '8KVIDEO',
1171                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1172                 'description': '',
1173                 'uploader': '8KVIDEO',
1174                 'title': 'UHDTV TEST 8K VIDEO.mp4'
1175             },
1176             'params': {
1177                 'youtube_include_dash_manifest': True,
1178                 'format': '141',
1179             },
1180             'skip': 'format 141 not served anymore',
1181         },
1182         # DASH manifest with encrypted signature
1183         {
1184             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1185             'info_dict': {
1186                 'id': 'IB3lcPjvWLA',
1187                 'ext': 'm4a',
1188                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1189                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1190                 'duration': 244,
1191                 'uploader': 'AfrojackVEVO',
1192                 'uploader_id': 'AfrojackVEVO',
1193                 'upload_date': '20131011',
1194                 'abr': 129.495,
1195             },
1196             'params': {
1197                 'youtube_include_dash_manifest': True,
1198                 'format': '141/bestaudio[ext=m4a]',
1199             },
1200         },
1201         # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1202         {
1203             'note': 'Embed allowed age-gate video',
1204             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1205             'info_dict': {
1206                 'id': 'HtVdAasjOgU',
1207                 'ext': 'mp4',
1208                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1209                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1210                 'duration': 142,
1211                 'uploader': 'The Witcher',
1212                 'uploader_id': 'WitcherGame',
1213                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1214                 'upload_date': '20140605',
1215                 'age_limit': 18,
1216             },
1217         },
1218         {
1219             'note': 'Age-gate video with embed allowed in public site',
1220             'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1221             'info_dict': {
1222                 'id': 'HsUATh_Nc2U',
1223                 'ext': 'mp4',
1224                 'title': 'Godzilla 2 (Official Video)',
1225                 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1226                 'upload_date': '20200408',
1227                 'uploader_id': 'FlyingKitty900',
1228                 'uploader': 'FlyingKitty',
1229                 'age_limit': 18,
1230             },
1231         },
1232         {
1233             'note': 'Age-gate video embedable only with clientScreen=EMBED',
1234             'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1235             'info_dict': {
1236                 'id': 'Tq92D6wQ1mg',
1237                 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1238                 'ext': 'mp4',
1239                 'upload_date': '20191227',
1240                 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1241                 'uploader': 'Projekt Melody',
1242                 'description': 'md5:17eccca93a786d51bc67646756894066',
1243                 'age_limit': 18,
1244             },
1245         },
1246         {
1247             'note': 'Non-Agegated non-embeddable video',
1248             'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1249             'info_dict': {
1250                 'id': 'MeJVWBSsPAY',
1251                 'ext': 'mp4',
1252                 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1253                 'uploader': 'Herr Lurik',
1254                 'uploader_id': 'st3in234',
1255                 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1256                 'upload_date': '20130730',
1257             },
1258         },
1259         {
1260             'note': 'Non-bypassable age-gated video',
1261             'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1262             'only_matching': True,
1263         },
1264         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1265         # YouTube Red ad is not captured for creator
1266         {
1267             'url': '__2ABJjxzNo',
1268             'info_dict': {
1269                 'id': '__2ABJjxzNo',
1270                 'ext': 'mp4',
1271                 'duration': 266,
1272                 'upload_date': '20100430',
1273                 'uploader_id': 'deadmau5',
1274                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1275                 'creator': 'deadmau5',
1276                 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1277                 'uploader': 'deadmau5',
1278                 'title': 'Deadmau5 - Some Chords (HD)',
1279                 'alt_title': 'Some Chords',
1280             },
1281             'expected_warnings': [
1282                 'DASH manifest missing',
1283             ]
1284         },
1285         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1286         {
1287             'url': 'lqQg6PlCWgI',
1288             'info_dict': {
1289                 'id': 'lqQg6PlCWgI',
1290                 'ext': 'mp4',
1291                 'duration': 6085,
1292                 'upload_date': '20150827',
1293                 'uploader_id': 'olympic',
1294                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1295                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1296                 'uploader': 'Olympics',
1297                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
1298             },
1299             'params': {
1300                 'skip_download': 'requires avconv',
1301             }
1302         },
1303         # Non-square pixels
1304         {
1305             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1306             'info_dict': {
1307                 'id': '_b-2C3KPAM0',
1308                 'ext': 'mp4',
1309                 'stretched_ratio': 16 / 9.,
1310                 'duration': 85,
1311                 'upload_date': '20110310',
1312                 'uploader_id': 'AllenMeow',
1313                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1314                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1315                 'uploader': '孫ᄋᄅ',
1316                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1317             },
1318         },
1319         # url_encoded_fmt_stream_map is empty string
1320         {
1321             'url': 'qEJwOuvDf7I',
1322             'info_dict': {
1323                 'id': 'qEJwOuvDf7I',
1324                 'ext': 'webm',
1325                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1326                 'description': '',
1327                 'upload_date': '20150404',
1328                 'uploader_id': 'spbelect',
1329                 'uploader': 'Наблюдатели Петербурга',
1330             },
1331             'params': {
1332                 'skip_download': 'requires avconv',
1333             },
1334             'skip': 'This live event has ended.',
1335         },
1336         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1337         {
1338             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1339             'info_dict': {
1340                 'id': 'FIl7x6_3R5Y',
1341                 'ext': 'webm',
1342                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1343                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1344                 'duration': 220,
1345                 'upload_date': '20150625',
1346                 'uploader_id': 'dorappi2000',
1347                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1348                 'uploader': 'dorappi2000',
1349                 'formats': 'mincount:31',
1350             },
1351             'skip': 'not actual anymore',
1352         },
1353         # DASH manifest with segment_list
1354         {
1355             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1356             'md5': '8ce563a1d667b599d21064e982ab9e31',
1357             'info_dict': {
1358                 'id': 'CsmdDsKjzN8',
1359                 'ext': 'mp4',
1360                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1361                 'uploader': 'Airtek',
1362                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1363                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1364                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1365             },
1366             'params': {
1367                 'youtube_include_dash_manifest': True,
1368                 'format': '135',  # bestvideo
1369             },
1370             'skip': 'This live event has ended.',
1371         },
1372         {
1373             # Multifeed videos (multiple cameras), URL is for Main Camera
1374             'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1375             'info_dict': {
1376                 'id': 'jvGDaLqkpTg',
1377                 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1378                 'description': 'md5:e03b909557865076822aa169218d6a5d',
1379             },
1380             'playlist': [{
1381                 'info_dict': {
1382                     'id': 'jvGDaLqkpTg',
1383                     'ext': 'mp4',
1384                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1385                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1386                     'duration': 10643,
1387                     'upload_date': '20161111',
1388                     'uploader': 'Team PGP',
1389                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1390                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1391                 },
1392             }, {
1393                 'info_dict': {
1394                     'id': '3AKt1R1aDnw',
1395                     'ext': 'mp4',
1396                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1397                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1398                     'duration': 10991,
1399                     'upload_date': '20161111',
1400                     'uploader': 'Team PGP',
1401                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1402                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1403                 },
1404             }, {
1405                 'info_dict': {
1406                     'id': 'RtAMM00gpVc',
1407                     'ext': 'mp4',
1408                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1409                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1410                     'duration': 10995,
1411                     'upload_date': '20161111',
1412                     'uploader': 'Team PGP',
1413                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1414                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1415                 },
1416             }, {
1417                 'info_dict': {
1418                     'id': '6N2fdlP3C5U',
1419                     'ext': 'mp4',
1420                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1421                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1422                     'duration': 10990,
1423                     'upload_date': '20161111',
1424                     'uploader': 'Team PGP',
1425                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1426                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1427                 },
1428             }],
1429             'params': {
1430                 'skip_download': True,
1431             },
1432             'skip': 'Not multifeed anymore',
1433         },
1434         {
1435             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1436             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1437             'info_dict': {
1438                 'id': 'gVfLd0zydlo',
1439                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1440             },
1441             'playlist_count': 2,
1442             'skip': 'Not multifeed anymore',
1443         },
1444         {
1445             'url': 'https://vid.plus/FlRa-iH7PGw',
1446             'only_matching': True,
1447         },
1448         {
1449             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1450             'only_matching': True,
1451         },
1452         {
1453             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1454             # Also tests cut-off URL expansion in video description (see
1455             # https://github.com/ytdl-org/youtube-dl/issues/1892,
1456             # https://github.com/ytdl-org/youtube-dl/issues/8164)
1457             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1458             'info_dict': {
1459                 'id': 'lsguqyKfVQg',
1460                 'ext': 'mp4',
1461                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1462                 'alt_title': 'Dark Walk',
1463                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1464                 'duration': 133,
1465                 'upload_date': '20151119',
1466                 'uploader_id': 'IronSoulElf',
1467                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1468                 'uploader': 'IronSoulElf',
1469                 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1470                 'track': 'Dark Walk',
1471                 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1472                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1473             },
1474             'params': {
1475                 'skip_download': True,
1476             },
1477         },
1478         {
1479             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1480             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1481             'only_matching': True,
1482         },
1483         {
1484             # Video with yt:stretch=17:0
1485             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1486             'info_dict': {
1487                 'id': 'Q39EVAstoRM',
1488                 'ext': 'mp4',
1489                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1490                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1491                 'upload_date': '20151107',
1492                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1493                 'uploader': 'CH GAMER DROID',
1494             },
1495             'params': {
1496                 'skip_download': True,
1497             },
1498             'skip': 'This video does not exist.',
1499         },
1500         {
1501             # Video with incomplete 'yt:stretch=16:'
1502             'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1503             'only_matching': True,
1504         },
1505         {
1506             # Video licensed under Creative Commons
1507             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1508             'info_dict': {
1509                 'id': 'M4gD1WSo5mA',
1510                 'ext': 'mp4',
1511                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1512                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1513                 'duration': 721,
1514                 'upload_date': '20150127',
1515                 'uploader_id': 'BerkmanCenter',
1516                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1517                 'uploader': 'The Berkman Klein Center for Internet & Society',
1518                 'license': 'Creative Commons Attribution license (reuse allowed)',
1519             },
1520             'params': {
1521                 'skip_download': True,
1522             },
1523         },
1524         {
1525             # Channel-like uploader_url
1526             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1527             'info_dict': {
1528                 'id': 'eQcmzGIKrzg',
1529                 'ext': 'mp4',
1530                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1531                 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1532                 'duration': 4060,
1533                 'upload_date': '20151119',
1534                 'uploader': 'Bernie Sanders',
1535                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1536                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1537                 'license': 'Creative Commons Attribution license (reuse allowed)',
1538             },
1539             'params': {
1540                 'skip_download': True,
1541             },
1542         },
1543         {
1544             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1545             'only_matching': True,
1546         },
1547         {
1548             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1549             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1550             'only_matching': True,
1551         },
1552         {
1553             # Rental video preview
1554             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1555             'info_dict': {
1556                 'id': 'uGpuVWrhIzE',
1557                 'ext': 'mp4',
1558                 'title': 'Piku - Trailer',
1559                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1560                 'upload_date': '20150811',
1561                 'uploader': 'FlixMatrix',
1562                 'uploader_id': 'FlixMatrixKaravan',
1563                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1564                 'license': 'Standard YouTube License',
1565             },
1566             'params': {
1567                 'skip_download': True,
1568             },
1569             'skip': 'This video is not available.',
1570         },
1571         {
1572             # YouTube Red video with episode data
1573             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1574             'info_dict': {
1575                 'id': 'iqKdEhx-dD4',
1576                 'ext': 'mp4',
1577                 'title': 'Isolation - Mind Field (Ep 1)',
1578                 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1579                 'duration': 2085,
1580                 'upload_date': '20170118',
1581                 'uploader': 'Vsauce',
1582                 'uploader_id': 'Vsauce',
1583                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1584                 'series': 'Mind Field',
1585                 'season_number': 1,
1586                 'episode_number': 1,
1587             },
1588             'params': {
1589                 'skip_download': True,
1590             },
1591             'expected_warnings': [
1592                 'Skipping DASH manifest',
1593             ],
1594         },
1595         {
1596             # The following content has been identified by the YouTube community
1597             # as inappropriate or offensive to some audiences.
1598             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1599             'info_dict': {
1600                 'id': '6SJNVb0GnPI',
1601                 'ext': 'mp4',
1602                 'title': 'Race Differences in Intelligence',
1603                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1604                 'duration': 965,
1605                 'upload_date': '20140124',
1606                 'uploader': 'New Century Foundation',
1607                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1608                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1609             },
1610             'params': {
1611                 'skip_download': True,
1612             },
1613             'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1614         },
1615         {
1616             # itag 212
1617             'url': '1t24XAntNCY',
1618             'only_matching': True,
1619         },
1620         {
1621             # geo restricted to JP
1622             'url': 'sJL6WA-aGkQ',
1623             'only_matching': True,
1624         },
1625         {
1626             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1627             'only_matching': True,
1628         },
1629         {
1630             'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1631             'only_matching': True,
1632         },
1633         {
1634             # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1635             'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1636             'only_matching': True,
1637         },
1638         {
1639             # DRM protected
1640             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1641             'only_matching': True,
1642         },
1643         {
1644             # Video with unsupported adaptive stream type formats
1645             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1646             'info_dict': {
1647                 'id': 'Z4Vy8R84T1U',
1648                 'ext': 'mp4',
1649                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1650                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1651                 'duration': 433,
1652                 'upload_date': '20130923',
1653                 'uploader': 'Amelia Putri Harwita',
1654                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1655                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1656                 'formats': 'maxcount:10',
1657             },
1658             'params': {
1659                 'skip_download': True,
1660                 'youtube_include_dash_manifest': False,
1661             },
1662             'skip': 'not actual anymore',
1663         },
1664         {
1665             # Youtube Music Auto-generated description
1666             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1667             'info_dict': {
1668                 'id': 'MgNrAu2pzNs',
1669                 'ext': 'mp4',
1670                 'title': 'Voyeur Girl',
1671                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1672                 'upload_date': '20190312',
1673                 'uploader': 'Stephen - Topic',
1674                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1675                 'artist': 'Stephen',
1676                 'track': 'Voyeur Girl',
1677                 'album': 'it\'s too much love to know my dear',
1678                 'release_date': '20190313',
1679                 'release_year': 2019,
1680             },
1681             'params': {
1682                 'skip_download': True,
1683             },
1684         },
1685         {
1686             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1687             'only_matching': True,
1688         },
1689         {
1690             # invalid -> valid video id redirection
1691             'url': 'DJztXj2GPfl',
1692             'info_dict': {
1693                 'id': 'DJztXj2GPfk',
1694                 'ext': 'mp4',
1695                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1696                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1697                 'upload_date': '20090125',
1698                 'uploader': 'Prochorowka',
1699                 'uploader_id': 'Prochorowka',
1700                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1701                 'artist': 'Panjabi MC',
1702                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1703                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1704             },
1705             'params': {
1706                 'skip_download': True,
1707             },
1708             'skip': 'Video unavailable',
1709         },
1710         {
1711             # empty description results in an empty string
1712             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1713             'info_dict': {
1714                 'id': 'x41yOUIvK2k',
1715                 'ext': 'mp4',
1716                 'title': 'IMG 3456',
1717                 'description': '',
1718                 'upload_date': '20170613',
1719                 'uploader_id': 'ElevageOrVert',
1720                 'uploader': 'ElevageOrVert',
1721             },
1722             'params': {
1723                 'skip_download': True,
1724             },
1725         },
1726         {
1727             # with '};' inside yt initial data (see [1])
1728             # see [2] for an example with '};' inside ytInitialPlayerResponse
1729             # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1730             # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1731             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1732             'info_dict': {
1733                 'id': 'CHqg6qOn4no',
1734                 'ext': 'mp4',
1735                 'title': 'Part 77   Sort a list of simple types in c#',
1736                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1737                 'upload_date': '20130831',
1738                 'uploader_id': 'kudvenkat',
1739                 'uploader': 'kudvenkat',
1740             },
1741             'params': {
1742                 'skip_download': True,
1743             },
1744         },
1745         {
1746             # another example of '};' in ytInitialData
1747             'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1748             'only_matching': True,
1749         },
1750         {
1751             'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1752             'only_matching': True,
1753         },
1754         {
1755             # https://github.com/ytdl-org/youtube-dl/pull/28094
1756             'url': 'OtqTfy26tG0',
1757             'info_dict': {
1758                 'id': 'OtqTfy26tG0',
1759                 'ext': 'mp4',
1760                 'title': 'Burn Out',
1761                 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1762                 'upload_date': '20141120',
1763                 'uploader': 'The Cinematic Orchestra - Topic',
1764                 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1765                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1766                 'artist': 'The Cinematic Orchestra',
1767                 'track': 'Burn Out',
1768                 'album': 'Every Day',
1769                 'release_data': None,
1770                 'release_year': None,
1771             },
1772             'params': {
1773                 'skip_download': True,
1774             },
1775         },
1776         {
1777             # controversial video, only works with bpctr when authenticated with cookies
1778             'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1779             'only_matching': True,
1780         },
1781         {
1782             # controversial video, requires bpctr/contentCheckOk
1783             'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1784             'info_dict': {
1785                 'id': 'SZJvDhaSDnc',
1786                 'ext': 'mp4',
1787                 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1788                 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1789                 'uploader': 'CBS This Morning',
1790                 'uploader_id': 'CBSThisMorning',
1791                 'upload_date': '20140716',
1792                 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1793             }
1794         },
1795         {
1796             # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1797             'url': 'cBvYw8_A0vQ',
1798             'info_dict': {
1799                 'id': 'cBvYw8_A0vQ',
1800                 'ext': 'mp4',
1801                 'title': '4K Ueno Okachimachi  Street  Scenes  上野御徒町歩き',
1802                 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1803                 'upload_date': '20201120',
1804                 'uploader': 'Walk around Japan',
1805                 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1806                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1807             },
1808             'params': {
1809                 'skip_download': True,
1810             },
1811         }, {
1812             # Has multiple audio streams
1813             'url': 'WaOKSUlf4TM',
1814             'only_matching': True
1815         }, {
1816             # Requires Premium: has format 141 when requested using YTM url
1817             'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1818             'only_matching': True
1819         }, {
1820             # multiple subtitles with same lang_code
1821             'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1822             'only_matching': True,
1823         }, {
1824             # Force use android client fallback
1825             'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1826             'info_dict': {
1827                 'id': 'YOelRv7fMxY',
1828                 'title': 'DIGGING A SECRET TUNNEL Part 1',
1829                 'ext': '3gp',
1830                 'upload_date': '20210624',
1831                 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1832                 'uploader': 'colinfurze',
1833                 'uploader_id': 'colinfurze',
1834                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1835                 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1836             },
1837             'params': {
1838                 'format': '17',  # 3gp format available on android
1839                 'extractor_args': {'youtube': {'player_client': ['android']}},
1840             },
1841         },
1842         {
1843             # Skip download of additional client configs (remix client config in this case)
1844             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1845             'only_matching': True,
1846             'params': {
1847                 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1848             },
1849         }, {
1850             # shorts
1851             'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1852             'only_matching': True,
1853         },
1854     ]
1855
1856     @classmethod
1857     def suitable(cls, url):
1858         from ..utils import parse_qs
1859
1860         qs = parse_qs(url)
1861         if qs.get('list', [None])[0]:
1862             return False
1863         return super(YoutubeIE, cls).suitable(url)
1864
1865     def __init__(self, *args, **kwargs):
1866         super(YoutubeIE, self).__init__(*args, **kwargs)
1867         self._code_cache = {}
1868         self._player_cache = {}
1869
1870     def _extract_player_url(self, *ytcfgs, webpage=None):
1871         player_url = traverse_obj(
1872             ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1873             get_all=False, expected_type=compat_str)
1874         if not player_url:
1875             return
1876         if player_url.startswith('//'):
1877             player_url = 'https:' + player_url
1878         elif not re.match(r'https?://', player_url):
1879             player_url = compat_urlparse.urljoin(
1880                 'https://www.youtube.com', player_url)
1881         return player_url
1882
1883     def _download_player_url(self, video_id, fatal=False):
1884         res = self._download_webpage(
1885             'https://www.youtube.com/iframe_api',
1886             note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1887         if res:
1888             player_version = self._search_regex(
1889                 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1890             if player_version:
1891                 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1892
1893     def _signature_cache_id(self, example_sig):
1894         """ Return a string representation of a signature """
1895         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1896
1897     @classmethod
1898     def _extract_player_info(cls, player_url):
1899         for player_re in cls._PLAYER_INFO_RE:
1900             id_m = re.search(player_re, player_url)
1901             if id_m:
1902                 break
1903         else:
1904             raise ExtractorError('Cannot identify player %r' % player_url)
1905         return id_m.group('id')
1906
1907     def _load_player(self, video_id, player_url, fatal=True) -> bool:
1908         player_id = self._extract_player_info(player_url)
1909         if player_id not in self._code_cache:
1910             self._code_cache[player_id] = self._download_webpage(
1911                 player_url, video_id, fatal=fatal,
1912                 note='Downloading player ' + player_id,
1913                 errnote='Download of %s failed' % player_url)
1914         return player_id in self._code_cache
1915
1916     def _extract_signature_function(self, video_id, player_url, example_sig):
1917         player_id = self._extract_player_info(player_url)
1918
1919         # Read from filesystem cache
1920         func_id = 'js_%s_%s' % (
1921             player_id, self._signature_cache_id(example_sig))
1922         assert os.path.basename(func_id) == func_id
1923
1924         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1925         if cache_spec is not None:
1926             return lambda s: ''.join(s[i] for i in cache_spec)
1927
1928         if self._load_player(video_id, player_url):
1929             code = self._code_cache[player_id]
1930             res = self._parse_sig_js(code)
1931
1932             test_string = ''.join(map(compat_chr, range(len(example_sig))))
1933             cache_res = res(test_string)
1934             cache_spec = [ord(c) for c in cache_res]
1935
1936             self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1937             return res
1938
1939     def _print_sig_code(self, func, example_sig):
1940         def gen_sig_code(idxs):
1941             def _genslice(start, end, step):
1942                 starts = '' if start == 0 else str(start)
1943                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1944                 steps = '' if step == 1 else (':%d' % step)
1945                 return 's[%s%s%s]' % (starts, ends, steps)
1946
1947             step = None
1948             # Quelch pyflakes warnings - start will be set when step is set
1949             start = '(Never used)'
1950             for i, prev in zip(idxs[1:], idxs[:-1]):
1951                 if step is not None:
1952                     if i - prev == step:
1953                         continue
1954                     yield _genslice(start, prev, step)
1955                     step = None
1956                     continue
1957                 if i - prev in [-1, 1]:
1958                     step = i - prev
1959                     start = prev
1960                     continue
1961                 else:
1962                     yield 's[%d]' % prev
1963             if step is None:
1964                 yield 's[%d]' % i
1965             else:
1966                 yield _genslice(start, i, step)
1967
1968         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1969         cache_res = func(test_string)
1970         cache_spec = [ord(c) for c in cache_res]
1971         expr_code = ' + '.join(gen_sig_code(cache_spec))
1972         signature_id_tuple = '(%s)' % (
1973             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1974         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1975                 '    return %s\n') % (signature_id_tuple, expr_code)
1976         self.to_screen('Extracted signature function:\n' + code)
1977
1978     def _parse_sig_js(self, jscode):
1979         funcname = self._search_regex(
1980             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1981              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1982              r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
1983              r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
1984              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1985              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1986              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1987              # Obsolete patterns
1988              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1989              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1990              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1991              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1992              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1993              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1994              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1995              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1996             jscode, 'Initial JS player signature function name', group='sig')
1997
1998         jsi = JSInterpreter(jscode)
1999         initial_function = jsi.extract_function(funcname)
2000         return lambda s: initial_function([s])
2001
2002     def _decrypt_signature(self, s, video_id, player_url):
2003         """Turn the encrypted s field into a working signature"""
2004
2005         if player_url is None:
2006             raise ExtractorError('Cannot decrypt signature without player_url')
2007
2008         try:
2009             player_id = (player_url, self._signature_cache_id(s))
2010             if player_id not in self._player_cache:
2011                 func = self._extract_signature_function(
2012                     video_id, player_url, s
2013                 )
2014                 self._player_cache[player_id] = func
2015             func = self._player_cache[player_id]
2016             if self.get_param('youtube_print_sig_code'):
2017                 self._print_sig_code(func, s)
2018             return func(s)
2019         except Exception as e:
2020             tb = traceback.format_exc()
2021             raise ExtractorError(
2022                 'Signature extraction failed: ' + tb, cause=e)
2023
2024     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2025         """
2026         Extract signatureTimestamp (sts)
2027         Required to tell API what sig/player version is in use.
2028         """
2029         sts = None
2030         if isinstance(ytcfg, dict):
2031             sts = int_or_none(ytcfg.get('STS'))
2032
2033         if not sts:
2034             # Attempt to extract from player
2035             if player_url is None:
2036                 error_msg = 'Cannot extract signature timestamp without player_url.'
2037                 if fatal:
2038                     raise ExtractorError(error_msg)
2039                 self.report_warning(error_msg)
2040                 return
2041             if self._load_player(video_id, player_url, fatal=fatal):
2042                 player_id = self._extract_player_info(player_url)
2043                 code = self._code_cache[player_id]
2044                 sts = int_or_none(self._search_regex(
2045                     r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2046                     'JS player signature timestamp', group='sts', fatal=fatal))
2047         return sts
2048
2049     def _mark_watched(self, video_id, player_responses):
2050         playback_url = traverse_obj(
2051             player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2052             expected_type=url_or_none, get_all=False)
2053         if not playback_url:
2054             self.report_warning('Unable to mark watched')
2055             return
2056         parsed_playback_url = compat_urlparse.urlparse(playback_url)
2057         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2058
2059         # cpn generation algorithm is reverse engineered from base.js.
2060         # In fact it works even with dummy cpn.
2061         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2062         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2063
2064         qs.update({
2065             'ver': ['2'],
2066             'cpn': [cpn],
2067         })
2068         playback_url = compat_urlparse.urlunparse(
2069             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2070
2071         self._download_webpage(
2072             playback_url, video_id, 'Marking watched',
2073             'Unable to mark watched', fatal=False)
2074
2075     @staticmethod
2076     def _extract_urls(webpage):
2077         # Embedded YouTube player
2078         entries = [
2079             unescapeHTML(mobj.group('url'))
2080             for mobj in re.finditer(r'''(?x)
2081             (?:
2082                 <iframe[^>]+?src=|
2083                 data-video-url=|
2084                 <embed[^>]+?src=|
2085                 embedSWF\(?:\s*|
2086                 <object[^>]+data=|
2087                 new\s+SWFObject\(
2088             )
2089             (["\'])
2090                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2091                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2092             \1''', webpage)]
2093
2094         # lazyYT YouTube embed
2095         entries.extend(list(map(
2096             unescapeHTML,
2097             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2098
2099         # Wordpress "YouTube Video Importer" plugin
2100         matches = re.findall(r'''(?x)<div[^>]+
2101             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2102             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2103         entries.extend(m[-1] for m in matches)
2104
2105         return entries
2106
2107     @staticmethod
2108     def _extract_url(webpage):
2109         urls = YoutubeIE._extract_urls(webpage)
2110         return urls[0] if urls else None
2111
2112     @classmethod
2113     def extract_id(cls, url):
2114         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2115         if mobj is None:
2116             raise ExtractorError('Invalid URL: %s' % url)
2117         return mobj.group('id')
2118
2119     def _extract_chapters_from_json(self, data, duration):
2120         chapter_list = traverse_obj(
2121             data, (
2122                 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2123                 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2124             ), expected_type=list)
2125
2126         return self._extract_chapters(
2127             chapter_list,
2128             chapter_time=lambda chapter: float_or_none(
2129                 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2130             chapter_title=lambda chapter: traverse_obj(
2131                 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2132             duration=duration)
2133
2134     def _extract_chapters_from_engagement_panel(self, data, duration):
2135         content_list = traverse_obj(
2136             data,
2137             ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2138             expected_type=list, default=[])
2139         chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2140         chapter_title = lambda chapter: self._get_text(chapter, 'title')
2141
2142         return next((
2143             filter(None, (
2144                 self._extract_chapters(
2145                     traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2146                     chapter_time, chapter_title, duration)
2147                 for contents in content_list
2148             ))), [])
2149
2150     def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2151         chapters = []
2152         last_chapter = {'start_time': 0}
2153         for idx, chapter in enumerate(chapter_list or []):
2154             title = chapter_title(chapter)
2155             start_time = chapter_time(chapter)
2156             if start_time is None:
2157                 continue
2158             last_chapter['end_time'] = start_time
2159             if start_time < last_chapter['start_time']:
2160                 if idx == 1:
2161                     chapters.pop()
2162                     self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2163                 else:
2164                     self.report_warning(f'Invalid start time for chapter "{title}"')
2165                     continue
2166             last_chapter = {'start_time': start_time, 'title': title}
2167             chapters.append(last_chapter)
2168         last_chapter['end_time'] = duration
2169         return chapters
2170
2171     def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2172         return self._parse_json(self._search_regex(
2173             (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2174              regex), webpage, name, default='{}'), video_id, fatal=False)
2175
2176     @staticmethod
2177     def parse_time_text(time_text):
2178         """
2179         Parse the comment time text
2180         time_text is in the format 'X units ago (edited)'
2181         """
2182         time_text_split = time_text.split(' ')
2183         if len(time_text_split) >= 3:
2184             try:
2185                 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2186             except ValueError:
2187                 return None
2188
2189     def _extract_comment(self, comment_renderer, parent=None):
2190         comment_id = comment_renderer.get('commentId')
2191         if not comment_id:
2192             return
2193
2194         text = self._get_text(comment_renderer, 'contentText')
2195
2196         # note: timestamp is an estimate calculated from the current time and time_text
2197         time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2198         time_text_dt = self.parse_time_text(time_text)
2199         if isinstance(time_text_dt, datetime.datetime):
2200             timestamp = calendar.timegm(time_text_dt.timetuple())
2201         author = self._get_text(comment_renderer, 'authorText')
2202         author_id = try_get(comment_renderer,
2203                             lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2204
2205         votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2206                                                        lambda x: x['likeCount']), compat_str)) or 0
2207         author_thumbnail = try_get(comment_renderer,
2208                                    lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2209
2210         author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2211         is_favorited = 'creatorHeart' in (try_get(
2212             comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2213         return {
2214             'id': comment_id,
2215             'text': text,
2216             'timestamp': timestamp,
2217             'time_text': time_text,
2218             'like_count': votes,
2219             'is_favorited': is_favorited,
2220             'author': author,
2221             'author_id': author_id,
2222             'author_thumbnail': author_thumbnail,
2223             'author_is_uploader': author_is_uploader,
2224             'parent': parent or 'root'
2225         }
2226
2227     def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
2228                          ytcfg, video_id, parent=None, comment_counts=None):
2229
2230         def extract_header(contents):
2231             _total_comments = 0
2232             _continuation = None
2233             for content in contents:
2234                 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
2235                 expected_comment_count = parse_count(self._get_text(
2236                     comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
2237
2238                 if expected_comment_count:
2239                     comment_counts[1] = expected_comment_count
2240                     self.to_screen('Downloading ~%d comments' % expected_comment_count)
2241                     _total_comments = comment_counts[1]
2242                 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2243                 comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top
2244
2245                 sort_menu_item = try_get(
2246                     comments_header_renderer,
2247                     lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2248                 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2249
2250                 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2251                 if not _continuation:
2252                     continue
2253
2254                 sort_text = sort_menu_item.get('title')
2255                 if isinstance(sort_text, compat_str):
2256                     sort_text = sort_text.lower()
2257                 else:
2258                     sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2259                 self.to_screen('Sorting comments by %s' % sort_text)
2260                 break
2261             return _total_comments, _continuation
2262
2263         def extract_thread(contents):
2264             if not parent:
2265                 comment_counts[2] = 0
2266             for content in contents:
2267                 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2268                 comment_renderer = try_get(
2269                     comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
2270                     content, (lambda x: x['commentRenderer'], dict))
2271
2272                 if not comment_renderer:
2273                     continue
2274                 comment = self._extract_comment(comment_renderer, parent)
2275                 if not comment:
2276                     continue
2277                 comment_counts[0] += 1
2278                 yield comment
2279                 # Attempt to get the replies
2280                 comment_replies_renderer = try_get(
2281                     comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2282
2283                 if comment_replies_renderer:
2284                     comment_counts[2] += 1
2285                     comment_entries_iter = self._comment_entries(
2286                         comment_replies_renderer, identity_token, account_syncid, ytcfg,
2287                         video_id, parent=comment.get('id'), comment_counts=comment_counts)
2288
2289                     for reply_comment in comment_entries_iter:
2290                         yield reply_comment
2291
2292         # YouTube comments have a max depth of 2
2293         max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
2294         if max_depth == 1 and parent:
2295             return
2296         if not comment_counts:
2297             # comment so far, est. total comments, current comment thread #
2298             comment_counts = [0, 0, 0]
2299
2300         continuation = self._extract_continuation(root_continuation_data)
2301         if continuation and len(continuation['continuation']) < 27:
2302             self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2303             continuation_token = self._generate_comment_continuation(video_id)
2304             continuation = self._build_api_continuation_query(continuation_token, None)
2305
2306         visitor_data = None
2307         is_first_continuation = parent is None
2308
2309         for page_num in itertools.count(0):
2310             if not continuation:
2311                 break
2312             headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
2313             comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2314             if page_num == 0:
2315                 if is_first_continuation:
2316                     note_prefix = 'Downloading comment section API JSON'
2317                 else:
2318                     note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
2319                         comment_counts[2], comment_prog_str)
2320             else:
2321                 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2322                     '       ' if parent else '', ' replies' if parent else '',
2323                     page_num, comment_prog_str)
2324
2325             response = self._extract_response(
2326                 item_id=None, query=continuation,
2327                 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2328                 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
2329             if not response:
2330                 break
2331             visitor_data = try_get(
2332                 response,
2333                 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2334                 compat_str) or visitor_data
2335
2336             continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
2337
2338             continuation = None
2339             if isinstance(continuation_contents, list):
2340                 for continuation_section in continuation_contents:
2341                     if not isinstance(continuation_section, dict):
2342                         continue
2343                     continuation_items = try_get(
2344                         continuation_section,
2345                         (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2346                          lambda x: x['appendContinuationItemsAction']['continuationItems']),
2347                         list) or []
2348                     if is_first_continuation:
2349                         total_comments, continuation = extract_header(continuation_items)
2350                         if total_comments:
2351                             yield total_comments
2352                         is_first_continuation = False
2353                         if continuation:
2354                             break
2355                         continue
2356                     count = 0
2357                     for count, entry in enumerate(extract_thread(continuation_items)):
2358                         yield entry
2359                     continuation = self._extract_continuation({'contents': continuation_items})
2360                     if continuation:
2361                         # Sometimes YouTube provides a continuation without any comments
2362                         # In most cases we end up just downloading these with very little comments to come.
2363                         if count == 0:
2364                             if not parent:
2365                                 self.report_warning('No comments received - assuming end of comments')
2366                             continuation = None
2367                         break
2368
2369             # Deprecated response structure
2370             elif isinstance(continuation_contents, dict):
2371                 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2372                 for key, continuation_renderer in continuation_contents.items():
2373                     if key not in known_continuation_renderers:
2374                         continue
2375                     if not isinstance(continuation_renderer, dict):
2376                         continue
2377                     if is_first_continuation:
2378                         header_continuation_items = [continuation_renderer.get('header') or {}]
2379                         total_comments, continuation = extract_header(header_continuation_items)
2380                         if total_comments:
2381                             yield total_comments
2382                         is_first_continuation = False
2383                         if continuation:
2384                             break
2385
2386                     # Sometimes YouTube provides a continuation without any comments
2387                     # In most cases we end up just downloading these with very little comments to come.
2388                     count = 0
2389                     for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2390                         yield entry
2391                     continuation = self._extract_continuation(continuation_renderer)
2392                     if count == 0:
2393                         if not parent:
2394                             self.report_warning('No comments received - assuming end of comments')
2395                         continuation = None
2396                     break
2397
2398     @staticmethod
2399     def _generate_comment_continuation(video_id):
2400         """
2401         Generates initial comment section continuation token from given video id
2402         """
2403         b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2404         parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2405         new_continuation_intlist = list(itertools.chain.from_iterable(
2406             [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2407         return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2408
2409     def _extract_comments(self, ytcfg, video_id, contents, webpage):
2410         """Entry for comment extraction"""
2411         def _real_comment_extract(contents):
2412             if isinstance(contents, list):
2413                 for entry in contents:
2414                     for key, renderer in entry.items():
2415                         if key not in known_entry_comment_renderers:
2416                             continue
2417                         yield from self._comment_entries(
2418                             renderer, video_id=video_id, ytcfg=ytcfg,
2419                             identity_token=self._extract_identity_token(webpage, item_id=video_id),
2420                             account_syncid=self._extract_account_syncid(ytcfg))
2421                         break
2422         comments = []
2423         known_entry_comment_renderers = ('itemSectionRenderer',)
2424         estimated_total = 0
2425         max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2426         # Force English regardless of account setting to prevent parsing issues
2427         # See: https://github.com/yt-dlp/yt-dlp/issues/532
2428         ytcfg = copy.deepcopy(ytcfg)
2429         traverse_obj(
2430             ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
2431         try:
2432             for comment in _real_comment_extract(contents):
2433                 if len(comments) >= max_comments:
2434                     break
2435                 if isinstance(comment, int):
2436                     estimated_total = comment
2437                     continue
2438                 comments.append(comment)
2439         except KeyboardInterrupt:
2440             self.to_screen('Interrupted by user')
2441         self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2442         return {
2443             'comments': comments,
2444             'comment_count': len(comments),
2445         }
2446
2447     @staticmethod
2448     def _generate_player_context(sts=None):
2449         context = {
2450             'html5Preference': 'HTML5_PREF_WANTS',
2451         }
2452         if sts is not None:
2453             context['signatureTimestamp'] = sts
2454         return {
2455             'playbackContext': {
2456                 'contentPlaybackContext': context
2457             },
2458             'contentCheckOk': True,
2459             'racyCheckOk': True
2460         }
2461
2462     @staticmethod
2463     def _is_agegated(player_response):
2464         if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2465             return True
2466
2467         reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2468         AGE_GATE_REASONS = (
2469             'confirm your age', 'age-restricted', 'inappropriate',  # reason
2470             'age_verification_required', 'age_check_required',  # status
2471         )
2472         return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2473
2474     @staticmethod
2475     def _is_unplayable(player_response):
2476         return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2477
2478     def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
2479
2480         session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2481         syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2482         sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2483         headers = self.generate_api_headers(
2484             player_ytcfg, identity_token, syncid,
2485             default_client=client, session_index=session_index)
2486
2487         yt_query = {'videoId': video_id}
2488         yt_query.update(self._generate_player_context(sts))
2489         return self._extract_response(
2490             item_id=video_id, ep='player', query=yt_query,
2491             ytcfg=player_ytcfg, headers=headers, fatal=True,
2492             default_client=client,
2493             note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2494         ) or None
2495
2496     def _get_requested_clients(self, url, smuggled_data):
2497         requested_clients = []
2498         allowed_clients = sorted(
2499             [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2500             key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2501         for client in self._configuration_arg('player_client'):
2502             if client in allowed_clients:
2503                 requested_clients.append(client)
2504             elif client == 'all':
2505                 requested_clients.extend(allowed_clients)
2506             else:
2507                 self.report_warning(f'Skipping unsupported client {client}')
2508         if not requested_clients:
2509             requested_clients = ['android', 'web']
2510
2511         if smuggled_data.get('is_music_url') or self.is_music_url(url):
2512             requested_clients.extend(
2513                 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2514
2515         return orderedSet(requested_clients)
2516
2517     def _extract_player_ytcfg(self, client, video_id):
2518         url = {
2519             'web_music': 'https://music.youtube.com',
2520             'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2521         }.get(client)
2522         if not url:
2523             return {}
2524         webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2525         return self.extract_ytcfg(video_id, webpage) or {}
2526
2527     def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, identity_token):
2528         initial_pr = None
2529         if webpage:
2530             initial_pr = self._extract_yt_initial_variable(
2531                 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2532                 video_id, 'initial player response')
2533
2534         original_clients = clients
2535         clients = clients[::-1]
2536         prs = []
2537
2538         def append_client(client_name):
2539             if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
2540                 clients.append(client_name)
2541
2542         # Android player_response does not have microFormats which are needed for
2543         # extraction of some data. So we return the initial_pr with formats
2544         # stripped out even if not requested by the user
2545         # See: https://github.com/yt-dlp/yt-dlp/issues/501
2546         if initial_pr:
2547             pr = dict(initial_pr)
2548             pr['streamingData'] = None
2549             prs.append(pr)
2550
2551         last_error = None
2552         tried_iframe_fallback = False
2553         player_url = None
2554         while clients:
2555             client = clients.pop()
2556             player_ytcfg = master_ytcfg if client == 'web' else {}
2557             if 'configs' not in self._configuration_arg('player_skip'):
2558                 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
2559
2560             player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
2561             require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
2562             if 'js' in self._configuration_arg('player_skip'):
2563                 require_js_player = False
2564                 player_url = None
2565
2566             if not player_url and not tried_iframe_fallback and require_js_player:
2567                 player_url = self._download_player_url(video_id)
2568                 tried_iframe_fallback = True
2569
2570             try:
2571                 pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
2572                     client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url if require_js_player else None, initial_pr)
2573             except ExtractorError as e:
2574                 if last_error:
2575                     self.report_warning(last_error)
2576                 last_error = e
2577                 continue
2578
2579             if pr:
2580                 prs.append(pr)
2581
2582             # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
2583             if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header():
2584                 append_client(client.replace('_agegate', '_creator'))
2585             elif self._is_agegated(pr):
2586                 append_client(f'{client}_agegate')
2587
2588         if last_error:
2589             if not len(prs):
2590                 raise last_error
2591             self.report_warning(last_error)
2592         return prs, player_url
2593
2594     def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2595         itags, stream_ids = [], []
2596         itag_qualities, res_qualities = {}, {}
2597         q = qualities([
2598             # Normally tiny is the smallest video-only formats. But
2599             # audio-only formats with unknown quality may get tagged as tiny
2600             'tiny',
2601             'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
2602             'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2603         ])
2604         streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2605
2606         for fmt in streaming_formats:
2607             if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2608                 continue
2609
2610             itag = str_or_none(fmt.get('itag'))
2611             audio_track = fmt.get('audioTrack') or {}
2612             stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2613             if stream_id in stream_ids:
2614                 continue
2615
2616             quality = fmt.get('quality')
2617             height = int_or_none(fmt.get('height'))
2618             if quality == 'tiny' or not quality:
2619                 quality = fmt.get('audioQuality', '').lower() or quality
2620             # The 3gp format (17) in android client has a quality of "small",
2621             # but is actually worse than other formats
2622             if itag == '17':
2623                 quality = 'tiny'
2624             if quality:
2625                 if itag:
2626                     itag_qualities[itag] = quality
2627                 if height:
2628                     res_qualities[height] = quality
2629             # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2630             # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2631             # number of fragment that would subsequently requested with (`&sq=N`)
2632             if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2633                 continue
2634
2635             fmt_url = fmt.get('url')
2636             if not fmt_url:
2637                 sc = compat_parse_qs(fmt.get('signatureCipher'))
2638                 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2639                 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2640                 if not (sc and fmt_url and encrypted_sig):
2641                     continue
2642                 if not player_url:
2643                     continue
2644                 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2645                 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2646                 fmt_url += '&' + sp + '=' + signature
2647
2648             if itag:
2649                 itags.append(itag)
2650                 stream_ids.append(stream_id)
2651
2652             tbr = float_or_none(
2653                 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2654             dct = {
2655                 'asr': int_or_none(fmt.get('audioSampleRate')),
2656                 'filesize': int_or_none(fmt.get('contentLength')),
2657                 'format_id': itag,
2658                 'format_note': ', '.join(filter(None, (
2659                     '%s%s' % (audio_track.get('displayName') or '',
2660                               ' (default)' if audio_track.get('audioIsDefault') else ''),
2661                     fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
2662                 'fps': int_or_none(fmt.get('fps')),
2663                 'height': height,
2664                 'quality': q(quality),
2665                 'tbr': tbr,
2666                 'url': fmt_url,
2667                 'width': int_or_none(fmt.get('width')),
2668                 'language': audio_track.get('id', '').split('.')[0],
2669                 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2670             }
2671             mime_mobj = re.match(
2672                 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2673             if mime_mobj:
2674                 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2675                 dct.update(parse_codecs(mime_mobj.group(2)))
2676             no_audio = dct.get('acodec') == 'none'
2677             no_video = dct.get('vcodec') == 'none'
2678             if no_audio:
2679                 dct['vbr'] = tbr
2680             if no_video:
2681                 dct['abr'] = tbr
2682             if no_audio or no_video:
2683                 dct['downloader_options'] = {
2684                     # Youtube throttles chunks >~10M
2685                     'http_chunk_size': 10485760,
2686                 }
2687                 if dct.get('ext'):
2688                     dct['container'] = dct['ext'] + '_dash'
2689             yield dct
2690
2691         skip_manifests = self._configuration_arg('skip')
2692         get_dash = (
2693             (not is_live or self._configuration_arg('include_live_dash'))
2694             and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
2695         get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2696
2697         def guess_quality(f):
2698             for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
2699                 if val in qdict:
2700                     return q(qdict[val])
2701             return -1
2702
2703         for sd in streaming_data:
2704             hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2705             if hls_manifest_url:
2706                 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2707                     itag = self._search_regex(
2708                         r'/itag/(\d+)', f['url'], 'itag', default=None)
2709                     if itag in itags:
2710                         continue
2711                     if itag:
2712                         f['format_id'] = itag
2713                         itags.append(itag)
2714                     f['quality'] = guess_quality(f)
2715                     yield f
2716
2717             dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2718             if dash_manifest_url:
2719                 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2720                     itag = f['format_id']
2721                     if itag in itags:
2722                         continue
2723                     if itag:
2724                         itags.append(itag)
2725                     f['quality'] = guess_quality(f)
2726                     filesize = int_or_none(self._search_regex(
2727                         r'/clen/(\d+)', f.get('fragment_base_url')
2728                         or f['url'], 'file size', default=None))
2729                     if filesize:
2730                         f['filesize'] = filesize
2731                     yield f
2732
2733     def _real_extract(self, url):
2734         url, smuggled_data = unsmuggle_url(url, {})
2735         video_id = self._match_id(url)
2736
2737         base_url = self.http_scheme() + '//www.youtube.com/'
2738         webpage_url = base_url + 'watch?v=' + video_id
2739         webpage = None
2740         if 'webpage' not in self._configuration_arg('player_skip'):
2741             webpage = self._download_webpage(
2742                 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2743
2744         master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2745         identity_token = self._extract_identity_token(webpage, video_id)
2746
2747         player_responses, player_url = self._extract_player_responses(
2748             self._get_requested_clients(url, smuggled_data),
2749             video_id, webpage, master_ytcfg, identity_token)
2750
2751         get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2752
2753         playability_statuses = traverse_obj(
2754             player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2755
2756         trailer_video_id = get_first(
2757             playability_statuses,
2758             ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2759             expected_type=str)
2760         if trailer_video_id:
2761             return self.url_result(
2762                 trailer_video_id, self.ie_key(), trailer_video_id)
2763
2764         search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2765                        if webpage else (lambda x: None))
2766
2767         video_details = traverse_obj(
2768             player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2769         microformats = traverse_obj(
2770             player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2771             expected_type=dict, default=[])
2772         video_title = (
2773             get_first(video_details, 'title')
2774             or self._get_text(microformats, (..., 'title'))
2775             or search_meta(['og:title', 'twitter:title', 'title']))
2776         video_description = get_first(video_details, 'shortDescription')
2777
2778         if not smuggled_data.get('force_singlefeed', False):
2779             if not self.get_param('noplaylist'):
2780                 multifeed_metadata_list = get_first(
2781                     player_responses,
2782                     ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2783                     expected_type=str)
2784                 if multifeed_metadata_list:
2785                     entries = []
2786                     feed_ids = []
2787                     for feed in multifeed_metadata_list.split(','):
2788                         # Unquote should take place before split on comma (,) since textual
2789                         # fields may contain comma as well (see
2790                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
2791                         feed_data = compat_parse_qs(
2792                             compat_urllib_parse_unquote_plus(feed))
2793
2794                         def feed_entry(name):
2795                             return try_get(
2796                                 feed_data, lambda x: x[name][0], compat_str)
2797
2798                         feed_id = feed_entry('id')
2799                         if not feed_id:
2800                             continue
2801                         feed_title = feed_entry('title')
2802                         title = video_title
2803                         if feed_title:
2804                             title += ' (%s)' % feed_title
2805                         entries.append({
2806                             '_type': 'url_transparent',
2807                             'ie_key': 'Youtube',
2808                             'url': smuggle_url(
2809                                 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2810                                 {'force_singlefeed': True}),
2811                             'title': title,
2812                         })
2813                         feed_ids.append(feed_id)
2814                     self.to_screen(
2815                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2816                         % (', '.join(feed_ids), video_id))
2817                     return self.playlist_result(
2818                         entries, video_id, video_title, video_description)
2819             else:
2820                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2821
2822         live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2823         is_live = get_first(video_details, 'isLive')
2824         if is_live is None:
2825             is_live = get_first(live_broadcast_details, 'isLiveNow')
2826
2827         streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2828         formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2829
2830         if not formats:
2831             if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2832                 self.report_drm(video_id)
2833             pemr = get_first(
2834                 playability_statuses,
2835                 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2836             reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2837             subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2838             if subreason:
2839                 if subreason == 'The uploader has not made this video available in your country.':
2840                     countries = get_first(microformats, 'availableCountries')
2841                     if not countries:
2842                         regions_allowed = search_meta('regionsAllowed')
2843                         countries = regions_allowed.split(',') if regions_allowed else None
2844                     self.raise_geo_restricted(subreason, countries, metadata_available=True)
2845                 reason += f'. {subreason}'
2846             if reason:
2847                 self.raise_no_formats(reason, expected=True)
2848
2849         for f in formats:
2850             if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']:  # throttled
2851                 f['source_preference'] = -10
2852                 # TODO: this method is not reliable
2853                 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
2854
2855         # Source is given priority since formats that throttle are given lower source_preference
2856         # When throttling issue is fully fixed, remove this
2857         self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang'))
2858
2859         keywords = get_first(video_details, 'keywords', expected_type=list) or []
2860         if not keywords and webpage:
2861             keywords = [
2862                 unescapeHTML(m.group('content'))
2863                 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2864         for keyword in keywords:
2865             if keyword.startswith('yt:stretch='):
2866                 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2867                 if mobj:
2868                     # NB: float is intentional for forcing float division
2869                     w, h = (float(v) for v in mobj.groups())
2870                     if w > 0 and h > 0:
2871                         ratio = w / h
2872                         for f in formats:
2873                             if f.get('vcodec') != 'none':
2874                                 f['stretched_ratio'] = ratio
2875                         break
2876
2877         thumbnails = []
2878         thumbnail_dicts = traverse_obj(
2879             (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2880             expected_type=dict, default=[])
2881         for thumbnail in thumbnail_dicts:
2882             thumbnail_url = thumbnail.get('url')
2883             if not thumbnail_url:
2884                 continue
2885             # Sometimes youtube gives a wrong thumbnail URL. See:
2886             # https://github.com/yt-dlp/yt-dlp/issues/233
2887             # https://github.com/ytdl-org/youtube-dl/issues/28023
2888             if 'maxresdefault' in thumbnail_url:
2889                 thumbnail_url = thumbnail_url.split('?')[0]
2890             thumbnails.append({
2891                 'url': thumbnail_url,
2892                 'height': int_or_none(thumbnail.get('height')),
2893                 'width': int_or_none(thumbnail.get('width')),
2894             })
2895         thumbnail_url = search_meta(['og:image', 'twitter:image'])
2896         if thumbnail_url:
2897             thumbnails.append({
2898                 'url': thumbnail_url,
2899             })
2900         # The best resolution thumbnails sometimes does not appear in the webpage
2901         # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2902         # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2903         hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2904         # TODO: Test them also? - For some videos, even these don't exist
2905         guaranteed_thumbnail_names = [
2906             'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2907             'mqdefault', 'mq1', 'mq2', 'mq3',
2908             'default', '1', '2', '3'
2909         ]
2910         thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2911         n_thumbnail_names = len(thumbnail_names)
2912
2913         thumbnails.extend({
2914             'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2915                 video_id=video_id, name=name, ext=ext,
2916                 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2917             '_test_url': name in hq_thumbnail_names,
2918         } for name in thumbnail_names for ext in ('webp', 'jpg'))
2919         for thumb in thumbnails:
2920             i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2921             thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2922         self._remove_duplicate_formats(thumbnails)
2923
2924         category = get_first(microformats, 'category') or search_meta('genre')
2925         channel_id = str_or_none(
2926             get_first(video_details, 'channelId')
2927             or get_first(microformats, 'externalChannelId')
2928             or search_meta('channelId'))
2929         duration = int_or_none(
2930             get_first(video_details, 'lengthSeconds')
2931             or get_first(microformats, 'lengthSeconds')
2932             or parse_duration(search_meta('duration'))) or None
2933         owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2934
2935         live_content = get_first(video_details, 'isLiveContent')
2936         is_upcoming = get_first(video_details, 'isUpcoming')
2937         if is_live is None:
2938             if is_upcoming or live_content is False:
2939                 is_live = False
2940         if is_upcoming is None and (live_content or is_live):
2941             is_upcoming = False
2942         live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2943         live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2944         if not duration and live_endtime and live_starttime:
2945             duration = live_endtime - live_starttime
2946
2947         info = {
2948             'id': video_id,
2949             'title': self._live_title(video_title) if is_live else video_title,
2950             'formats': formats,
2951             'thumbnails': thumbnails,
2952             'description': video_description,
2953             'upload_date': unified_strdate(
2954                 get_first(microformats, 'uploadDate')
2955                 or search_meta('uploadDate')),
2956             'uploader': get_first(video_details, 'author'),
2957             'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2958             'uploader_url': owner_profile_url,
2959             'channel_id': channel_id,
2960             'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2961             'duration': duration,
2962             'view_count': int_or_none(
2963                 get_first((video_details, microformats), (..., 'viewCount'))
2964                 or search_meta('interactionCount')),
2965             'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2966             'age_limit': 18 if (
2967                 get_first(microformats, 'isFamilySafe') is False
2968                 or search_meta('isFamilyFriendly') == 'false'
2969                 or search_meta('og:restrictions:age') == '18+') else 0,
2970             'webpage_url': webpage_url,
2971             'categories': [category] if category else None,
2972             'tags': keywords,
2973             'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2974             'is_live': is_live,
2975             'was_live': (False if is_live or is_upcoming or live_content is False
2976                          else None if is_live is None or is_upcoming is None
2977                          else live_content),
2978             'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
2979             'release_timestamp': live_starttime,
2980         }
2981
2982         pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2983         # Converted into dicts to remove duplicates
2984         captions = {
2985             sub.get('baseUrl'): sub
2986             for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2987         translation_languages = {
2988             lang.get('languageCode'): lang.get('languageName')
2989             for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2990         subtitles = {}
2991         if pctr:
2992             def process_language(container, base_url, lang_code, sub_name, query):
2993                 lang_subs = container.setdefault(lang_code, [])
2994                 for fmt in self._SUBTITLE_FORMATS:
2995                     query.update({
2996                         'fmt': fmt,
2997                     })
2998                     lang_subs.append({
2999                         'ext': fmt,
3000                         'url': update_url_query(base_url, query),
3001                         'name': sub_name,
3002                     })
3003
3004             for base_url, caption_track in captions.items():
3005                 if not base_url:
3006                     continue
3007                 if caption_track.get('kind') != 'asr':
3008                     lang_code = (
3009                         remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
3010                         or caption_track.get('languageCode'))
3011                     if not lang_code:
3012                         continue
3013                     process_language(
3014                         subtitles, base_url, lang_code,
3015                         traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False),
3016                         {})
3017                     continue
3018                 automatic_captions = {}
3019                 for trans_code, trans_name in translation_languages.items():
3020                     if not trans_code:
3021                         continue
3022                     process_language(
3023                         automatic_captions, base_url, trans_code,
3024                         self._get_text(trans_name, max_runs=1),
3025                         {'tlang': trans_code})
3026                 info['automatic_captions'] = automatic_captions
3027         info['subtitles'] = subtitles
3028
3029         parsed_url = compat_urllib_parse_urlparse(url)
3030         for component in [parsed_url.fragment, parsed_url.query]:
3031             query = compat_parse_qs(component)
3032             for k, v in query.items():
3033                 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3034                     d_k += '_time'
3035                     if d_k not in info and k in s_ks:
3036                         info[d_k] = parse_duration(query[k][0])
3037
3038         # Youtube Music Auto-generated description
3039         if video_description:
3040             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3041             if mobj:
3042                 release_year = mobj.group('release_year')
3043                 release_date = mobj.group('release_date')
3044                 if release_date:
3045                     release_date = release_date.replace('-', '')
3046                     if not release_year:
3047                         release_year = release_date[:4]
3048                 info.update({
3049                     'album': mobj.group('album'.strip()),
3050                     'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3051                     'track': mobj.group('track').strip(),
3052                     'release_date': release_date,
3053                     'release_year': int_or_none(release_year),
3054                 })
3055
3056         initial_data = None
3057         if webpage:
3058             initial_data = self._extract_yt_initial_variable(
3059                 webpage, self._YT_INITIAL_DATA_RE, video_id,
3060                 'yt initial data')
3061         if not initial_data:
3062             headers = self.generate_api_headers(
3063                 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
3064                 session_index=self._extract_session_index(master_ytcfg))
3065
3066             initial_data = self._extract_response(
3067                 item_id=video_id, ep='next', fatal=False,
3068                 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
3069                 note='Downloading initial data API JSON')
3070
3071         try:
3072             # This will error if there is no livechat
3073             initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3074             info['subtitles']['live_chat'] = [{
3075                 'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
3076                 'video_id': video_id,
3077                 'ext': 'json',
3078                 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3079             }]
3080         except (KeyError, IndexError, TypeError):
3081             pass
3082
3083         if initial_data:
3084             info['chapters'] = (
3085                 self._extract_chapters_from_json(initial_data, duration)
3086                 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3087                 or None)
3088
3089             contents = try_get(
3090                 initial_data,
3091                 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3092                 list) or []
3093             for content in contents:
3094                 vpir = content.get('videoPrimaryInfoRenderer')
3095                 if vpir:
3096                     stl = vpir.get('superTitleLink')
3097                     if stl:
3098                         stl = self._get_text(stl)
3099                         if try_get(
3100                                 vpir,
3101                                 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3102                             info['location'] = stl
3103                         else:
3104                             mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3105                             if mobj:
3106                                 info.update({
3107                                     'series': mobj.group(1),
3108                                     'season_number': int(mobj.group(2)),
3109                                     'episode_number': int(mobj.group(3)),
3110                                 })
3111                     for tlb in (try_get(
3112                             vpir,
3113                             lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3114                             list) or []):
3115                         tbr = tlb.get('toggleButtonRenderer') or {}
3116                         for getter, regex in [(
3117                                 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3118                                 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3119                                     lambda x: x['accessibility'],
3120                                     lambda x: x['accessibilityData']['accessibilityData'],
3121                                 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3122                             label = (try_get(tbr, getter, dict) or {}).get('label')
3123                             if label:
3124                                 mobj = re.match(regex, label)
3125                                 if mobj:
3126                                     info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3127                                     break
3128                     sbr_tooltip = try_get(
3129                         vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3130                     if sbr_tooltip:
3131                         like_count, dislike_count = sbr_tooltip.split(' / ')
3132                         info.update({
3133                             'like_count': str_to_int(like_count),
3134                             'dislike_count': str_to_int(dislike_count),
3135                         })
3136                 vsir = content.get('videoSecondaryInfoRenderer')
3137                 if vsir:
3138                     info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3139                     rows = try_get(
3140                         vsir,
3141                         lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3142                         list) or []
3143                     multiple_songs = False
3144                     for row in rows:
3145                         if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3146                             multiple_songs = True
3147                             break
3148                     for row in rows:
3149                         mrr = row.get('metadataRowRenderer') or {}
3150                         mrr_title = mrr.get('title')
3151                         if not mrr_title:
3152                             continue
3153                         mrr_title = self._get_text(mrr, 'title')
3154                         mrr_contents_text = self._get_text(mrr, ('contents', 0))
3155                         if mrr_title == 'License':
3156                             info['license'] = mrr_contents_text
3157                         elif not multiple_songs:
3158                             if mrr_title == 'Album':
3159                                 info['album'] = mrr_contents_text
3160                             elif mrr_title == 'Artist':
3161                                 info['artist'] = mrr_contents_text
3162                             elif mrr_title == 'Song':
3163                                 info['track'] = mrr_contents_text
3164
3165         fallbacks = {
3166             'channel': 'uploader',
3167             'channel_id': 'uploader_id',
3168             'channel_url': 'uploader_url',
3169         }
3170         for to, frm in fallbacks.items():
3171             if not info.get(to):
3172                 info[to] = info.get(frm)
3173
3174         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3175             v = info.get(s_k)
3176             if v:
3177                 info[d_k] = v
3178
3179         is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3180         is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3181         is_membersonly = None
3182         is_premium = None
3183         if initial_data and is_private is not None:
3184             is_membersonly = False
3185             is_premium = False
3186             contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3187             badge_labels = set()
3188             for content in contents:
3189                 if not isinstance(content, dict):
3190                     continue
3191                 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3192             for badge_label in badge_labels:
3193                 if badge_label.lower() == 'members only':
3194                     is_membersonly = True
3195                 elif badge_label.lower() == 'premium':
3196                     is_premium = True
3197                 elif badge_label.lower() == 'unlisted':
3198                     is_unlisted = True
3199
3200         info['availability'] = self._availability(
3201             is_private=is_private,
3202             needs_premium=is_premium,
3203             needs_subscription=is_membersonly,
3204             needs_auth=info['age_limit'] >= 18,
3205             is_unlisted=None if is_private is None else is_unlisted)
3206
3207         if self.get_param('getcomments', False):
3208             info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3209
3210         self.mark_watched(video_id, player_responses)
3211
3212         return info
3213
3214
3215 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3216     IE_DESC = 'YouTube.com tab'
3217     _VALID_URL = r'''(?x)
3218                     https?://
3219                         (?:\w+\.)?
3220                         (?:
3221                             youtube(?:kids)?\.com|
3222                             invidio\.us
3223                         )/
3224                         (?:
3225                             (?P<channel_type>channel|c|user|browse)/|
3226                             (?P<not_channel>
3227                                 feed/|hashtag/|
3228                                 (?:playlist|watch)\?.*?\blist=
3229                             )|
3230                             (?!(?:%s)\b)  # Direct URLs
3231                         )
3232                         (?P<id>[^/?\#&]+)
3233                     ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3234     IE_NAME = 'youtube:tab'
3235
3236     _TESTS = [{
3237         'note': 'playlists, multipage',
3238         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3239         'playlist_mincount': 94,
3240         'info_dict': {
3241             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3242             'title': 'Игорь Клейнер - Playlists',
3243             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3244             'uploader': 'Игорь Клейнер',
3245             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3246         },
3247     }, {
3248         'note': 'playlists, multipage, different order',
3249         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3250         'playlist_mincount': 94,
3251         'info_dict': {
3252             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3253             'title': 'Игорь Клейнер - Playlists',
3254             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3255             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3256             'uploader': 'Игорь Клейнер',
3257         },
3258     }, {
3259         'note': 'playlists, series',
3260         'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3261         'playlist_mincount': 5,
3262         'info_dict': {
3263             'id': 'UCYO_jab_esuFRV4b17AJtAw',
3264             'title': '3Blue1Brown - Playlists',
3265             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3266             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3267             'uploader': '3Blue1Brown',
3268         },
3269     }, {
3270         'note': 'playlists, singlepage',
3271         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3272         'playlist_mincount': 4,
3273         'info_dict': {
3274             'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3275             'title': 'ThirstForScience - Playlists',
3276             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3277             'uploader': 'ThirstForScience',
3278             'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3279         }
3280     }, {
3281         'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3282         'only_matching': True,
3283     }, {
3284         'note': 'basic, single video playlist',
3285         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3286         'info_dict': {
3287             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3288             'uploader': 'Sergey M.',
3289             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3290             'title': 'youtube-dl public playlist',
3291         },
3292         'playlist_count': 1,
3293     }, {
3294         'note': 'empty playlist',
3295         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3296         'info_dict': {
3297             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3298             'uploader': 'Sergey M.',
3299             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3300             'title': 'youtube-dl empty playlist',
3301         },
3302         'playlist_count': 0,
3303     }, {
3304         'note': 'Home tab',
3305         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3306         'info_dict': {
3307             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3308             'title': 'lex will - Home',
3309             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3310             'uploader': 'lex will',
3311             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3312         },
3313         'playlist_mincount': 2,
3314     }, {
3315         'note': 'Videos tab',
3316         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3317         'info_dict': {
3318             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3319             'title': 'lex will - Videos',
3320             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3321             'uploader': 'lex will',
3322             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3323         },
3324         'playlist_mincount': 975,
3325     }, {
3326         'note': 'Videos tab, sorted by popular',
3327         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3328         'info_dict': {
3329             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3330             'title': 'lex will - Videos',
3331             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3332             'uploader': 'lex will',
3333             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3334         },
3335         'playlist_mincount': 199,
3336     }, {
3337         'note': 'Playlists tab',
3338         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3339         'info_dict': {
3340             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3341             'title': 'lex will - Playlists',
3342             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3343             'uploader': 'lex will',
3344             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3345         },
3346         'playlist_mincount': 17,
3347     }, {
3348         'note': 'Community tab',
3349         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3350         'info_dict': {
3351             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3352             'title': 'lex will - Community',
3353             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3354             'uploader': 'lex will',
3355             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3356         },
3357         'playlist_mincount': 18,
3358     }, {
3359         'note': 'Channels tab',
3360         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3361         'info_dict': {
3362             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3363             'title': 'lex will - Channels',
3364             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3365             'uploader': 'lex will',
3366             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3367         },
3368         'playlist_mincount': 12,
3369     }, {
3370         'note': 'Search tab',
3371         'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3372         'playlist_mincount': 40,
3373         'info_dict': {
3374             'id': 'UCYO_jab_esuFRV4b17AJtAw',
3375             'title': '3Blue1Brown - Search - linear algebra',
3376             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3377             'uploader': '3Blue1Brown',
3378             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3379         },
3380     }, {
3381         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3382         'only_matching': True,
3383     }, {
3384         'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3385         'only_matching': True,
3386     }, {
3387         'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3388         'only_matching': True,
3389     }, {
3390         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3391         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3392         'info_dict': {
3393             'title': '29C3: Not my department',
3394             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3395             'uploader': 'Christiaan008',
3396             'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3397             'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3398         },
3399         'playlist_count': 96,
3400     }, {
3401         'note': 'Large playlist',
3402         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3403         'info_dict': {
3404             'title': 'Uploads from Cauchemar',
3405             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3406             'uploader': 'Cauchemar',
3407             'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3408         },
3409         'playlist_mincount': 1123,
3410     }, {
3411         'note': 'even larger playlist, 8832 videos',
3412         'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3413         'only_matching': True,
3414     }, {
3415         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3416         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3417         'info_dict': {
3418             'title': 'Uploads from Interstellar Movie',
3419             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3420             'uploader': 'Interstellar Movie',
3421             'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3422         },
3423         'playlist_mincount': 21,
3424     }, {
3425         'note': 'Playlist with "show unavailable videos" button',
3426         'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3427         'info_dict': {
3428             'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3429             'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3430             'uploader': 'Phim Siêu Nhân Nhật Bản',
3431             'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3432         },
3433         'playlist_mincount': 200,
3434     }, {
3435         'note': 'Playlist with unavailable videos in page 7',
3436         'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3437         'info_dict': {
3438             'title': 'Uploads from BlankTV',
3439             'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3440             'uploader': 'BlankTV',
3441             'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3442         },
3443         'playlist_mincount': 1000,
3444     }, {
3445         'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3446         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3447         'info_dict': {
3448             'title': 'Data Analysis with Dr Mike Pound',
3449             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3450             'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3451             'uploader': 'Computerphile',
3452             'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3453         },
3454         'playlist_mincount': 11,
3455     }, {
3456         'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3457         'only_matching': True,
3458     }, {
3459         'note': 'Playlist URL that does not actually serve a playlist',
3460         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3461         'info_dict': {
3462             'id': 'FqZTN594JQw',
3463             'ext': 'webm',
3464             'title': "Smiley's People 01 detective, Adventure Series, Action",
3465             'uploader': 'STREEM',
3466             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3467             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3468             'upload_date': '20150526',
3469             'license': 'Standard YouTube License',
3470             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3471             'categories': ['People & Blogs'],
3472             'tags': list,
3473             'view_count': int,
3474             'like_count': int,
3475             'dislike_count': int,
3476         },
3477         'params': {
3478             'skip_download': True,
3479         },
3480         'skip': 'This video is not available.',
3481         'add_ie': [YoutubeIE.ie_key()],
3482     }, {
3483         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3484         'only_matching': True,
3485     }, {
3486         'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3487         'only_matching': True,
3488     }, {
3489         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3490         'info_dict': {
3491             'id': '3yImotZU3tw',  # This will keep changing
3492             'ext': 'mp4',
3493             'title': compat_str,
3494             'uploader': 'Sky News',
3495             'uploader_id': 'skynews',
3496             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3497             'upload_date': r're:\d{8}',
3498             'description': compat_str,
3499             'categories': ['News & Politics'],
3500             'tags': list,
3501             'like_count': int,
3502             'dislike_count': int,
3503         },
3504         'params': {
3505             'skip_download': True,
3506         },
3507         'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3508     }, {
3509         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3510         'info_dict': {
3511             'id': 'a48o2S1cPoo',
3512             'ext': 'mp4',
3513             'title': 'The Young Turks - Live Main Show',
3514             'uploader': 'The Young Turks',
3515             'uploader_id': 'TheYoungTurks',
3516             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3517             'upload_date': '20150715',
3518             'license': 'Standard YouTube License',
3519             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3520             'categories': ['News & Politics'],
3521             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3522             'like_count': int,
3523             'dislike_count': int,
3524         },
3525         'params': {
3526             'skip_download': True,
3527         },
3528         'only_matching': True,
3529     }, {
3530         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3531         'only_matching': True,
3532     }, {
3533         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3534         'only_matching': True,
3535     }, {
3536         'note': 'A channel that is not live. Should raise error',
3537         'url': 'https://www.youtube.com/user/numberphile/live',
3538         'only_matching': True,
3539     }, {
3540         'url': 'https://www.youtube.com/feed/trending',
3541         'only_matching': True,
3542     }, {
3543         'url': 'https://www.youtube.com/feed/library',
3544         'only_matching': True,
3545     }, {
3546         'url': 'https://www.youtube.com/feed/history',
3547         'only_matching': True,
3548     }, {
3549         'url': 'https://www.youtube.com/feed/subscriptions',
3550         'only_matching': True,
3551     }, {
3552         'url': 'https://www.youtube.com/feed/watch_later',
3553         'only_matching': True,
3554     }, {
3555         'note': 'Recommended - redirects to home page',
3556         'url': 'https://www.youtube.com/feed/recommended',
3557         'only_matching': True,
3558     }, {
3559         'note': 'inline playlist with not always working continuations',
3560         'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3561         'only_matching': True,
3562     }, {
3563         'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3564         'only_matching': True,
3565     }, {
3566         'url': 'https://www.youtube.com/course',
3567         'only_matching': True,
3568     }, {
3569         'url': 'https://www.youtube.com/zsecurity',
3570         'only_matching': True,
3571     }, {
3572         'url': 'http://www.youtube.com/NASAgovVideo/videos',
3573         'only_matching': True,
3574     }, {
3575         'url': 'https://www.youtube.com/TheYoungTurks/live',
3576         'only_matching': True,
3577     }, {
3578         'url': 'https://www.youtube.com/hashtag/cctv9',
3579         'info_dict': {
3580             'id': 'cctv9',
3581             'title': '#cctv9',
3582         },
3583         'playlist_mincount': 350,
3584     }, {
3585         'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3586         'only_matching': True,
3587     }, {
3588         'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3589         'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3590         'only_matching': True
3591     }, {
3592         'note': '/browse/ should redirect to /channel/',
3593         'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3594         'only_matching': True
3595     }, {
3596         'note': 'VLPL, should redirect to playlist?list=PL...',
3597         'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3598         'info_dict': {
3599             'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3600             'uploader': 'NoCopyrightSounds',
3601             'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3602             'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3603             'title': 'NCS Releases',
3604         },
3605         'playlist_mincount': 166,
3606     }, {
3607         'note': 'Topic, should redirect to playlist?list=UU...',
3608         'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3609         'info_dict': {
3610             'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3611             'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3612             'title': 'Uploads from Royalty Free Music - Topic',
3613             'uploader': 'Royalty Free Music - Topic',
3614         },
3615         'expected_warnings': [
3616             'A channel/user page was given',
3617             'The URL does not have a videos tab',
3618         ],
3619         'playlist_mincount': 101,
3620     }, {
3621         'note': 'Topic without a UU playlist',
3622         'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3623         'info_dict': {
3624             'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3625             'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3626         },
3627         'expected_warnings': [
3628             'A channel/user page was given',
3629             'The URL does not have a videos tab',
3630             'Falling back to channel URL',
3631         ],
3632         'playlist_mincount': 9,
3633     }, {
3634         'note': 'Youtube music Album',
3635         'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3636         'info_dict': {
3637             'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3638             'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3639         },
3640         'playlist_count': 50,
3641     }, {
3642         'note': 'unlisted single video playlist',
3643         'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3644         'info_dict': {
3645             'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3646             'uploader': 'colethedj',
3647             'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3648             'title': 'yt-dlp unlisted playlist test',
3649             'availability': 'unlisted'
3650         },
3651         'playlist_count': 1,
3652     }]
3653
3654     @classmethod
3655     def suitable(cls, url):
3656         return False if YoutubeIE.suitable(url) else super(
3657             YoutubeTabIE, cls).suitable(url)
3658
3659     def _extract_channel_id(self, webpage):
3660         channel_id = self._html_search_meta(
3661             'channelId', webpage, 'channel id', default=None)
3662         if channel_id:
3663             return channel_id
3664         channel_url = self._html_search_meta(
3665             ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3666              'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3667              'twitter:app:url:googleplay'), webpage, 'channel url')
3668         return self._search_regex(
3669             r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3670             channel_url, 'channel id')
3671
3672     @staticmethod
3673     def _extract_basic_item_renderer(item):
3674         # Modified from _extract_grid_item_renderer
3675         known_basic_renderers = (
3676             'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3677         )
3678         for key, renderer in item.items():
3679             if not isinstance(renderer, dict):
3680                 continue
3681             elif key in known_basic_renderers:
3682                 return renderer
3683             elif key.startswith('grid') and key.endswith('Renderer'):
3684                 return renderer
3685
3686     def _grid_entries(self, grid_renderer):
3687         for item in grid_renderer['items']:
3688             if not isinstance(item, dict):
3689                 continue
3690             renderer = self._extract_basic_item_renderer(item)
3691             if not isinstance(renderer, dict):
3692                 continue
3693             title = self._get_text(renderer, 'title')
3694
3695             # playlist
3696             playlist_id = renderer.get('playlistId')
3697             if playlist_id:
3698                 yield self.url_result(
3699                     'https://www.youtube.com/playlist?list=%s' % playlist_id,
3700                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3701                     video_title=title)
3702                 continue
3703             # video
3704             video_id = renderer.get('videoId')
3705             if video_id:
3706                 yield self._extract_video(renderer)
3707                 continue
3708             # channel
3709             channel_id = renderer.get('channelId')
3710             if channel_id:
3711                 yield self.url_result(
3712                     'https://www.youtube.com/channel/%s' % channel_id,
3713                     ie=YoutubeTabIE.ie_key(), video_title=title)
3714                 continue
3715             # generic endpoint URL support
3716             ep_url = urljoin('https://www.youtube.com/', try_get(
3717                 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3718                 compat_str))
3719             if ep_url:
3720                 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3721                     if ie.suitable(ep_url):
3722                         yield self.url_result(
3723                             ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3724                         break
3725
3726     def _shelf_entries_from_content(self, shelf_renderer):
3727         content = shelf_renderer.get('content')
3728         if not isinstance(content, dict):
3729             return
3730         renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3731         if renderer:
3732             # TODO: add support for nested playlists so each shelf is processed
3733             # as separate playlist
3734             # TODO: this includes only first N items
3735             for entry in self._grid_entries(renderer):
3736                 yield entry
3737         renderer = content.get('horizontalListRenderer')
3738         if renderer:
3739             # TODO
3740             pass
3741
3742     def _shelf_entries(self, shelf_renderer, skip_channels=False):
3743         ep = try_get(
3744             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3745             compat_str)
3746         shelf_url = urljoin('https://www.youtube.com', ep)
3747         if shelf_url:
3748             # Skipping links to another channels, note that checking for
3749             # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3750             # will not work
3751             if skip_channels and '/channels?' in shelf_url:
3752                 return
3753             title = self._get_text(shelf_renderer, 'title')
3754             yield self.url_result(shelf_url, video_title=title)
3755         # Shelf may not contain shelf URL, fallback to extraction from content
3756         for entry in self._shelf_entries_from_content(shelf_renderer):
3757             yield entry
3758
3759     def _playlist_entries(self, video_list_renderer):
3760         for content in video_list_renderer['contents']:
3761             if not isinstance(content, dict):
3762                 continue
3763             renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3764             if not isinstance(renderer, dict):
3765                 continue
3766             video_id = renderer.get('videoId')
3767             if not video_id:
3768                 continue
3769             yield self._extract_video(renderer)
3770
3771     def _rich_entries(self, rich_grid_renderer):
3772         renderer = try_get(
3773             rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3774         video_id = renderer.get('videoId')
3775         if not video_id:
3776             return
3777         yield self._extract_video(renderer)
3778
3779     def _video_entry(self, video_renderer):
3780         video_id = video_renderer.get('videoId')
3781         if video_id:
3782             return self._extract_video(video_renderer)
3783
3784     def _post_thread_entries(self, post_thread_renderer):
3785         post_renderer = try_get(
3786             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3787         if not post_renderer:
3788             return
3789         # video attachment
3790         video_renderer = try_get(
3791             post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3792         video_id = video_renderer.get('videoId')
3793         if video_id:
3794             entry = self._extract_video(video_renderer)
3795             if entry:
3796                 yield entry
3797         # playlist attachment
3798         playlist_id = try_get(
3799             post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3800         if playlist_id:
3801             yield self.url_result(
3802                 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3803                 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3804         # inline video links
3805         runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3806         for run in runs:
3807             if not isinstance(run, dict):
3808                 continue
3809             ep_url = try_get(
3810                 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3811             if not ep_url:
3812                 continue
3813             if not YoutubeIE.suitable(ep_url):
3814                 continue
3815             ep_video_id = YoutubeIE._match_id(ep_url)
3816             if video_id == ep_video_id:
3817                 continue
3818             yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3819
3820     def _post_thread_continuation_entries(self, post_thread_continuation):
3821         contents = post_thread_continuation.get('contents')
3822         if not isinstance(contents, list):
3823             return
3824         for content in contents:
3825             renderer = content.get('backstagePostThreadRenderer')
3826             if not isinstance(renderer, dict):
3827                 continue
3828             for entry in self._post_thread_entries(renderer):
3829                 yield entry
3830
3831     r''' # unused
3832     def _rich_grid_entries(self, contents):
3833         for content in contents:
3834             video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3835             if video_renderer:
3836                 entry = self._video_entry(video_renderer)
3837                 if entry:
3838                     yield entry
3839     '''
3840     def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
3841
3842         def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
3843             contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3844             for content in contents:
3845                 if not isinstance(content, dict):
3846                     continue
3847                 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3848                 if not is_renderer:
3849                     renderer = content.get('richItemRenderer')
3850                     if renderer:
3851                         for entry in self._rich_entries(renderer):
3852                             yield entry
3853                         continuation_list[0] = self._extract_continuation(parent_renderer)
3854                     continue
3855                 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3856                 for isr_content in isr_contents:
3857                     if not isinstance(isr_content, dict):
3858                         continue
3859
3860                     known_renderers = {
3861                         'playlistVideoListRenderer': self._playlist_entries,
3862                         'gridRenderer': self._grid_entries,
3863                         'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
3864                         'backstagePostThreadRenderer': self._post_thread_entries,
3865                         'videoRenderer': lambda x: [self._video_entry(x)],
3866                     }
3867                     for key, renderer in isr_content.items():
3868                         if key not in known_renderers:
3869                             continue
3870                         for entry in known_renderers[key](renderer):
3871                             if entry:
3872                                 yield entry
3873                         continuation_list[0] = self._extract_continuation(renderer)
3874                         break
3875
3876                 if not continuation_list[0]:
3877                     continuation_list[0] = self._extract_continuation(is_renderer)
3878
3879             if not continuation_list[0]:
3880                 continuation_list[0] = self._extract_continuation(parent_renderer)
3881
3882         continuation_list = [None]  # Python 2 doesnot support nonlocal
3883         tab_content = try_get(tab, lambda x: x['content'], dict)
3884         if not tab_content:
3885             return
3886         parent_renderer = (
3887             try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3888             or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
3889         for entry in extract_entries(parent_renderer):
3890             yield entry
3891         continuation = continuation_list[0]
3892         visitor_data = None
3893
3894         for page_num in itertools.count(1):
3895             if not continuation:
3896                 break
3897             headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
3898             response = self._extract_response(
3899                 item_id='%s page %s' % (item_id, page_num),
3900                 query=continuation, headers=headers, ytcfg=ytcfg,
3901                 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3902
3903             if not response:
3904                 break
3905             visitor_data = try_get(
3906                 response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
3907
3908             known_continuation_renderers = {
3909                 'playlistVideoListContinuation': self._playlist_entries,
3910                 'gridContinuation': self._grid_entries,
3911                 'itemSectionContinuation': self._post_thread_continuation_entries,
3912                 'sectionListContinuation': extract_entries,  # for feeds
3913             }
3914             continuation_contents = try_get(
3915                 response, lambda x: x['continuationContents'], dict) or {}
3916             continuation_renderer = None
3917             for key, value in continuation_contents.items():
3918                 if key not in known_continuation_renderers:
3919                     continue
3920                 continuation_renderer = value
3921                 continuation_list = [None]
3922                 for entry in known_continuation_renderers[key](continuation_renderer):
3923                     yield entry
3924                 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3925                 break
3926             if continuation_renderer:
3927                 continue
3928
3929             known_renderers = {
3930                 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3931                 'gridVideoRenderer': (self._grid_entries, 'items'),
3932                 'gridChannelRenderer': (self._grid_entries, 'items'),
3933                 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
3934                 'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
3935                 'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
3936                 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
3937             }
3938             on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3939             continuation_items = try_get(
3940                 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
3941             continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3942             video_items_renderer = None
3943             for key, value in continuation_item.items():
3944                 if key not in known_renderers:
3945                     continue
3946                 video_items_renderer = {known_renderers[key][1]: continuation_items}
3947                 continuation_list = [None]
3948                 for entry in known_renderers[key][0](video_items_renderer):
3949                     yield entry
3950                 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
3951                 break
3952             if video_items_renderer:
3953                 continue
3954             break
3955
3956     @staticmethod
3957     def _extract_selected_tab(tabs):
3958         for tab in tabs:
3959             renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3960             if renderer.get('selected') is True:
3961                 return renderer
3962         else:
3963             raise ExtractorError('Unable to find selected tab')
3964
3965     @classmethod
3966     def _extract_uploader(cls, data):
3967         uploader = {}
3968         renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3969         owner = try_get(
3970             renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3971         if owner:
3972             uploader['uploader'] = owner.get('text')
3973             uploader['uploader_id'] = try_get(
3974                 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3975             uploader['uploader_url'] = urljoin(
3976                 'https://www.youtube.com/',
3977                 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3978         return {k: v for k, v in uploader.items() if v is not None}
3979
3980     def _extract_from_tabs(self, item_id, webpage, data, tabs):
3981         playlist_id = title = description = channel_url = channel_name = channel_id = None
3982         thumbnails_list = tags = []
3983
3984         selected_tab = self._extract_selected_tab(tabs)
3985         renderer = try_get(
3986             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3987         if renderer:
3988             channel_name = renderer.get('title')
3989             channel_url = renderer.get('channelUrl')
3990             channel_id = renderer.get('externalId')
3991         else:
3992             renderer = try_get(
3993                 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3994
3995         if renderer:
3996             title = renderer.get('title')
3997             description = renderer.get('description', '')
3998             playlist_id = channel_id
3999             tags = renderer.get('keywords', '').split()
4000             thumbnails_list = (
4001                 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
4002                 or try_get(
4003                     self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
4004                     lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
4005                     list)
4006                 or [])
4007
4008         thumbnails = []
4009         for t in thumbnails_list:
4010             if not isinstance(t, dict):
4011                 continue
4012             thumbnail_url = url_or_none(t.get('url'))
4013             if not thumbnail_url:
4014                 continue
4015             thumbnails.append({
4016                 'url': thumbnail_url,
4017                 'width': int_or_none(t.get('width')),
4018                 'height': int_or_none(t.get('height')),
4019             })
4020         if playlist_id is None:
4021             playlist_id = item_id
4022         if title is None:
4023             title = (
4024                 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
4025                 or playlist_id)
4026         title += format_field(selected_tab, 'title', ' - %s')
4027         title += format_field(selected_tab, 'expandedText', ' - %s')
4028         metadata = {
4029             'playlist_id': playlist_id,
4030             'playlist_title': title,
4031             'playlist_description': description,
4032             'uploader': channel_name,
4033             'uploader_id': channel_id,
4034             'uploader_url': channel_url,
4035             'thumbnails': thumbnails,
4036             'tags': tags,
4037         }
4038         availability = self._extract_availability(data)
4039         if availability:
4040             metadata['availability'] = availability
4041         if not channel_id:
4042             metadata.update(self._extract_uploader(data))
4043         metadata.update({
4044             'channel': metadata['uploader'],
4045             'channel_id': metadata['uploader_id'],
4046             'channel_url': metadata['uploader_url']})
4047         ytcfg = self.extract_ytcfg(item_id, webpage)
4048         return self.playlist_result(
4049             self._entries(
4050                 selected_tab, playlist_id,
4051                 self._extract_identity_token(webpage, item_id),
4052                 self._extract_account_syncid(ytcfg, data), ytcfg),
4053             **metadata)
4054
4055     def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
4056         first_id = last_id = None
4057         ytcfg = self.extract_ytcfg(playlist_id, webpage)
4058         headers = self.generate_api_headers(
4059             ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4060             identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
4061         for page_num in itertools.count(1):
4062             videos = list(self._playlist_entries(playlist))
4063             if not videos:
4064                 return
4065             start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
4066             if start >= len(videos):
4067                 return
4068             for video in videos[start:]:
4069                 if video['id'] == first_id:
4070                     self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
4071                     return
4072                 yield video
4073             first_id = first_id or videos[0]['id']
4074             last_id = videos[-1]['id']
4075             watch_endpoint = try_get(
4076                 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
4077             query = {
4078                 'playlistId': playlist_id,
4079                 'videoId': watch_endpoint.get('videoId') or last_id,
4080                 'index': watch_endpoint.get('index') or len(videos),
4081                 'params': watch_endpoint.get('params') or 'OAE%3D'
4082             }
4083             response = self._extract_response(
4084                 item_id='%s page %d' % (playlist_id, page_num),
4085                 query=query, ep='next', headers=headers, ytcfg=ytcfg,
4086                 check_get_keys='contents'
4087             )
4088             playlist = try_get(
4089                 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4090
4091     def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
4092         title = playlist.get('title') or try_get(
4093             data, lambda x: x['titleText']['simpleText'], compat_str)
4094         playlist_id = playlist.get('playlistId') or item_id
4095
4096         # Delegating everything except mix playlists to regular tab-based playlist URL
4097         playlist_url = urljoin(url, try_get(
4098             playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4099             compat_str))
4100         if playlist_url and playlist_url != url:
4101             return self.url_result(
4102                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4103                 video_title=title)
4104
4105         return self.playlist_result(
4106             self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4107             playlist_id=playlist_id, playlist_title=title)
4108
4109     def _extract_availability(self, data):
4110         """
4111         Gets the availability of a given playlist/tab.
4112         Note: Unless YouTube tells us explicitly, we do not assume it is public
4113         @param data: response
4114         """
4115         is_private = is_unlisted = None
4116         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4117         badge_labels = self._extract_badges(renderer)
4118
4119         # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4120         privacy_dropdown_entries = try_get(
4121             renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4122         for renderer_dict in privacy_dropdown_entries:
4123             is_selected = try_get(
4124                 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4125             if not is_selected:
4126                 continue
4127             label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4128             if label:
4129                 badge_labels.add(label.lower())
4130                 break
4131
4132         for badge_label in badge_labels:
4133             if badge_label == 'unlisted':
4134                 is_unlisted = True
4135             elif badge_label == 'private':
4136                 is_private = True
4137             elif badge_label == 'public':
4138                 is_unlisted = is_private = False
4139         return self._availability(is_private, False, False, False, is_unlisted)
4140
4141     @staticmethod
4142     def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4143         sidebar_renderer = try_get(
4144             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4145         for item in sidebar_renderer:
4146             renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4147             if renderer:
4148                 return renderer
4149
4150     def _reload_with_unavailable_videos(self, item_id, data, webpage):
4151         """
4152         Get playlist with unavailable videos if the 'show unavailable videos' button exists.
4153         """
4154         browse_id = params = None
4155         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4156         if not renderer:
4157             return
4158         menu_renderer = try_get(
4159             renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
4160         for menu_item in menu_renderer:
4161             if not isinstance(menu_item, dict):
4162                 continue
4163             nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
4164             text = try_get(
4165                 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
4166             if not text or text.lower() != 'show unavailable videos':
4167                 continue
4168             browse_endpoint = try_get(
4169                 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
4170             browse_id = browse_endpoint.get('browseId')
4171             params = browse_endpoint.get('params')
4172             break
4173
4174         ytcfg = self.extract_ytcfg(item_id, webpage)
4175         headers = self.generate_api_headers(
4176             ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4177             identity_token=self._extract_identity_token(webpage, item_id=item_id),
4178             visitor_data=try_get(
4179                 self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
4180         query = {
4181             'params': params or 'wgYCCAA=',
4182             'browseId': browse_id or 'VL%s' % item_id
4183         }
4184         return self._extract_response(
4185             item_id=item_id, headers=headers, query=query,
4186             check_get_keys='contents', fatal=False, ytcfg=ytcfg,
4187             note='Downloading API JSON with unavailable videos')
4188
4189     def _extract_webpage(self, url, item_id):
4190         retries = self.get_param('extractor_retries', 3)
4191         count = -1
4192         last_error = 'Incomplete yt initial data recieved'
4193         while count < retries:
4194             count += 1
4195             # Sometimes youtube returns a webpage with incomplete ytInitialData
4196             # See: https://github.com/yt-dlp/yt-dlp/issues/116
4197             if count:
4198                 self.report_warning('%s. Retrying ...' % last_error)
4199             webpage = self._download_webpage(
4200                 url, item_id,
4201                 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4202             data = self.extract_yt_initial_data(item_id, webpage)
4203             if data.get('contents') or data.get('currentVideoEndpoint'):
4204                 break
4205             # Extract alerts here only when there is error
4206             self._extract_and_report_alerts(data)
4207             if count >= retries:
4208                 raise ExtractorError(last_error)
4209         return webpage, data
4210
4211     @staticmethod
4212     def _smuggle_data(entries, data):
4213         for entry in entries:
4214             if data:
4215                 entry['url'] = smuggle_url(entry['url'], data)
4216             yield entry
4217
4218     def _real_extract(self, url):
4219         url, smuggled_data = unsmuggle_url(url, {})
4220         if self.is_music_url(url):
4221             smuggled_data['is_music_url'] = True
4222         info_dict = self.__real_extract(url, smuggled_data)
4223         if info_dict.get('entries'):
4224             info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4225         return info_dict
4226
4227     _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4228
4229     def __real_extract(self, url, smuggled_data):
4230         item_id = self._match_id(url)
4231         url = compat_urlparse.urlunparse(
4232             compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
4233         compat_opts = self.get_param('compat_opts', [])
4234
4235         def get_mobj(url):
4236             mobj = self._url_re.match(url).groupdict()
4237             mobj.update((k, '') for k, v in mobj.items() if v is None)
4238             return mobj
4239
4240         mobj = get_mobj(url)
4241         # Youtube returns incomplete data if tabname is not lower case
4242         pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
4243
4244         if is_channel:
4245             if smuggled_data.get('is_music_url'):
4246                 if item_id[:2] == 'VL':
4247                     # Youtube music VL channels have an equivalent playlist
4248                     item_id = item_id[2:]
4249                     pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
4250                 elif item_id[:2] == 'MP':
4251                     # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
4252                     item_id = self._search_regex(
4253                         r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
4254                         self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
4255                         'playlist id')
4256                     pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
4257                 elif mobj['channel_type'] == 'browse':
4258                     # Youtube music /browse/ should be changed to /channel/
4259                     pre = 'https://www.youtube.com/channel/%s' % item_id
4260         if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
4261             # Home URLs should redirect to /videos/
4262             self.report_warning(
4263                 'A channel/user page was given. All the channel\'s videos will be downloaded. '
4264                 'To download only the videos in the home page, add a "/featured" to the URL')
4265             tab = '/videos'
4266
4267         url = ''.join((pre, tab, post))
4268         mobj = get_mobj(url)
4269
4270         # Handle both video/playlist URLs
4271         qs = parse_qs(url)
4272         video_id = qs.get('v', [None])[0]
4273         playlist_id = qs.get('list', [None])[0]
4274
4275         if not video_id and mobj['not_channel'].startswith('watch'):
4276             if not playlist_id:
4277                 # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
4278                 raise ExtractorError('Unable to recognize tab page')
4279             # Common mistake: https://www.youtube.com/watch?list=playlist_id
4280             self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
4281             url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
4282             mobj = get_mobj(url)
4283
4284         if video_id and playlist_id:
4285             if self.get_param('noplaylist'):
4286                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
4287                 return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
4288             self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))
4289
4290         webpage, data = self._extract_webpage(url, item_id)
4291
4292         tabs = try_get(
4293             data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4294         if tabs:
4295             selected_tab = self._extract_selected_tab(tabs)
4296             tab_name = selected_tab.get('title', '')
4297             if 'no-youtube-channel-redirect' not in compat_opts:
4298                 if mobj['tab'] == '/live':
4299                     # Live tab should have redirected to the video
4300                     raise ExtractorError('The channel is not currently live', expected=True)
4301                 if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
4302                     if not mobj['not_channel'] and item_id[:2] == 'UC':
4303                         # Topic channels don't have /videos. Use the equivalent playlist instead
4304                         self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
4305                         pl_id = 'UU%s' % item_id[2:]
4306                         pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
4307                         try:
4308                             pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
4309                             for alert_type, alert_message in self._extract_alerts(pl_data):
4310                                 if alert_type == 'error':
4311                                     raise ExtractorError('Youtube said: %s' % alert_message)
4312                             item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
4313                         except ExtractorError:
4314                             self.report_warning('The playlist gave error. Falling back to channel URL')
4315                     else:
4316                         self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))
4317
4318         self.write_debug('Final URL: %s' % url)
4319
4320         # YouTube sometimes provides a button to reload playlist with unavailable videos.
4321         if 'no-youtube-unavailable-videos' not in compat_opts:
4322             data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
4323         self._extract_and_report_alerts(data, only_once=True)
4324         tabs = try_get(
4325             data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4326         if tabs:
4327             return self._extract_from_tabs(item_id, webpage, data, tabs)
4328
4329         playlist = try_get(
4330             data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4331         if playlist:
4332             return self._extract_from_playlist(item_id, url, data, playlist, webpage)
4333
4334         video_id = try_get(
4335             data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
4336             compat_str) or video_id
4337         if video_id:
4338             if mobj['tab'] != '/live':  # live tab is expected to redirect to video
4339                 self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
4340             return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
4341
4342         raise ExtractorError('Unable to recognize tab page')
4343
4344
4345 class YoutubePlaylistIE(InfoExtractor):
4346     IE_DESC = 'YouTube.com playlists'
4347     _VALID_URL = r'''(?x)(?:
4348                         (?:https?://)?
4349                         (?:\w+\.)?
4350                         (?:
4351                             (?:
4352                                 youtube(?:kids)?\.com|
4353                                 invidio\.us
4354                             )
4355                             /.*?\?.*?\blist=
4356                         )?
4357                         (?P<id>%(playlist_id)s)
4358                      )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
4359     IE_NAME = 'youtube:playlist'
4360     _TESTS = [{
4361         'note': 'issue #673',
4362         'url': 'PLBB231211A4F62143',
4363         'info_dict': {
4364             'title': '[OLD]Team Fortress 2 (Class-based LP)',
4365             'id': 'PLBB231211A4F62143',
4366             'uploader': 'Wickydoo',
4367             'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
4368             'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
4369         },
4370         'playlist_mincount': 29,
4371     }, {
4372         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4373         'info_dict': {
4374             'title': 'YDL_safe_search',
4375             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4376         },
4377         'playlist_count': 2,
4378         'skip': 'This playlist is private',
4379     }, {
4380         'note': 'embedded',
4381         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4382         'playlist_count': 4,
4383         'info_dict': {
4384             'title': 'JODA15',
4385             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4386             'uploader': 'milan',
4387             'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
4388         }
4389     }, {
4390         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4391         'playlist_mincount': 654,
4392         'info_dict': {
4393             'title': '2018 Chinese New Singles (11/6 updated)',
4394             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4395             'uploader': 'LBK',
4396             'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
4397             'description': 'md5:da521864744d60a198e3a88af4db0d9d',
4398         }
4399     }, {
4400         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
4401         'only_matching': True,
4402     }, {
4403         # music album playlist
4404         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
4405         'only_matching': True,
4406     }]
4407
4408     @classmethod
4409     def suitable(cls, url):
4410         if YoutubeTabIE.suitable(url):
4411             return False
4412         # Hack for lazy extractors until more generic solution is implemented
4413         # (see #28780)
4414         from .youtube import parse_qs
4415         qs = parse_qs(url)
4416         if qs.get('v', [None])[0]:
4417             return False
4418         return super(YoutubePlaylistIE, cls).suitable(url)
4419
4420     def _real_extract(self, url):
4421         playlist_id = self._match_id(url)
4422         is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
4423         url = update_url_query(
4424             'https://www.youtube.com/playlist',
4425             parse_qs(url) or {'list': playlist_id})
4426         if is_music_url:
4427             url = smuggle_url(url, {'is_music_url': True})
4428         return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4429
4430
4431 class YoutubeYtBeIE(InfoExtractor):
4432     IE_DESC = 'youtu.be'
4433     _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
4434     _TESTS = [{
4435         'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
4436         'info_dict': {
4437             'id': 'yeWKywCrFtk',
4438             'ext': 'mp4',
4439             'title': 'Small Scale Baler and Braiding Rugs',
4440             'uploader': 'Backus-Page House Museum',
4441             'uploader_id': 'backuspagemuseum',
4442             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
4443             'upload_date': '20161008',
4444             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
4445             'categories': ['Nonprofits & Activism'],
4446             'tags': list,
4447             'like_count': int,
4448             'dislike_count': int,
4449         },
4450         'params': {
4451             'noplaylist': True,
4452             'skip_download': True,
4453         },
4454     }, {
4455         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
4456         'only_matching': True,
4457     }]
4458
4459     def _real_extract(self, url):
4460         mobj = self._match_valid_url(url)
4461         video_id = mobj.group('id')
4462         playlist_id = mobj.group('playlist_id')
4463         return self.url_result(
4464             update_url_query('https://www.youtube.com/watch', {
4465                 'v': video_id,
4466                 'list': playlist_id,
4467                 'feature': 'youtu.be',
4468             }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4469
4470
4471 class YoutubeYtUserIE(InfoExtractor):
4472     IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
4473     _VALID_URL = r'ytuser:(?P<id>.+)'
4474     _TESTS = [{
4475         'url': 'ytuser:phihag',
4476         'only_matching': True,
4477     }]
4478
4479     def _real_extract(self, url):
4480         user_id = self._match_id(url)
4481         return self.url_result(
4482             'https://www.youtube.com/user/%s' % user_id,
4483             ie=YoutubeTabIE.ie_key(), video_id=user_id)
4484
4485
4486 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
4487     IE_NAME = 'youtube:favorites'
4488     IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
4489     _VALID_URL = r':ytfav(?:ou?rite)?s?'
4490     _LOGIN_REQUIRED = True
4491     _TESTS = [{
4492         'url': ':ytfav',
4493         'only_matching': True,
4494     }, {
4495         'url': ':ytfavorites',
4496         'only_matching': True,
4497     }]
4498
4499     def _real_extract(self, url):
4500         return self.url_result(
4501             'https://www.youtube.com/playlist?list=LL',
4502             ie=YoutubeTabIE.ie_key())
4503
4504
4505 class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
4506     IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
4507     # there doesn't appear to be a real limit, for example if you search for
4508     # 'python' you get more than 8.000.000 results
4509     _MAX_RESULTS = float('inf')
4510     IE_NAME = 'youtube:search'
4511     _SEARCH_KEY = 'ytsearch'
4512     _SEARCH_PARAMS = None
4513     _TESTS = []
4514
4515     def _entries(self, query, n):
4516         data = {'query': query}
4517         if self._SEARCH_PARAMS:
4518             data['params'] = self._SEARCH_PARAMS
4519         total = 0
4520         continuation = {}
4521         for page_num in itertools.count(1):
4522             data.update(continuation)
4523             search = self._extract_response(
4524                 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
4525                 check_get_keys=('contents', 'onResponseReceivedCommands')
4526             )
4527             if not search:
4528                 break
4529             slr_contents = try_get(
4530                 search,
4531                 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
4532                  lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
4533                 list)
4534             if not slr_contents:
4535                 break
4536
4537             # Youtube sometimes adds promoted content to searches,
4538             # changing the index location of videos and token.
4539             # So we search through all entries till we find them.
4540             continuation = None
4541             for slr_content in slr_contents:
4542                 if not continuation:
4543                     continuation = self._extract_continuation({'contents': [slr_content]})
4544
4545                 isr_contents = try_get(
4546                     slr_content,
4547                     lambda x: x['itemSectionRenderer']['contents'],
4548                     list)
4549                 if not isr_contents:
4550                     continue
4551                 for content in isr_contents:
4552                     if not isinstance(content, dict):
4553                         continue
4554                     video = content.get('videoRenderer')
4555                     if not isinstance(video, dict):
4556                         continue
4557                     video_id = video.get('videoId')
4558                     if not video_id:
4559                         continue
4560
4561                     yield self._extract_video(video)
4562                     total += 1
4563                     if total == n:
4564                         return
4565
4566             if not continuation:
4567                 break
4568
4569     def _get_n_results(self, query, n):
4570         """Get a specified number of results for a query"""
4571         return self.playlist_result(self._entries(query, n), query, query)
4572
4573
4574 class YoutubeSearchDateIE(YoutubeSearchIE):
4575     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
4576     _SEARCH_KEY = 'ytsearchdate'
4577     IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
4578     _SEARCH_PARAMS = 'CAI%3D'
4579
4580
4581 class YoutubeSearchURLIE(YoutubeSearchIE):
4582     IE_DESC = 'YouTube.com search URLs'
4583     IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
4584     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
4585     # _MAX_RESULTS = 100
4586     _TESTS = [{
4587         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
4588         'playlist_mincount': 5,
4589         'info_dict': {
4590             'id': 'youtube-dl test video',
4591             'title': 'youtube-dl test video',
4592         }
4593     }, {
4594         'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
4595         'only_matching': True,
4596     }]
4597
4598     @classmethod
4599     def _make_valid_url(cls):
4600         return cls._VALID_URL
4601
4602     def _real_extract(self, url):
4603         qs = parse_qs(url)
4604         query = (qs.get('search_query') or qs.get('q'))[0]
4605         self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
4606         return self._get_n_results(query, self._MAX_RESULTS)
4607
4608
4609 class YoutubeFeedsInfoExtractor(YoutubeTabIE):
4610     """
4611     Base class for feed extractors
4612     Subclasses must define the _FEED_NAME property.
4613     """
4614     _LOGIN_REQUIRED = True
4615     _TESTS = []
4616
4617     @property
4618     def IE_NAME(self):
4619         return 'youtube:%s' % self._FEED_NAME
4620
4621     def _real_extract(self, url):
4622         return self.url_result(
4623             'https://www.youtube.com/feed/%s' % self._FEED_NAME,
4624             ie=YoutubeTabIE.ie_key())
4625
4626
4627 class YoutubeWatchLaterIE(InfoExtractor):
4628     IE_NAME = 'youtube:watchlater'
4629     IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
4630     _VALID_URL = r':ytwatchlater'
4631     _TESTS = [{
4632         'url': ':ytwatchlater',
4633         'only_matching': True,
4634     }]
4635
4636     def _real_extract(self, url):
4637         return self.url_result(
4638             'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4639
4640
4641 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
4642     IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
4643     _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
4644     _FEED_NAME = 'recommended'
4645     _LOGIN_REQUIRED = False
4646     _TESTS = [{
4647         'url': ':ytrec',
4648         'only_matching': True,
4649     }, {
4650         'url': ':ytrecommended',
4651         'only_matching': True,
4652     }, {
4653         'url': 'https://youtube.com',
4654         'only_matching': True,
4655     }]
4656
4657
4658 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
4659     IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
4660     _VALID_URL = r':ytsub(?:scription)?s?'
4661     _FEED_NAME = 'subscriptions'
4662     _TESTS = [{
4663         'url': ':ytsubs',
4664         'only_matching': True,
4665     }, {
4666         'url': ':ytsubscriptions',
4667         'only_matching': True,
4668     }]
4669
4670
4671 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
4672     IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
4673     _VALID_URL = r':ythis(?:tory)?'
4674     _FEED_NAME = 'history'
4675     _TESTS = [{
4676         'url': ':ythistory',
4677         'only_matching': True,
4678     }]
4679
4680
4681 class YoutubeTruncatedURLIE(InfoExtractor):
4682     IE_NAME = 'youtube:truncated_url'
4683     IE_DESC = False  # Do not list
4684     _VALID_URL = r'''(?x)
4685         (?:https?://)?
4686         (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
4687         (?:watch\?(?:
4688             feature=[a-z_]+|
4689             annotation_id=annotation_[^&]+|
4690             x-yt-cl=[0-9]+|
4691             hl=[^&]*|
4692             t=[0-9]+
4693         )?
4694         |
4695             attribution_link\?a=[^&]+
4696         )
4697         $
4698     '''
4699
4700     _TESTS = [{
4701         'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
4702         'only_matching': True,
4703     }, {
4704         'url': 'https://www.youtube.com/watch?',
4705         'only_matching': True,
4706     }, {
4707         'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
4708         'only_matching': True,
4709     }, {
4710         'url': 'https://www.youtube.com/watch?feature=foo',
4711         'only_matching': True,
4712     }, {
4713         'url': 'https://www.youtube.com/watch?hl=en-GB',
4714         'only_matching': True,
4715     }, {
4716         'url': 'https://www.youtube.com/watch?t=2372',
4717         'only_matching': True,
4718     }]
4719
4720     def _real_extract(self, url):
4721         raise ExtractorError(
4722             'Did you forget to quote the URL? Remember that & is a meta '
4723             'character in most shells, so you want to put the URL in quotes, '
4724             'like  youtube-dl '
4725             '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
4726             ' or simply  youtube-dl BaW_jenozKc  .',
4727             expected=True)
4728
4729
4730 class YoutubeClipIE(InfoExtractor):
4731     IE_NAME = 'youtube:clip'
4732     IE_DESC = False  # Do not list
4733     _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
4734
4735     def _real_extract(self, url):
4736         self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
4737         return self.url_result(url, 'Generic')
4738
4739
4740 class YoutubeTruncatedIDIE(InfoExtractor):
4741     IE_NAME = 'youtube:truncated_id'
4742     IE_DESC = False  # Do not list
4743     _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
4744
4745     _TESTS = [{
4746         'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
4747         'only_matching': True,
4748     }]
4749
4750     def _real_extract(self, url):
4751         video_id = self._match_id(url)
4752         raise ExtractorError(
4753             'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
4754             expected=True)