]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[docs,cleanup] Some minor refactoring and improve docs
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 is_html,
42 mimetype2ext,
43 network_exceptions,
44 orderedSet,
45 parse_codecs,
46 parse_count,
47 parse_duration,
48 parse_iso8601,
49 parse_qs,
50 qualities,
51 remove_end,
52 remove_start,
53 smuggle_url,
54 str_or_none,
55 str_to_int,
56 traverse_obj,
57 try_get,
58 unescapeHTML,
59 unified_strdate,
60 unsmuggle_url,
61 update_url_query,
62 url_or_none,
63 urljoin,
64 variadic,
65 )
66
67
68 # any clients starting with _ cannot be explicity requested by the user
69 INNERTUBE_CLIENTS = {
70 'web': {
71 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
72 'INNERTUBE_CONTEXT': {
73 'client': {
74 'clientName': 'WEB',
75 'clientVersion': '2.20210622.10.00',
76 }
77 },
78 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
79 },
80 'web_embedded': {
81 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
82 'INNERTUBE_CONTEXT': {
83 'client': {
84 'clientName': 'WEB_EMBEDDED_PLAYER',
85 'clientVersion': '1.20210620.0.1',
86 },
87 },
88 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
89 },
90 'web_music': {
91 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
92 'INNERTUBE_HOST': 'music.youtube.com',
93 'INNERTUBE_CONTEXT': {
94 'client': {
95 'clientName': 'WEB_REMIX',
96 'clientVersion': '1.20210621.00.00',
97 }
98 },
99 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
100 },
101 'web_creator': {
102 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
103 'INNERTUBE_CONTEXT': {
104 'client': {
105 'clientName': 'WEB_CREATOR',
106 'clientVersion': '1.20210621.00.00',
107 }
108 },
109 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
110 },
111 'android': {
112 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
113 'INNERTUBE_CONTEXT': {
114 'client': {
115 'clientName': 'ANDROID',
116 'clientVersion': '16.20',
117 }
118 },
119 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
120 'REQUIRE_JS_PLAYER': False
121 },
122 'android_embedded': {
123 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
124 'INNERTUBE_CONTEXT': {
125 'client': {
126 'clientName': 'ANDROID_EMBEDDED_PLAYER',
127 'clientVersion': '16.20',
128 },
129 },
130 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
131 'REQUIRE_JS_PLAYER': False
132 },
133 'android_music': {
134 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
135 'INNERTUBE_HOST': 'music.youtube.com',
136 'INNERTUBE_CONTEXT': {
137 'client': {
138 'clientName': 'ANDROID_MUSIC',
139 'clientVersion': '4.32',
140 }
141 },
142 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
143 'REQUIRE_JS_PLAYER': False
144 },
145 'android_creator': {
146 'INNERTUBE_CONTEXT': {
147 'client': {
148 'clientName': 'ANDROID_CREATOR',
149 'clientVersion': '21.24.100',
150 },
151 },
152 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
153 'REQUIRE_JS_PLAYER': False
154 },
155 # ios has HLS live streams
156 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
157 'ios': {
158 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
159 'INNERTUBE_CONTEXT': {
160 'client': {
161 'clientName': 'IOS',
162 'clientVersion': '16.20',
163 }
164 },
165 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
166 'REQUIRE_JS_PLAYER': False
167 },
168 'ios_embedded': {
169 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
170 'INNERTUBE_CONTEXT': {
171 'client': {
172 'clientName': 'IOS_MESSAGES_EXTENSION',
173 'clientVersion': '16.20',
174 },
175 },
176 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
177 'REQUIRE_JS_PLAYER': False
178 },
179 'ios_music': {
180 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
181 'INNERTUBE_HOST': 'music.youtube.com',
182 'INNERTUBE_CONTEXT': {
183 'client': {
184 'clientName': 'IOS_MUSIC',
185 'clientVersion': '4.32',
186 },
187 },
188 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
189 'REQUIRE_JS_PLAYER': False
190 },
191 'ios_creator': {
192 'INNERTUBE_CONTEXT': {
193 'client': {
194 'clientName': 'IOS_CREATOR',
195 'clientVersion': '21.24.100',
196 },
197 },
198 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
199 'REQUIRE_JS_PLAYER': False
200 },
201 # mweb has 'ultralow' formats
202 # See: https://github.com/yt-dlp/yt-dlp/pull/557
203 'mweb': {
204 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
205 'INNERTUBE_CONTEXT': {
206 'client': {
207 'clientName': 'MWEB',
208 'clientVersion': '2.20210721.07.00',
209 }
210 },
211 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
212 },
213 }
214
215
def build_innertube_clients():
    """Normalize INNERTUBE_CLIENTS in place.

    Fills in missing per-client defaults, assigns a selection priority to
    every client, and derives an ``<client>_agegate`` variant for each of
    the base clients.
    """
    THIRD_PARTY = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    BASE_CLIENTS = ('android', 'web', 'ios', 'mweb')
    # Higher value = preferred client; reversed so that 'android' ranks first
    priority_of = qualities(BASE_CLIENTS[::-1])

    # Iterate over a snapshot since *_agegate entries are added while looping
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        for key, default in (
                ('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8'),
                ('INNERTUBE_HOST', 'www.youtube.com'),
                ('REQUIRE_JS_PLAYER', True)):
            cfg.setdefault(key, default)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        # Priority is based on the base client name (part before the first '_')
        cfg['priority'] = 10 * priority_of(name.split('_', 1)[0])

        if name in BASE_CLIENTS:
            agegate_cfg = copy.deepcopy(cfg)
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_cfg
            agegate_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_cfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
            agegate_cfg['priority'] -= 1
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3


build_innertube_clients()
243
244
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Path components that can never be a channel/user name in a YouTube URL
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    r''' # Unused since login is broken
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
    '''

    def _login(self):
        """
        Attempt to log in to YouTube.
        True is returned if successful or skipped.
        False is returned if login failed.

        If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
        """

        def warn(message):
            self.report_warning(message)

        # username+password login is broken
        if (self._LOGIN_REQUIRED
                and self.get_param('cookiefile') is None
                and self.get_param('cookiesfrombrowser') is None):
            self.raise_login_required(
                'Login details are needed to download this content', method='cookies')
        username, password = self._get_login_info()
        if username:
            warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
        # Unconditional return: everything past this point is unreachable
        return

        # Everything below this is broken!
        r'''
        # No authentication to be performed
        if username is None:
            if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
                raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
            # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
            #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
            return True

        login_page = self._download_webpage(
            self._LOGIN_URL, None,
            note='Downloading login page',
            errnote='unable to fetch login page', fatal=False)
        if login_page is False:
            return

        login_form = self._hidden_inputs(login_page)

        def req(url, f_req, note, errnote):
            data = login_form.copy()
            data.update({
                'pstMsg': 1,
                'checkConnection': 'youtube',
                'checkedDomains': 'youtube',
                'hl': 'en',
                'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
                'f.req': json.dumps(f_req),
                'flowName': 'GlifWebSignIn',
                'flowEntry': 'ServiceLogin',
                # TODO: reverse actual botguard identifier generation algo
                'bgRequest': '["identifier",""]',
            })
            return self._download_json(
                url, None, note=note, errnote=errnote,
                transform_source=lambda s: re.sub(r'^[^[]*', '', s),
                fatal=False,
                data=urlencode_postdata(data), headers={
                    'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
                    'Google-Accounts-XSRF': 1,
                })

        lookup_req = [
            username,
            None, [], None, 'US', None, None, 2, False, True,
            [
                None, None,
                [2, 1, None, 1,
                 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
                 None, [], 4],
                1, [None, None, []], None, None, None, True
            ],
            username,
        ]

        lookup_results = req(
            self._LOOKUP_URL, lookup_req,
            'Looking up account info', 'Unable to look up account info')

        if lookup_results is False:
            return False

        user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
        if not user_hash:
            warn('Unable to extract user hash')
            return False

        challenge_req = [
            user_hash,
            None, 1, None, [1, None, None, None, [password, None, True]],
            [
                None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
                1, [None, None, []], None, None, None, True
            ]]

        challenge_results = req(
            self._CHALLENGE_URL, challenge_req,
            'Logging in', 'Unable to log in')

        if challenge_results is False:
            return

        login_res = try_get(challenge_results, lambda x: x[0][5], list)
        if login_res:
            login_msg = try_get(login_res, lambda x: x[5], compat_str)
            warn(
                'Unable to login: %s' % 'Invalid password'
                if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
            return False

        res = try_get(challenge_results, lambda x: x[0][-1], list)
        if not res:
            warn('Unable to extract result entry')
            return False

        login_challenge = try_get(res, lambda x: x[0][0], list)
        if login_challenge:
            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
            if challenge_str == 'TWO_STEP_VERIFICATION':
                # SEND_SUCCESS - TFA code has been successfully sent to phone
                # QUOTA_EXCEEDED - reached the limit of TFA codes
                status = try_get(login_challenge, lambda x: x[5], compat_str)
                if status == 'QUOTA_EXCEEDED':
                    warn('Exceeded the limit of TFA codes, try later')
                    return False

                tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
                if not tl:
                    warn('Unable to extract TL')
                    return False

                tfa_code = self._get_tfa_info('2-step verification code')

                if not tfa_code:
                    warn(
                        'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
                        '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
                    return False

                tfa_code = remove_start(tfa_code, 'G-')

                tfa_req = [
                    user_hash, None, 2, None,
                    [
                        9, None, None, None, None, None, None, None,
                        [None, tfa_code, True, 2]
                    ]]

                tfa_results = req(
                    self._TFA_URL.format(tl), tfa_req,
                    'Submitting TFA code', 'Unable to submit TFA code')

                if tfa_results is False:
                    return False

                tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
                if tfa_res:
                    tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
                    warn(
                        'Unable to finish TFA: %s' % 'Invalid TFA code'
                        if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
                    return False

                check_cookie_url = try_get(
                    tfa_results, lambda x: x[0][-1][2], compat_str)
            else:
                CHALLENGES = {
                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
                }
                challenge = CHALLENGES.get(
                    challenge_str,
                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
                return False
        else:
            check_cookie_url = try_get(res, lambda x: x[2], compat_str)

        if not check_cookie_url:
            warn('Unable to extract CheckCookie URL')
            return False

        check_cookie_results = self._download_webpage(
            check_cookie_url, None, 'Checking cookie', fatal=False)

        if check_cookie_results is False:
            return False

        if 'https://myaccount.google.com/' not in check_cookie_results:
            warn('Unable to log in')
            return False

        return True
        '''

    def _initialize_consent(self):
        """Set a cookie accepting YouTube's EU consent page, so that requests
        are not redirected to consent.youtube.com."""
        cookies = self._get_cookies('https://www.youtube.com/')
        if cookies.get('__Secure-3PSID'):
            # Logged-in sessions are not shown the consent interstitial
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                # Consent already given
                return
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)

    def _real_initialize(self):
        """Initialize consent cookie and (if possible) log in before extraction."""
        self._initialize_consent()
        if self._downloader is None:
            return
        if not self._login():
            return

    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    def _get_default_ytcfg(self, client='web'):
        """Return a deep copy of the built-in config for *client*."""
        return copy.deepcopy(INNERTUBE_CLIENTS[client])

    def _get_innertube_host(self, client='web'):
        """Return the InnerTube API hostname for *client*."""
        return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

    def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
        # try_get but with fallback to default ytcfg client values when present
        _func = lambda y: try_get(y, getter, expected_type)
        return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

    def _extract_client_name(self, ytcfg, default_client='web'):
        """Extract the InnerTube client name from ytcfg, falling back to the
        built-in config of *default_client*."""
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

    def _extract_client_version(self, ytcfg, default_client='web'):
        """Extract the InnerTube client version from ytcfg, falling back to the
        built-in config of *default_client*."""
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)

    def _extract_api_key(self, ytcfg=None, default_client='web'):
        """Extract the InnerTube API key from ytcfg, with built-in fallback."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)

    def _extract_context(self, ytcfg=None, default_client='web'):
        """Build the InnerTube request context from ytcfg, falling back to and
        merging with the built-in config of *default_client*."""
        _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
        context = _get_context(ytcfg)
        if context:
            return context

        context = _get_context(self._get_default_ytcfg(default_client))
        if not ytcfg:
            return context

        # Recreate the client context (required)
        context['client'].update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context

    # Cached SAPISID cookie value; None = not yet looked up, False = not available
    _SAPISID = None

    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Return the 'SAPISIDHASH <ts>_<sha1>' Authorization header value,
        or None if no SAPISID-like cookie is available."""
        time_now = round(time.time())
        if self._SAPISID is None:
            yt_cookies = self._get_cookies('https://www.youtube.com')
            # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
            # See: https://github.com/yt-dlp/yt-dlp/issues/393
            sapisid_cookie = dict_get(
                yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
            if sapisid_cookie and sapisid_cookie.value:
                self._SAPISID = sapisid_cookie.value
                self.write_debug('Extracted SAPISID cookie')
                # SAPISID cookie is required if not already present
                if not yt_cookies.get('SAPISID'):
                    self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                    self._set_cookie(
                        '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
            else:
                self._SAPISID = False
        if not self._SAPISID:
            return None
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'

    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None, api_hostname=None, default_client='web'):
        """POST *query* to the InnerTube endpoint *ep* and return the parsed JSON.

        context/api_key/api_hostname default to values derived from *default_client*.
        """
        data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
        data.update(query)
        real_headers = self.generate_api_headers(default_client=default_client)
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})

    def extract_yt_initial_data(self, video_id, webpage):
        """Extract and parse the ytInitialData JSON object from *webpage*."""
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    @staticmethod
    def _extract_session_index(*data):
        """
        Index of current account in account list.
        See: https://github.com/yt-dlp/yt-dlp/pull/519
        """
        for ytcfg in data:
            session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
            if session_index is not None:
                return session_index

    # Deprecated?
    def _extract_identity_token(self, ytcfg=None, webpage=None):
        """Extract ID_TOKEN from ytcfg, or fall back to scraping *webpage*."""
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        if webpage:
            return self._search_regex(
                r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
                'identity token', default=None, fatal=False)

    @staticmethod
    def _extract_account_syncid(*args):
        """
        Extract syncId required to download private playlists of secondary channels
        @params response and/or ytcfg
        """
        for data in args:
            # ytcfg includes channel_syncid if on secondary channel
            delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
            if delegated_sid:
                return delegated_sid
            sync_ids = (try_get(
                data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                       lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
            if len(sync_ids) >= 2 and sync_ids[1]:
                # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
                # and just "user_syncid||" for primary channel. We only want the channel_syncid
                return sync_ids[0]

    @property
    def is_authenticated(self):
        # Authenticated iff a SAPISIDHASH header can be generated from cookies
        return bool(self._generate_sapisidhash_header())

    def extract_ytcfg(self, video_id, webpage):
        """Extract the ytcfg.set(...) JSON object from *webpage*; {} on failure."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

    def generate_api_headers(
            self, *, ytcfg=None, account_syncid=None, session_index=None,
            visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
        """Build the HTTP headers for an InnerTube API request.

        Headers whose value resolves to None are omitted from the result.
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin,
            'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
            'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
            'X-Goog-Visitor-Id': visitor_data or try_get(
                self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
        }
        if session_index is None:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0

        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        # Drop headers for which no value could be determined
        return {h: v for h, v in headers.items() if v is not None}

    @staticmethod
    def _build_api_continuation_query(continuation, ctp=None):
        """Build the query dict for requesting a continuation page."""
        query = {
            'continuation': continuation
        }
        # TODO: Inconsistency with clickTrackingParams.
        # Currently we have a fixed ctp contained within context (from ytcfg)
        # and a ctp in root query for continuation.
        if ctp:
            query['clickTracking'] = {'clickTrackingParams': ctp}
        return query

    @classmethod
    def _extract_next_continuation_data(cls, renderer):
        """Extract a continuation query from the legacy 'continuations' entries
        of *renderer*; None when absent."""
        next_continuation = try_get(
            renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                       lambda x: x['continuation']['reloadContinuationData']), dict)
        if not next_continuation:
            return
        continuation = next_continuation.get('continuation')
        if not continuation:
            return
        ctp = next_continuation.get('clickTrackingParams')
        return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation_ep_data(cls, continuation_ep: dict):
        """Build a continuation query from a continuation endpoint dict;
        None for non-dict input or a missing token."""
        if isinstance(continuation_ep, dict):
            continuation = try_get(
                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
            if not continuation:
                return
            ctp = continuation_ep.get('clickTrackingParams')
            return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation(cls, renderer):
        """Extract the continuation query for *renderer*, trying the legacy
        format first and then continuationItemRenderer entries."""
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation

        contents = []
        for key in ('contents', 'items'):
            contents.extend(try_get(renderer, lambda x: x[key], list) or [])

        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                          lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
                dict)
            continuation = cls._extract_continuation_ep_data(continuation_ep)
            if continuation:
                return continuation

    @classmethod
    def _extract_alerts(cls, data):
        """Yield (alert_type, message) pairs from the 'alerts' of a response."""
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            for alert in alert_dict.values():
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = cls._get_text(alert, 'text')
                if message:
                    yield alert_type, message

    def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
        """Report alerts as warnings; raise for the last error when *fatal*."""
        errors = []
        warnings = []
        for alert_type, alert_message in alerts:
            if alert_type.lower() == 'error' and fatal:
                errors.append([alert_type, alert_message])
            else:
                warnings.append([alert_type, alert_message])

        # All but the last error are reported as warnings
        for alert_type, alert_message in (warnings + errors[:-1]):
            self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
        if errors:
            raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)

    def _extract_and_report_alerts(self, data, *args, **kwargs):
        """Convenience wrapper combining _extract_alerts and _report_alerts."""
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)

    def _extract_badges(self, renderer: dict):
        """Return the set of lower-cased badge labels of *renderer*."""
        badges = set()
        for badge in try_get(renderer, lambda x: x['badges'], list) or []:
            label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
            if label:
                badges.add(label.lower())
        return badges

    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """Extract a text string ('simpleText' or joined 'runs') from the first
        of *path_list* within *data* that yields one; None when nothing matches.

        max_runs, if given, limits how many 'runs' entries are joined.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # Paths without branching (... or list/tuple keys) return a
                # single object, which must be wrapped for uniform iteration
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text

    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the InnerTube API with retries.

        Retries on network errors (except HTTP 403/429), on YouTube-reported
        'unknown error' alerts and on responses missing *check_get_keys*.
        Returns the parsed response, or None when not *fatal*.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False, only_once=True)
                except ExtractorError as e:
                    # YouTube servers may return errors we want to retry on in a 200 OK response
                    # See: https://github.com/yt-dlp/yt-dlp/issues/839
                    if 'unknown error' in e.msg.lower():
                        last_error = e.msg
                        continue
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response

    @staticmethod
    def is_music_url(url):
        """Whether *url* points to music.youtube.com."""
        return re.match(r'https?://music\.youtube\.com/', url) is not None

    def _extract_video(self, renderer):
        """Build a url-type info dict for a video *renderer* (e.g. from a
        playlist or search result page)."""
        video_id = renderer.get('videoId')
        title = self._get_text(renderer, 'title')
        description = self._get_text(renderer, 'descriptionSnippet')
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
        view_count_text = self._get_text(renderer, 'viewCountText') or ''
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))

        uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': f'https://www.youtube.com/watch?v={video_id}',
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
883
884
class YoutubeIE(YoutubeBaseInfoExtractor):
    """Extractor for individual YouTube videos (watch pages and equivalent URLs)."""
    IE_DESC = 'YouTube.com'
    # Hostname patterns for known Invidious instances (alternative YouTube
    # front-ends). These are spliced into _VALID_URL below via the
    # %(invidious)s placeholder so such URLs are handled by this extractor.
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        # Tor hidden services / I2P addresses of Invidious instances
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    )
    # Verbose-mode (re.X) pattern matching every supported video-URL shape,
    # including bare 11-character video IDs. NOTE: the '#'-comments below are
    # *inside* the raw string and belong to the (?x) regex itself, not to
    # Python; the %(invidious)s placeholder is filled from _INVIDIOUS_SITES.
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                            (?:www\.)?deturl\.com/www\.youtube\.com|
                            (?:www\.)?pwnyoutube\.com|
                            (?:www\.)?hooktube\.com|
                            (?:www\.)?yourepeat\.com|
                            tube\.majestyc\.net|
                            %(invidious)s|
                            youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e|shorts)/(?!videoseries))         # v/ or embed/ or e/ or shorts/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                                 v=
                             )
                         ))
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                            %(invidious)s
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     (?:\#|$)""" % {
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    # Patterns for extracting a player identifier from the player JS URL,
    # tried in order; the named group <id> captures the identifier (used
    # elsewhere to cache/look up the per-player signature function).
    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',  # legacy "vfl..." style ids
    )
    # Static metadata for known YouTube itags (format codes), keyed by the
    # itag as a string. Values supply container/codec/resolution/bitrate
    # hints that the API response may omit; 'preference' carries a negative
    # penalty for less desirable delivery forms (3D, HLS).
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
        '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
        '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
        '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
        '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
        '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
        '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
        '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    }
    # Subtitle formats to request, in order of preference.
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # NOTE(review): presumably opts this extractor out of the base class's
    # generic X-Forwarded-For geo-bypass — confirm against YoutubeBaseInfoExtractor.
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
1104 _TESTS = [
1105 {
1106 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1107 'info_dict': {
1108 'id': 'BaW_jenozKc',
1109 'ext': 'mp4',
1110 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1111 'uploader': 'Philipp Hagemeister',
1112 'uploader_id': 'phihag',
1113 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1114 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1115 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1116 'upload_date': '20121002',
1117 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1118 'categories': ['Science & Technology'],
1119 'tags': ['youtube-dl'],
1120 'duration': 10,
1121 'view_count': int,
1122 'like_count': int,
1123 'dislike_count': int,
1124 'start_time': 1,
1125 'end_time': 9,
1126 }
1127 },
1128 {
1129 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1130 'note': 'Embed-only video (#1746)',
1131 'info_dict': {
1132 'id': 'yZIXLfi8CZQ',
1133 'ext': 'mp4',
1134 'upload_date': '20120608',
1135 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1136 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1137 'uploader': 'SET India',
1138 'uploader_id': 'setindia',
1139 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1140 'age_limit': 18,
1141 },
1142 'skip': 'Private video',
1143 },
1144 {
1145 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1146 'note': 'Use the first video ID in the URL',
1147 'info_dict': {
1148 'id': 'BaW_jenozKc',
1149 'ext': 'mp4',
1150 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1151 'uploader': 'Philipp Hagemeister',
1152 'uploader_id': 'phihag',
1153 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1154 'upload_date': '20121002',
1155 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1156 'categories': ['Science & Technology'],
1157 'tags': ['youtube-dl'],
1158 'duration': 10,
1159 'view_count': int,
1160 'like_count': int,
1161 'dislike_count': int,
1162 },
1163 'params': {
1164 'skip_download': True,
1165 },
1166 },
1167 {
1168 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1169 'note': '256k DASH audio (format 141) via DASH manifest',
1170 'info_dict': {
1171 'id': 'a9LDPn-MO4I',
1172 'ext': 'm4a',
1173 'upload_date': '20121002',
1174 'uploader_id': '8KVIDEO',
1175 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1176 'description': '',
1177 'uploader': '8KVIDEO',
1178 'title': 'UHDTV TEST 8K VIDEO.mp4'
1179 },
1180 'params': {
1181 'youtube_include_dash_manifest': True,
1182 'format': '141',
1183 },
1184 'skip': 'format 141 not served anymore',
1185 },
1186 # DASH manifest with encrypted signature
1187 {
1188 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1189 'info_dict': {
1190 'id': 'IB3lcPjvWLA',
1191 'ext': 'm4a',
1192 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1193 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1194 'duration': 244,
1195 'uploader': 'AfrojackVEVO',
1196 'uploader_id': 'AfrojackVEVO',
1197 'upload_date': '20131011',
1198 'abr': 129.495,
1199 },
1200 'params': {
1201 'youtube_include_dash_manifest': True,
1202 'format': '141/bestaudio[ext=m4a]',
1203 },
1204 },
1205 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1206 {
1207 'note': 'Embed allowed age-gate video',
1208 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1209 'info_dict': {
1210 'id': 'HtVdAasjOgU',
1211 'ext': 'mp4',
1212 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1213 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1214 'duration': 142,
1215 'uploader': 'The Witcher',
1216 'uploader_id': 'WitcherGame',
1217 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1218 'upload_date': '20140605',
1219 'age_limit': 18,
1220 },
1221 },
1222 {
1223 'note': 'Age-gate video with embed allowed in public site',
1224 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1225 'info_dict': {
1226 'id': 'HsUATh_Nc2U',
1227 'ext': 'mp4',
1228 'title': 'Godzilla 2 (Official Video)',
1229 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1230 'upload_date': '20200408',
1231 'uploader_id': 'FlyingKitty900',
1232 'uploader': 'FlyingKitty',
1233 'age_limit': 18,
1234 },
1235 },
1236 {
1237 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1238 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1239 'info_dict': {
1240 'id': 'Tq92D6wQ1mg',
1241 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1242 'ext': 'mp4',
1243 'upload_date': '20191227',
1244 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1245 'uploader': 'Projekt Melody',
1246 'description': 'md5:17eccca93a786d51bc67646756894066',
1247 'age_limit': 18,
1248 },
1249 },
1250 {
1251 'note': 'Non-Agegated non-embeddable video',
1252 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1253 'info_dict': {
1254 'id': 'MeJVWBSsPAY',
1255 'ext': 'mp4',
1256 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1257 'uploader': 'Herr Lurik',
1258 'uploader_id': 'st3in234',
1259 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1260 'upload_date': '20130730',
1261 },
1262 },
1263 {
1264 'note': 'Non-bypassable age-gated video',
1265 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1266 'only_matching': True,
1267 },
1268 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1269 # YouTube Red ad is not captured for creator
1270 {
1271 'url': '__2ABJjxzNo',
1272 'info_dict': {
1273 'id': '__2ABJjxzNo',
1274 'ext': 'mp4',
1275 'duration': 266,
1276 'upload_date': '20100430',
1277 'uploader_id': 'deadmau5',
1278 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1279 'creator': 'deadmau5',
1280 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1281 'uploader': 'deadmau5',
1282 'title': 'Deadmau5 - Some Chords (HD)',
1283 'alt_title': 'Some Chords',
1284 },
1285 'expected_warnings': [
1286 'DASH manifest missing',
1287 ]
1288 },
1289 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1290 {
1291 'url': 'lqQg6PlCWgI',
1292 'info_dict': {
1293 'id': 'lqQg6PlCWgI',
1294 'ext': 'mp4',
1295 'duration': 6085,
1296 'upload_date': '20150827',
1297 'uploader_id': 'olympic',
1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1299 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1300 'uploader': 'Olympics',
1301 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1302 },
1303 'params': {
1304 'skip_download': 'requires avconv',
1305 }
1306 },
1307 # Non-square pixels
1308 {
1309 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1310 'info_dict': {
1311 'id': '_b-2C3KPAM0',
1312 'ext': 'mp4',
1313 'stretched_ratio': 16 / 9.,
1314 'duration': 85,
1315 'upload_date': '20110310',
1316 'uploader_id': 'AllenMeow',
1317 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1318 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1319 'uploader': '孫ᄋᄅ',
1320 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1321 },
1322 },
1323 # url_encoded_fmt_stream_map is empty string
1324 {
1325 'url': 'qEJwOuvDf7I',
1326 'info_dict': {
1327 'id': 'qEJwOuvDf7I',
1328 'ext': 'webm',
1329 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1330 'description': '',
1331 'upload_date': '20150404',
1332 'uploader_id': 'spbelect',
1333 'uploader': 'Наблюдатели Петербурга',
1334 },
1335 'params': {
1336 'skip_download': 'requires avconv',
1337 },
1338 'skip': 'This live event has ended.',
1339 },
1340 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1341 {
1342 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1343 'info_dict': {
1344 'id': 'FIl7x6_3R5Y',
1345 'ext': 'webm',
1346 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1347 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1348 'duration': 220,
1349 'upload_date': '20150625',
1350 'uploader_id': 'dorappi2000',
1351 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1352 'uploader': 'dorappi2000',
1353 'formats': 'mincount:31',
1354 },
1355 'skip': 'not actual anymore',
1356 },
1357 # DASH manifest with segment_list
1358 {
1359 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1360 'md5': '8ce563a1d667b599d21064e982ab9e31',
1361 'info_dict': {
1362 'id': 'CsmdDsKjzN8',
1363 'ext': 'mp4',
1364 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1365 'uploader': 'Airtek',
1366 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1367 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1368 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1369 },
1370 'params': {
1371 'youtube_include_dash_manifest': True,
1372 'format': '135', # bestvideo
1373 },
1374 'skip': 'This live event has ended.',
1375 },
1376 {
1377 # Multifeed videos (multiple cameras), URL is for Main Camera
1378 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1379 'info_dict': {
1380 'id': 'jvGDaLqkpTg',
1381 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1382 'description': 'md5:e03b909557865076822aa169218d6a5d',
1383 },
1384 'playlist': [{
1385 'info_dict': {
1386 'id': 'jvGDaLqkpTg',
1387 'ext': 'mp4',
1388 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1389 'description': 'md5:e03b909557865076822aa169218d6a5d',
1390 'duration': 10643,
1391 'upload_date': '20161111',
1392 'uploader': 'Team PGP',
1393 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1394 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1395 },
1396 }, {
1397 'info_dict': {
1398 'id': '3AKt1R1aDnw',
1399 'ext': 'mp4',
1400 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1401 'description': 'md5:e03b909557865076822aa169218d6a5d',
1402 'duration': 10991,
1403 'upload_date': '20161111',
1404 'uploader': 'Team PGP',
1405 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1406 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1407 },
1408 }, {
1409 'info_dict': {
1410 'id': 'RtAMM00gpVc',
1411 'ext': 'mp4',
1412 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1413 'description': 'md5:e03b909557865076822aa169218d6a5d',
1414 'duration': 10995,
1415 'upload_date': '20161111',
1416 'uploader': 'Team PGP',
1417 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1418 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1419 },
1420 }, {
1421 'info_dict': {
1422 'id': '6N2fdlP3C5U',
1423 'ext': 'mp4',
1424 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1425 'description': 'md5:e03b909557865076822aa169218d6a5d',
1426 'duration': 10990,
1427 'upload_date': '20161111',
1428 'uploader': 'Team PGP',
1429 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1430 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1431 },
1432 }],
1433 'params': {
1434 'skip_download': True,
1435 },
1436 'skip': 'Not multifeed anymore',
1437 },
1438 {
1439 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1440 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1441 'info_dict': {
1442 'id': 'gVfLd0zydlo',
1443 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1444 },
1445 'playlist_count': 2,
1446 'skip': 'Not multifeed anymore',
1447 },
1448 {
1449 'url': 'https://vid.plus/FlRa-iH7PGw',
1450 'only_matching': True,
1451 },
1452 {
1453 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1454 'only_matching': True,
1455 },
1456 {
1457 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1458 # Also tests cut-off URL expansion in video description (see
1459 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1460 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1461 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1462 'info_dict': {
1463 'id': 'lsguqyKfVQg',
1464 'ext': 'mp4',
1465 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1466 'alt_title': 'Dark Walk',
1467 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1468 'duration': 133,
1469 'upload_date': '20151119',
1470 'uploader_id': 'IronSoulElf',
1471 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1472 'uploader': 'IronSoulElf',
1473 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1474 'track': 'Dark Walk',
1475 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1476 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1477 },
1478 'params': {
1479 'skip_download': True,
1480 },
1481 },
1482 {
1483 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1484 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1485 'only_matching': True,
1486 },
1487 {
1488 # Video with yt:stretch=17:0
1489 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1490 'info_dict': {
1491 'id': 'Q39EVAstoRM',
1492 'ext': 'mp4',
1493 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1494 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1495 'upload_date': '20151107',
1496 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1497 'uploader': 'CH GAMER DROID',
1498 },
1499 'params': {
1500 'skip_download': True,
1501 },
1502 'skip': 'This video does not exist.',
1503 },
1504 {
1505 # Video with incomplete 'yt:stretch=16:'
1506 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1507 'only_matching': True,
1508 },
1509 {
1510 # Video licensed under Creative Commons
1511 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1512 'info_dict': {
1513 'id': 'M4gD1WSo5mA',
1514 'ext': 'mp4',
1515 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1516 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1517 'duration': 721,
1518 'upload_date': '20150127',
1519 'uploader_id': 'BerkmanCenter',
1520 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1521 'uploader': 'The Berkman Klein Center for Internet & Society',
1522 'license': 'Creative Commons Attribution license (reuse allowed)',
1523 },
1524 'params': {
1525 'skip_download': True,
1526 },
1527 },
1528 {
1529 # Channel-like uploader_url
1530 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1531 'info_dict': {
1532 'id': 'eQcmzGIKrzg',
1533 'ext': 'mp4',
1534 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1535 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1536 'duration': 4060,
1537 'upload_date': '20151119',
1538 'uploader': 'Bernie Sanders',
1539 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1540 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1541 'license': 'Creative Commons Attribution license (reuse allowed)',
1542 },
1543 'params': {
1544 'skip_download': True,
1545 },
1546 },
1547 {
1548 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1549 'only_matching': True,
1550 },
1551 {
1552 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1553 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1554 'only_matching': True,
1555 },
1556 {
1557 # Rental video preview
1558 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1559 'info_dict': {
1560 'id': 'uGpuVWrhIzE',
1561 'ext': 'mp4',
1562 'title': 'Piku - Trailer',
1563 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1564 'upload_date': '20150811',
1565 'uploader': 'FlixMatrix',
1566 'uploader_id': 'FlixMatrixKaravan',
1567 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1568 'license': 'Standard YouTube License',
1569 },
1570 'params': {
1571 'skip_download': True,
1572 },
1573 'skip': 'This video is not available.',
1574 },
1575 {
1576 # YouTube Red video with episode data
1577 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1578 'info_dict': {
1579 'id': 'iqKdEhx-dD4',
1580 'ext': 'mp4',
1581 'title': 'Isolation - Mind Field (Ep 1)',
1582 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1583 'duration': 2085,
1584 'upload_date': '20170118',
1585 'uploader': 'Vsauce',
1586 'uploader_id': 'Vsauce',
1587 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1588 'series': 'Mind Field',
1589 'season_number': 1,
1590 'episode_number': 1,
1591 },
1592 'params': {
1593 'skip_download': True,
1594 },
1595 'expected_warnings': [
1596 'Skipping DASH manifest',
1597 ],
1598 },
1599 {
1600 # The following content has been identified by the YouTube community
1601 # as inappropriate or offensive to some audiences.
1602 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1603 'info_dict': {
1604 'id': '6SJNVb0GnPI',
1605 'ext': 'mp4',
1606 'title': 'Race Differences in Intelligence',
1607 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1608 'duration': 965,
1609 'upload_date': '20140124',
1610 'uploader': 'New Century Foundation',
1611 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1612 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1613 },
1614 'params': {
1615 'skip_download': True,
1616 },
1617 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1618 },
1619 {
1620 # itag 212
1621 'url': '1t24XAntNCY',
1622 'only_matching': True,
1623 },
1624 {
1625 # geo restricted to JP
1626 'url': 'sJL6WA-aGkQ',
1627 'only_matching': True,
1628 },
1629 {
1630 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1631 'only_matching': True,
1632 },
1633 {
1634 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1635 'only_matching': True,
1636 },
1637 {
1638 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1639 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1640 'only_matching': True,
1641 },
1642 {
1643 # DRM protected
1644 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1645 'only_matching': True,
1646 },
1647 {
1648 # Video with unsupported adaptive stream type formats
1649 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1650 'info_dict': {
1651 'id': 'Z4Vy8R84T1U',
1652 'ext': 'mp4',
1653 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1654 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1655 'duration': 433,
1656 'upload_date': '20130923',
1657 'uploader': 'Amelia Putri Harwita',
1658 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1659 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1660 'formats': 'maxcount:10',
1661 },
1662 'params': {
1663 'skip_download': True,
1664 'youtube_include_dash_manifest': False,
1665 },
1666 'skip': 'not actual anymore',
1667 },
1668 {
1669 # Youtube Music Auto-generated description
1670 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1671 'info_dict': {
1672 'id': 'MgNrAu2pzNs',
1673 'ext': 'mp4',
1674 'title': 'Voyeur Girl',
1675 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1676 'upload_date': '20190312',
1677 'uploader': 'Stephen - Topic',
1678 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1679 'artist': 'Stephen',
1680 'track': 'Voyeur Girl',
1681 'album': 'it\'s too much love to know my dear',
1682 'release_date': '20190313',
1683 'release_year': 2019,
1684 },
1685 'params': {
1686 'skip_download': True,
1687 },
1688 },
1689 {
1690 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1691 'only_matching': True,
1692 },
1693 {
1694 # invalid -> valid video id redirection
1695 'url': 'DJztXj2GPfl',
1696 'info_dict': {
1697 'id': 'DJztXj2GPfk',
1698 'ext': 'mp4',
1699 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1700 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1701 'upload_date': '20090125',
1702 'uploader': 'Prochorowka',
1703 'uploader_id': 'Prochorowka',
1704 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1705 'artist': 'Panjabi MC',
1706 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1707 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1708 },
1709 'params': {
1710 'skip_download': True,
1711 },
1712 'skip': 'Video unavailable',
1713 },
1714 {
1715 # empty description results in an empty string
1716 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1717 'info_dict': {
1718 'id': 'x41yOUIvK2k',
1719 'ext': 'mp4',
1720 'title': 'IMG 3456',
1721 'description': '',
1722 'upload_date': '20170613',
1723 'uploader_id': 'ElevageOrVert',
1724 'uploader': 'ElevageOrVert',
1725 },
1726 'params': {
1727 'skip_download': True,
1728 },
1729 },
1730 {
1731 # with '};' inside yt initial data (see [1])
1732 # see [2] for an example with '};' inside ytInitialPlayerResponse
1733 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1734 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1735 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1736 'info_dict': {
1737 'id': 'CHqg6qOn4no',
1738 'ext': 'mp4',
1739 'title': 'Part 77 Sort a list of simple types in c#',
1740 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1741 'upload_date': '20130831',
1742 'uploader_id': 'kudvenkat',
1743 'uploader': 'kudvenkat',
1744 },
1745 'params': {
1746 'skip_download': True,
1747 },
1748 },
1749 {
1750 # another example of '};' in ytInitialData
1751 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1752 'only_matching': True,
1753 },
1754 {
1755 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1756 'only_matching': True,
1757 },
1758 {
1759 # https://github.com/ytdl-org/youtube-dl/pull/28094
1760 'url': 'OtqTfy26tG0',
1761 'info_dict': {
1762 'id': 'OtqTfy26tG0',
1763 'ext': 'mp4',
1764 'title': 'Burn Out',
1765 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1766 'upload_date': '20141120',
1767 'uploader': 'The Cinematic Orchestra - Topic',
1768 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1769 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1770 'artist': 'The Cinematic Orchestra',
1771 'track': 'Burn Out',
1772 'album': 'Every Day',
1773 'release_data': None,
1774 'release_year': None,
1775 },
1776 'params': {
1777 'skip_download': True,
1778 },
1779 },
1780 {
1781 # controversial video, only works with bpctr when authenticated with cookies
1782 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1783 'only_matching': True,
1784 },
1785 {
1786 # controversial video, requires bpctr/contentCheckOk
1787 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1788 'info_dict': {
1789 'id': 'SZJvDhaSDnc',
1790 'ext': 'mp4',
1791 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1792 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1793 'uploader': 'CBS This Morning',
1794 'uploader_id': 'CBSThisMorning',
1795 'upload_date': '20140716',
1796 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1797 }
1798 },
1799 {
1800 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1801 'url': 'cBvYw8_A0vQ',
1802 'info_dict': {
1803 'id': 'cBvYw8_A0vQ',
1804 'ext': 'mp4',
1805 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1806 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1807 'upload_date': '20201120',
1808 'uploader': 'Walk around Japan',
1809 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1810 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1811 },
1812 'params': {
1813 'skip_download': True,
1814 },
1815 }, {
1816 # Has multiple audio streams
1817 'url': 'WaOKSUlf4TM',
1818 'only_matching': True
1819 }, {
1820 # Requires Premium: has format 141 when requested using YTM url
1821 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1822 'only_matching': True
1823 }, {
1824 # multiple subtitles with same lang_code
1825 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1826 'only_matching': True,
1827 }, {
1828 # Force use android client fallback
1829 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1830 'info_dict': {
1831 'id': 'YOelRv7fMxY',
1832 'title': 'DIGGING A SECRET TUNNEL Part 1',
1833 'ext': '3gp',
1834 'upload_date': '20210624',
1835 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1836 'uploader': 'colinfurze',
1837 'uploader_id': 'colinfurze',
1838 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1839 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1840 },
1841 'params': {
1842 'format': '17', # 3gp format available on android
1843 'extractor_args': {'youtube': {'player_client': ['android']}},
1844 },
1845 },
1846 {
1847 # Skip download of additional client configs (remix client config in this case)
1848 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1849 'only_matching': True,
1850 'params': {
1851 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1852 },
1853 }, {
1854 # shorts
1855 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1856 'only_matching': True,
1857 },
1858 ]
1859
1860 @classmethod
1861 def suitable(cls, url):
1862 from ..utils import parse_qs
1863
1864 qs = parse_qs(url)
1865 if qs.get('list', [None])[0]:
1866 return False
1867 return super(YoutubeIE, cls).suitable(url)
1868
1869 def __init__(self, *args, **kwargs):
1870 super(YoutubeIE, self).__init__(*args, **kwargs)
1871 self._code_cache = {}
1872 self._player_cache = {}
1873
1874 def _extract_player_url(self, *ytcfgs, webpage=None):
1875 player_url = traverse_obj(
1876 ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1877 get_all=False, expected_type=compat_str)
1878 if not player_url:
1879 return
1880 if player_url.startswith('//'):
1881 player_url = 'https:' + player_url
1882 elif not re.match(r'https?://', player_url):
1883 player_url = compat_urlparse.urljoin(
1884 'https://www.youtube.com', player_url)
1885 return player_url
1886
1887 def _download_player_url(self, video_id, fatal=False):
1888 res = self._download_webpage(
1889 'https://www.youtube.com/iframe_api',
1890 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1891 if res:
1892 player_version = self._search_regex(
1893 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1894 if player_version:
1895 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1896
1897 def _signature_cache_id(self, example_sig):
1898 """ Return a string representation of a signature """
1899 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1900
1901 @classmethod
1902 def _extract_player_info(cls, player_url):
1903 for player_re in cls._PLAYER_INFO_RE:
1904 id_m = re.search(player_re, player_url)
1905 if id_m:
1906 break
1907 else:
1908 raise ExtractorError('Cannot identify player %r' % player_url)
1909 return id_m.group('id')
1910
1911 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1912 player_id = self._extract_player_info(player_url)
1913 if player_id not in self._code_cache:
1914 self._code_cache[player_id] = self._download_webpage(
1915 player_url, video_id, fatal=fatal,
1916 note='Downloading player ' + player_id,
1917 errnote='Download of %s failed' % player_url)
1918 return player_id in self._code_cache
1919
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Return a callable that decrypts signatures shaped like example_sig.

        The result is cached on disk as a permutation spec keyed by player id
        and signature shape. Returns None implicitly if the player JS cannot
        be downloaded.
        """
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        # func_id is used as a cache filename; ensure it has no path components
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # The cached spec is a list of source indices: a pure permutation
            return lambda s: ''.join(s[i] for i in cache_spec)

        if self._load_player(video_id, player_url):
            code = self._code_cache[player_id]
            res = self._parse_sig_js(code)

            # Run the JS function on a string of distinct characters to
            # record which input index ends up at each output position
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res
1942
    def _print_sig_code(self, func, example_sig):
        """Print equivalent Python code for the extracted signature function.

        Probes `func` with a string of distinct characters, records the
        resulting index permutation and compresses runs with step +/-1 into
        slice expressions.
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a Python slice expression, omitting defaults
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: keep extending while the step matches
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a new +/-1 run
                    step = i - prev
                    start = prev
                    continue
                else:
                    # Isolated index
                    yield 's[%d]' % prev
            # Flush the final element or the still-open run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1981
    def _parse_sig_js(self, jscode):
        """Locate the signature function in player JS and return a wrapper
        that calls it through the JS interpreter.

        The patterns are tried in order, newest player layouts first; do not
        reorder them.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             # NOTE(review): the pattern below duplicates the previous one — harmless but redundant
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The interpreted function takes its arguments as a list
        return lambda s: initial_function([s])
2005
2006 def _decrypt_signature(self, s, video_id, player_url):
2007 """Turn the encrypted s field into a working signature"""
2008
2009 if player_url is None:
2010 raise ExtractorError('Cannot decrypt signature without player_url')
2011
2012 try:
2013 player_id = (player_url, self._signature_cache_id(s))
2014 if player_id not in self._player_cache:
2015 func = self._extract_signature_function(
2016 video_id, player_url, s
2017 )
2018 self._player_cache[player_id] = func
2019 func = self._player_cache[player_id]
2020 if self.get_param('youtube_print_sig_code'):
2021 self._print_sig_code(func, s)
2022 return func(s)
2023 except Exception as e:
2024 tb = traceback.format_exc()
2025 raise ExtractorError(
2026 'Signature extraction failed: ' + tb, cause=e)
2027
2028 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2029 """
2030 Extract signatureTimestamp (sts)
2031 Required to tell API what sig/player version is in use.
2032 """
2033 sts = None
2034 if isinstance(ytcfg, dict):
2035 sts = int_or_none(ytcfg.get('STS'))
2036
2037 if not sts:
2038 # Attempt to extract from player
2039 if player_url is None:
2040 error_msg = 'Cannot extract signature timestamp without player_url.'
2041 if fatal:
2042 raise ExtractorError(error_msg)
2043 self.report_warning(error_msg)
2044 return
2045 if self._load_player(video_id, player_url, fatal=fatal):
2046 player_id = self._extract_player_info(player_url)
2047 code = self._code_cache[player_id]
2048 sts = int_or_none(self._search_regex(
2049 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2050 'JS player signature timestamp', group='sts', fatal=fatal))
2051 return sts
2052
2053 def _mark_watched(self, video_id, player_responses):
2054 playback_url = traverse_obj(
2055 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2056 expected_type=url_or_none, get_all=False)
2057 if not playback_url:
2058 self.report_warning('Unable to mark watched')
2059 return
2060 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2061 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2062
2063 # cpn generation algorithm is reverse engineered from base.js.
2064 # In fact it works even with dummy cpn.
2065 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2066 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2067
2068 qs.update({
2069 'ver': ['2'],
2070 'cpn': [cpn],
2071 })
2072 playback_url = compat_urlparse.urlunparse(
2073 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2074
2075 self._download_webpage(
2076 playback_url, video_id, 'Marking watched',
2077 'Unable to mark watched', fatal=False)
2078
2079 @staticmethod
2080 def _extract_urls(webpage):
2081 # Embedded YouTube player
2082 entries = [
2083 unescapeHTML(mobj.group('url'))
2084 for mobj in re.finditer(r'''(?x)
2085 (?:
2086 <iframe[^>]+?src=|
2087 data-video-url=|
2088 <embed[^>]+?src=|
2089 embedSWF\(?:\s*|
2090 <object[^>]+data=|
2091 new\s+SWFObject\(
2092 )
2093 (["\'])
2094 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2095 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2096 \1''', webpage)]
2097
2098 # lazyYT YouTube embed
2099 entries.extend(list(map(
2100 unescapeHTML,
2101 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2102
2103 # Wordpress "YouTube Video Importer" plugin
2104 matches = re.findall(r'''(?x)<div[^>]+
2105 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2106 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2107 entries.extend(m[-1] for m in matches)
2108
2109 return entries
2110
2111 @staticmethod
2112 def _extract_url(webpage):
2113 urls = YoutubeIE._extract_urls(webpage)
2114 return urls[0] if urls else None
2115
2116 @classmethod
2117 def extract_id(cls, url):
2118 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2119 if mobj is None:
2120 raise ExtractorError('Invalid URL: %s' % url)
2121 return mobj.group('id')
2122
2123 def _extract_chapters_from_json(self, data, duration):
2124 chapter_list = traverse_obj(
2125 data, (
2126 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2127 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2128 ), expected_type=list)
2129
2130 return self._extract_chapters(
2131 chapter_list,
2132 chapter_time=lambda chapter: float_or_none(
2133 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2134 chapter_title=lambda chapter: traverse_obj(
2135 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2136 duration=duration)
2137
2138 def _extract_chapters_from_engagement_panel(self, data, duration):
2139 content_list = traverse_obj(
2140 data,
2141 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2142 expected_type=list, default=[])
2143 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2144 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2145
2146 return next((
2147 filter(None, (
2148 self._extract_chapters(
2149 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2150 chapter_time, chapter_title, duration)
2151 for contents in content_list
2152 ))), [])
2153
2154 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2155 chapters = []
2156 last_chapter = {'start_time': 0}
2157 for idx, chapter in enumerate(chapter_list or []):
2158 title = chapter_title(chapter)
2159 start_time = chapter_time(chapter)
2160 if start_time is None:
2161 continue
2162 last_chapter['end_time'] = start_time
2163 if start_time < last_chapter['start_time']:
2164 if idx == 1:
2165 chapters.pop()
2166 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2167 else:
2168 self.report_warning(f'Invalid start time for chapter "{title}"')
2169 continue
2170 last_chapter = {'start_time': start_time, 'title': title}
2171 chapters.append(last_chapter)
2172 last_chapter['end_time'] = duration
2173 return chapters
2174
2175 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2176 return self._parse_json(self._search_regex(
2177 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2178 regex), webpage, name, default='{}'), video_id, fatal=False)
2179
2180 @staticmethod
2181 def parse_time_text(time_text):
2182 """
2183 Parse the comment time text
2184 time_text is in the format 'X units ago (edited)'
2185 """
2186 time_text_split = time_text.split(' ')
2187 if len(time_text_split) >= 3:
2188 try:
2189 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2190 except ValueError:
2191 return None
2192
2193 def _extract_comment(self, comment_renderer, parent=None):
2194 comment_id = comment_renderer.get('commentId')
2195 if not comment_id:
2196 return
2197
2198 text = self._get_text(comment_renderer, 'contentText')
2199
2200 # note: timestamp is an estimate calculated from the current time and time_text
2201 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2202 time_text_dt = self.parse_time_text(time_text)
2203 if isinstance(time_text_dt, datetime.datetime):
2204 timestamp = calendar.timegm(time_text_dt.timetuple())
2205 author = self._get_text(comment_renderer, 'authorText')
2206 author_id = try_get(comment_renderer,
2207 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2208
2209 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2210 lambda x: x['likeCount']), compat_str)) or 0
2211 author_thumbnail = try_get(comment_renderer,
2212 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2213
2214 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2215 is_favorited = 'creatorHeart' in (try_get(
2216 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2217 return {
2218 'id': comment_id,
2219 'text': text,
2220 'timestamp': timestamp,
2221 'time_text': time_text,
2222 'like_count': votes,
2223 'is_favorited': is_favorited,
2224 'author': author,
2225 'author_id': author_id,
2226 'author_thumbnail': author_thumbnail,
2227 'author_is_uploader': author_is_uploader,
2228 'parent': parent or 'root'
2229 }
2230
    def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
        """Generator over comments for a video (or, when `parent` is set, the
        replies of a single comment thread).

        Yields comment info dicts; a top-level call may additionally yield a
        single int first — the estimated total comment count.
        comment_counts is shared, mutable state: [downloaded so far,
        estimated total, current reply-thread index].
        """

        def extract_header(contents):
            # Parse the comments header: total count and the continuation
            # token for the requested sort order. Returns (total, continuation).
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in `contents` followed (recursively) by its replies
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, ytcfg, video_id,
                        parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the comment API until no continuation remains
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry the visitor data forward so subsequent pages stay in session
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                # Current response structure
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2400
2401 @staticmethod
2402 def _generate_comment_continuation(video_id):
2403 """
2404 Generates initial comment section continuation token from given video id
2405 """
2406 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2407 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2408 new_continuation_intlist = list(itertools.chain.from_iterable(
2409 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2410 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2411
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            # _comment_entries yields comment dicts, plus possibly one int
            # (the estimated total) as its first item
            yield from self._comment_entries(
                traverse_obj(contents, (..., 'itemSectionRenderer'), get_all=False), ytcfg, video_id)

        comments = []
        estimated_total = 0
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
        # Force English regardless of account setting to prevent parsing issues
        # See: https://github.com/yt-dlp/yt-dlp/issues/532
        # deepcopy so the caller's ytcfg is not mutated
        ytcfg = copy.deepcopy(ytcfg)
        traverse_obj(
            ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
        try:
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                # An int yield is the estimated total, not a comment
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            # Deliberate: Ctrl-C stops downloading but keeps what we have
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2441
2442 @staticmethod
2443 def _get_checkok_params():
2444 return {'contentCheckOk': True, 'racyCheckOk': True}
2445
2446 @classmethod
2447 def _generate_player_context(cls, sts=None):
2448 context = {
2449 'html5Preference': 'HTML5_PREF_WANTS',
2450 }
2451 if sts is not None:
2452 context['signatureTimestamp'] = sts
2453 return {
2454 'playbackContext': {
2455 'contentPlaybackContext': context
2456 },
2457 **cls._get_checkok_params()
2458 }
2459
2460 @staticmethod
2461 def _is_agegated(player_response):
2462 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2463 return True
2464
2465 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2466 AGE_GATE_REASONS = (
2467 'confirm your age', 'age-restricted', 'inappropriate', # reason
2468 'age_verification_required', 'age_check_required', # status
2469 )
2470 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2471
2472 @staticmethod
2473 def _is_unplayable(player_response):
2474 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2475
2476 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2477
2478 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2479 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2480 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2481 headers = self.generate_api_headers(
2482 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2483
2484 yt_query = {'videoId': video_id}
2485 yt_query.update(self._generate_player_context(sts))
2486 return self._extract_response(
2487 item_id=video_id, ep='player', query=yt_query,
2488 ytcfg=player_ytcfg, headers=headers, fatal=True,
2489 default_client=client,
2490 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2491 ) or None
2492
2493 def _get_requested_clients(self, url, smuggled_data):
2494 requested_clients = []
2495 allowed_clients = sorted(
2496 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2497 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2498 for client in self._configuration_arg('player_client'):
2499 if client in allowed_clients:
2500 requested_clients.append(client)
2501 elif client == 'all':
2502 requested_clients.extend(allowed_clients)
2503 else:
2504 self.report_warning(f'Skipping unsupported client {client}')
2505 if not requested_clients:
2506 requested_clients = ['android', 'web']
2507
2508 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2509 requested_clients.extend(
2510 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2511
2512 return orderedSet(requested_clients)
2513
2514 def _extract_player_ytcfg(self, client, video_id):
2515 url = {
2516 'web_music': 'https://music.youtube.com',
2517 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2518 }.get(client)
2519 if not url:
2520 return {}
2521 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2522 return self.extract_ytcfg(video_id, webpage) or {}
2523
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
        """Query each requested innertube client for a player response.

        Returns (prs, player_url): the collected player-response dicts and the
        JS player URL (needed later for signature decryption). Age-gated or
        unplayable responses queue follow-up clients (*_agegate / *_creator).
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        original_clients = clients
        # `clients` is consumed as a stack (pop from the end), so reverse it
        # to preserve the requested order
        clients = clients[::-1]
        prs = []

        def append_client(client_name):
            # Queue a fallback client unless the user explicitly requested it already
            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
                clients.append(client_name)

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr:
            pr = dict(initial_pr)
            pr['streamingData'] = None
            prs.append(pr)

        last_error = None
        tried_iframe_fallback = False
        player_url = None
        while clients:
            client = clients.pop()
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            # Reuse the first player URL found; only look again while still unset
            player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
            require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
            if 'js' in self._configuration_arg('player_skip'):
                require_js_player = False
                player_url = None

            # One-shot fallback: fetch the player URL from the embed iframe
            if not player_url and not tried_iframe_fallback and require_js_player:
                player_url = self._download_player_url(video_id)
                tried_iframe_fallback = True

            try:
                # The web client can reuse the player response already embedded in the page
                pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
            except ExtractorError as e:
                # Remember the failure but keep trying the remaining clients
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

            if pr:
                prs.append(pr)

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
                append_client(client.replace('_agegate', '_creator'))
            elif self._is_agegated(pr):
                append_client(f'{client}_agegate')

        if last_error:
            # Only fatal when no client produced a usable response at all
            if not len(prs):
                raise last_error
            self.report_warning(last_error)
        return prs, player_url
2590
    def _extract_formats(self, streaming_data, video_id, player_url, is_live):
        """Yield format dicts from the streamingData of all player responses.

        Handles progressive/adaptive formats (including signatureCipher
        decryption via the JS player) and then HLS/DASH manifests where
        enabled, de-duplicating by itag/stream id across clients.
        """
        itags, stream_ids = [], []
        itag_qualities, res_qualities = {}, {}
        q = qualities([
            # Normally tiny is the smallest video-only formats. But
            # audio-only formats with unknown quality may get tagged as tiny
            'tiny',
            'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
            'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
        ])
        streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

        for fmt in streaming_formats:
            # Skip DRM-protected formats and those with a target duration
            if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
                continue

            itag = str_or_none(fmt.get('itag'))
            audio_track = fmt.get('audioTrack') or {}
            stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
            # De-duplicate: the same stream may appear in several player responses
            if stream_id in stream_ids:
                continue

            quality = fmt.get('quality')
            height = int_or_none(fmt.get('height'))
            if quality == 'tiny' or not quality:
                quality = fmt.get('audioQuality', '').lower() or quality
            # The 3gp format (17) in android client has a quality of "small",
            # but is actually worse than other formats
            if itag == '17':
                quality = 'tiny'
            if quality:
                if itag:
                    itag_qualities[itag] = quality
                if height:
                    res_qualities[height] = quality
            # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
            # (adding `&sq=0` to the URL) and parsing emsg box to determine the
            # number of fragment that would subsequently requested with (`&sq=N`)
            if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
                continue

            fmt_url = fmt.get('url')
            if not fmt_url:
                # URL is hidden behind a signatureCipher; needs the JS player to decrypt
                sc = compat_parse_qs(fmt.get('signatureCipher'))
                fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
                encrypted_sig = try_get(sc, lambda x: x['s'][0])
                if not (sc and fmt_url and encrypted_sig):
                    continue
                if not player_url:
                    continue
                signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
                sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
                fmt_url += '&' + sp + '=' + signature

            if itag:
                itags.append(itag)
                stream_ids.append(stream_id)

            tbr = float_or_none(
                fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
            dct = {
                'asr': int_or_none(fmt.get('audioSampleRate')),
                'filesize': int_or_none(fmt.get('contentLength')),
                'format_id': itag,
                'format_note': ', '.join(filter(None, (
                    '%s%s' % (audio_track.get('displayName') or '',
                              ' (default)' if audio_track.get('audioIsDefault') else ''),
                    # NOTE(review): `quality` may still be None here if neither
                    # 'quality' nor 'audioQuality' was set, in which case
                    # .replace() would raise — confirm upstream data guarantees
                    fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
                'fps': int_or_none(fmt.get('fps')),
                'height': height,
                'quality': q(quality),
                'tbr': tbr,
                'url': fmt_url,
                'width': int_or_none(fmt.get('width')),
                'language': audio_track.get('id', '').split('.')[0],
                'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
            }
            mime_mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
            if mime_mobj:
                dct['ext'] = mimetype2ext(mime_mobj.group(1))
                dct.update(parse_codecs(mime_mobj.group(2)))
            no_audio = dct.get('acodec') == 'none'
            no_video = dct.get('vcodec') == 'none'
            if no_audio:
                dct['vbr'] = tbr
            if no_video:
                dct['abr'] = tbr
            if no_audio or no_video:
                dct['downloader_options'] = {
                    # Youtube throttles chunks >~10M
                    'http_chunk_size': 10485760,
                }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
            yield dct

        skip_manifests = self._configuration_arg('skip')
        get_dash = (
            (not is_live or self._configuration_arg('include_live_dash'))
            and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
        get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

        def guess_quality(f):
            # Infer quality for manifest formats from the itag/height maps built above
            for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
                if val in qdict:
                    return q(qdict[val])
            return -1

        for sd in streaming_data:
            hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
            if hls_manifest_url:
                for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                    itag = self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)
                    if itag in itags:
                        continue
                    if itag:
                        f['format_id'] = itag
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    yield f

            dash_manifest_url = get_dash and sd.get('dashManifestUrl')
            if dash_manifest_url:
                for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                    itag = f['format_id']
                    if itag in itags:
                        continue
                    if itag:
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    filesize = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url')
                        or f['url'], 'file size', default=None))
                    if filesize:
                        f['filesize'] = filesize
                    yield f
2729
    def _real_extract(self, url):
        """Extract a single YouTube video: fetch the watch page and player
        responses, build formats/thumbnails/subtitles, and assemble the info dict."""
        url, smuggled_data = unsmuggle_url(url, {})
        video_id = self._match_id(url)

        base_url = self.http_scheme() + '//www.youtube.com/'
        webpage_url = base_url + 'watch?v=' + video_id
        webpage = None
        if 'webpage' not in self._configuration_arg('player_skip'):
            # bpctr/has_verified attempt to bypass content warnings
            webpage = self._download_webpage(
                webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)

        master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

        player_responses, player_url = self._extract_player_responses(
            self._get_requested_clients(url, smuggled_data),
            video_id, webpage, master_ytcfg)

        # First matching value across all player responses
        get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)

        playability_statuses = traverse_obj(
            player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])

        # Paid content may only expose a trailer; redirect to it
        trailer_video_id = get_first(
            playability_statuses,
            ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
            expected_type=str)
        if trailer_video_id:
            return self.url_result(
                trailer_video_id, self.ie_key(), trailer_video_id)

        search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
                       if webpage else (lambda x: None))

        video_details = traverse_obj(
            player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
        microformats = traverse_obj(
            player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
            expected_type=dict, default=[])
        video_title = (
            get_first(video_details, 'title')
            or self._get_text(microformats, (..., 'title'))
            or search_meta(['og:title', 'twitter:title', 'title']))
        video_description = get_first(video_details, 'shortDescription')

        # Multi-camera events: return a playlist of the individual feeds
        if not smuggled_data.get('force_singlefeed', False):
            if not self.get_param('noplaylist'):
                multifeed_metadata_list = get_first(
                    player_responses,
                    ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
                    expected_type=str)
                if multifeed_metadata_list:
                    entries = []
                    feed_ids = []
                    for feed in multifeed_metadata_list.split(','):
                        # Unquote should take place before split on comma (,) since textual
                        # fields may contain comma as well (see
                        # https://github.com/ytdl-org/youtube-dl/issues/8536)
                        feed_data = compat_parse_qs(
                            compat_urllib_parse_unquote_plus(feed))

                        def feed_entry(name):
                            return try_get(
                                feed_data, lambda x: x[name][0], compat_str)

                        feed_id = feed_entry('id')
                        if not feed_id:
                            continue
                        feed_title = feed_entry('title')
                        title = video_title
                        if feed_title:
                            title += ' (%s)' % feed_title
                        entries.append({
                            '_type': 'url_transparent',
                            'ie_key': 'Youtube',
                            'url': smuggle_url(
                                '%swatch?v=%s' % (base_url, feed_data['id'][0]),
                                {'force_singlefeed': True}),
                            'title': title,
                        })
                        feed_ids.append(feed_id)
                    self.to_screen(
                        'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
                        % (', '.join(feed_ids), video_id))
                    return self.playlist_result(
                        entries, video_id, video_title, video_description)
            else:
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

        live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
        is_live = get_first(video_details, 'isLive')
        if is_live is None:
            is_live = get_first(live_broadcast_details, 'isLiveNow')

        streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
        formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))

        # No formats: surface DRM / geo-restriction / playability reason
        if not formats:
            if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
                self.report_drm(video_id)
            pemr = get_first(
                playability_statuses,
                ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
            reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
            subreason = clean_html(self._get_text(pemr, 'subreason') or '')
            if subreason:
                if subreason == 'The uploader has not made this video available in your country.':
                    countries = get_first(microformats, 'availableCountries')
                    if not countries:
                        regions_allowed = search_meta('regionsAllowed')
                        countries = regions_allowed.split(',') if regions_allowed else None
                    self.raise_geo_restricted(subreason, countries, metadata_available=True)
                reason += f'. {subreason}'
            if reason:
                self.raise_no_formats(reason, expected=True)

        for f in formats:
            if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']:  # throttled
                f['source_preference'] = -10
                # TODO: this method is not reliable
                f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'

        # Source is given priority since formats that throttle are given lower source_preference
        # When throttling issue is fully fixed, remove this
        self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang'))

        keywords = get_first(video_details, 'keywords', expected_type=list) or []
        if not keywords and webpage:
            keywords = [
                unescapeHTML(m.group('content'))
                for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
        # yt:stretch=<w>:<h> keyword overrides the aspect ratio of video formats
        for keyword in keywords:
            if keyword.startswith('yt:stretch='):
                mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
                if mobj:
                    # NB: float is intentional for forcing float division
                    w, h = (float(v) for v in mobj.groups())
                    if w > 0 and h > 0:
                        ratio = w / h
                        for f in formats:
                            if f.get('vcodec') != 'none':
                                f['stretched_ratio'] = ratio
                        break

        thumbnails = []
        thumbnail_dicts = traverse_obj(
            (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
            expected_type=dict, default=[])
        for thumbnail in thumbnail_dicts:
            thumbnail_url = thumbnail.get('url')
            if not thumbnail_url:
                continue
            # Sometimes youtube gives a wrong thumbnail URL. See:
            # https://github.com/yt-dlp/yt-dlp/issues/233
            # https://github.com/ytdl-org/youtube-dl/issues/28023
            if 'maxresdefault' in thumbnail_url:
                thumbnail_url = thumbnail_url.split('?')[0]
            thumbnails.append({
                'url': thumbnail_url,
                'height': int_or_none(thumbnail.get('height')),
                'width': int_or_none(thumbnail.get('width')),
            })
        thumbnail_url = search_meta(['og:image', 'twitter:image'])
        if thumbnail_url:
            thumbnails.append({
                'url': thumbnail_url,
            })
        # The best resolution thumbnails sometimes does not appear in the webpage
        # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
        # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
        hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
        # TODO: Test them also? - For some videos, even these don't exist
        guaranteed_thumbnail_names = [
            'hqdefault', 'hq1', 'hq2', 'hq3', '0',
            'mqdefault', 'mq1', 'mq2', 'mq3',
            'default', '1', '2', '3'
        ]
        thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
        n_thumbnail_names = len(thumbnail_names)

        # Synthesize the known i.ytimg.com thumbnail URLs (webp preferred)
        thumbnails.extend({
            'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
                video_id=video_id, name=name, ext=ext,
                webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
            '_test_url': name in hq_thumbnail_names,
        } for name in thumbnail_names for ext in ('webp', 'jpg'))
        for thumb in thumbnails:
            i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
            thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
        self._remove_duplicate_formats(thumbnails)

        category = get_first(microformats, 'category') or search_meta('genre')
        channel_id = str_or_none(
            get_first(video_details, 'channelId')
            or get_first(microformats, 'externalChannelId')
            or search_meta('channelId'))
        duration = int_or_none(
            get_first(video_details, 'lengthSeconds')
            or get_first(microformats, 'lengthSeconds')
            or parse_duration(search_meta('duration'))) or None
        owner_profile_url = get_first(microformats, 'ownerProfileUrl')

        live_content = get_first(video_details, 'isLiveContent')
        is_upcoming = get_first(video_details, 'isUpcoming')
        if is_live is None:
            if is_upcoming or live_content is False:
                is_live = False
        if is_upcoming is None and (live_content or is_live):
            is_upcoming = False
        live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
        live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
        if not duration and live_endtime and live_starttime:
            duration = live_endtime - live_starttime

        info = {
            'id': video_id,
            'title': self._live_title(video_title) if is_live else video_title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': video_description,
            'upload_date': unified_strdate(
                get_first(microformats, 'uploadDate')
                or search_meta('uploadDate')),
            'uploader': get_first(video_details, 'author'),
            'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
            'uploader_url': owner_profile_url,
            'channel_id': channel_id,
            'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
            'duration': duration,
            'view_count': int_or_none(
                get_first((video_details, microformats), (..., 'viewCount'))
                or search_meta('interactionCount')),
            'average_rating': float_or_none(get_first(video_details, 'averageRating')),
            'age_limit': 18 if (
                get_first(microformats, 'isFamilySafe') is False
                or search_meta('isFamilyFriendly') == 'false'
                or search_meta('og:restrictions:age') == '18+') else 0,
            'webpage_url': webpage_url,
            'categories': [category] if category else None,
            'tags': keywords,
            'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
            'is_live': is_live,
            'was_live': (False if is_live or is_upcoming or live_content is False
                         else None if is_live is None or is_upcoming is None
                         else live_content),
            'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
            'release_timestamp': live_starttime,
        }

        pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
        # Converted into dicts to remove duplicates
        captions = {
            sub.get('baseUrl'): sub
            for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
        translation_languages = {
            lang.get('languageCode'): lang.get('languageName')
            for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
        subtitles = {}
        if pctr:
            def process_language(container, base_url, lang_code, sub_name, query):
                # Add one entry per supported subtitle format for this language
                lang_subs = container.setdefault(lang_code, [])
                for fmt in self._SUBTITLE_FORMATS:
                    query.update({
                        'fmt': fmt,
                    })
                    lang_subs.append({
                        'ext': fmt,
                        'url': update_url_query(base_url, query),
                        'name': sub_name,
                    })

            for base_url, caption_track in captions.items():
                if not base_url:
                    continue
                # Non-ASR tracks are manual subtitles; ASR tracks seed the
                # auto-translated captions below
                if caption_track.get('kind') != 'asr':
                    lang_code = (
                        remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
                        or caption_track.get('languageCode'))
                    if not lang_code:
                        continue
                    process_language(
                        subtitles, base_url, lang_code,
                        traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False),
                        {})
                    continue
                automatic_captions = {}
                for trans_code, trans_name in translation_languages.items():
                    if not trans_code:
                        continue
                    process_language(
                        automatic_captions, base_url, trans_code,
                        self._get_text(trans_name, max_runs=1),
                        {'tlang': trans_code})
                info['automatic_captions'] = automatic_captions
        info['subtitles'] = subtitles

        # Pick up t/start/end time markers from the URL query or fragment
        parsed_url = compat_urllib_parse_urlparse(url)
        for component in [parsed_url.fragment, parsed_url.query]:
            query = compat_parse_qs(component)
            for k, v in query.items():
                for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                    d_k += '_time'
                    if d_k not in info and k in s_ks:
                        info[d_k] = parse_duration(query[k][0])

        # Youtube Music Auto-generated description
        if video_description:
            mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
            if mobj:
                release_year = mobj.group('release_year')
                release_date = mobj.group('release_date')
                if release_date:
                    release_date = release_date.replace('-', '')
                    if not release_year:
                        release_year = release_date[:4]
                info.update({
                    # NOTE(review): .strip() here is applied to the literal string
                    # 'album' (a no-op), not to the matched text — likely intended
                    # mobj.group('album').strip()
                    'album': mobj.group('album'.strip()),
                    'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
                    'track': mobj.group('track').strip(),
                    'release_date': release_date,
                    'release_year': int_or_none(release_year),
                })

        # Watch-next ("initial data") supplies chapters, live chat, counts, etc.
        initial_data = None
        if webpage:
            initial_data = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_DATA_RE, video_id,
                'yt initial data')
        if not initial_data:
            query = {'videoId': video_id}
            query.update(self._get_checkok_params())
            initial_data = self._extract_response(
                item_id=video_id, ep='next', fatal=False,
                ytcfg=master_ytcfg, query=query,
                headers=self.generate_api_headers(ytcfg=master_ytcfg),
                note='Downloading initial data API JSON')

        try:
            # This will error if there is no livechat
            initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
            info['subtitles']['live_chat'] = [{
                'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
                'video_id': video_id,
                'ext': 'json',
                'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
            }]
        except (KeyError, IndexError, TypeError):
            pass

        if initial_data:
            info['chapters'] = (
                self._extract_chapters_from_json(initial_data, duration)
                or self._extract_chapters_from_engagement_panel(initial_data, duration)
                or None)

        contents = try_get(
            initial_data,
            lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
            list) or []
        for content in contents:
            vpir = content.get('videoPrimaryInfoRenderer')
            if vpir:
                stl = vpir.get('superTitleLink')
                if stl:
                    stl = self._get_text(stl)
                    if try_get(
                            vpir,
                            lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
                        info['location'] = stl
                    else:
                        # Super title of form "<series> S<season> • E<episode>"
                        mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
                        if mobj:
                            info.update({
                                'series': mobj.group(1),
                                'season_number': int(mobj.group(2)),
                                'episode_number': int(mobj.group(3)),
                            })
                # Like/dislike counts are parsed from button accessibility labels
                for tlb in (try_get(
                        vpir,
                        lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                        list) or []):
                    tbr = tlb.get('toggleButtonRenderer') or {}
                    for getter, regex in [(
                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
                                lambda x: x['accessibility'],
                                lambda x: x['accessibilityData']['accessibilityData'],
                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
                        label = (try_get(tbr, getter, dict) or {}).get('label')
                        if label:
                            mobj = re.match(regex, label)
                            if mobj:
                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
                                break
                sbr_tooltip = try_get(
                    vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
                if sbr_tooltip:
                    like_count, dislike_count = sbr_tooltip.split(' / ')
                    info.update({
                        'like_count': str_to_int(like_count),
                        'dislike_count': str_to_int(dislike_count),
                    })
            vsir = content.get('videoSecondaryInfoRenderer')
            if vsir:
                info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
                rows = try_get(
                    vsir,
                    lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
                    list) or []
                # A divider line indicates multiple songs; per-song fields are
                # then ambiguous and skipped below
                multiple_songs = False
                for row in rows:
                    if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
                        multiple_songs = True
                        break
                for row in rows:
                    mrr = row.get('metadataRowRenderer') or {}
                    mrr_title = mrr.get('title')
                    if not mrr_title:
                        continue
                    mrr_title = self._get_text(mrr, 'title')
                    mrr_contents_text = self._get_text(mrr, ('contents', 0))
                    if mrr_title == 'License':
                        info['license'] = mrr_contents_text
                    elif not multiple_songs:
                        if mrr_title == 'Album':
                            info['album'] = mrr_contents_text
                        elif mrr_title == 'Artist':
                            info['artist'] = mrr_contents_text
                        elif mrr_title == 'Song':
                            info['track'] = mrr_contents_text

        # Fall back to uploader fields when channel fields are missing
        fallbacks = {
            'channel': 'uploader',
            'channel_id': 'uploader_id',
            'channel_url': 'uploader_url',
        }
        for to, frm in fallbacks.items():
            if not info.get(to):
                info[to] = info.get(frm)

        for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
            v = info.get(s_k)
            if v:
                info[d_k] = v

        is_private = get_first(video_details, 'isPrivate', expected_type=bool)
        is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
        is_membersonly = None
        is_premium = None
        if initial_data and is_private is not None:
            is_membersonly = False
            is_premium = False
            contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
            badge_labels = set()
            for content in contents:
                if not isinstance(content, dict):
                    continue
                badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
            for badge_label in badge_labels:
                if badge_label.lower() == 'members only':
                    is_membersonly = True
                elif badge_label.lower() == 'premium':
                    is_premium = True
                elif badge_label.lower() == 'unlisted':
                    is_unlisted = True

        info['availability'] = self._availability(
            is_private=is_private,
            needs_premium=is_premium,
            needs_subscription=is_membersonly,
            needs_auth=info['age_limit'] >= 18,
            is_unlisted=None if is_private is None else is_unlisted)

        if self.get_param('getcomments', False):
            # Deferred: comments are only fetched after the caller opts in
            info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)

        self.mark_watched(video_id, player_responses)

        return info
3208
3209
3210 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3211 IE_DESC = 'YouTube.com tab'
3212 _VALID_URL = r'''(?x)
3213 https?://
3214 (?:\w+\.)?
3215 (?:
3216 youtube(?:kids)?\.com|
3217 invidio\.us
3218 )/
3219 (?:
3220 (?P<channel_type>channel|c|user|browse)/|
3221 (?P<not_channel>
3222 feed/|hashtag/|
3223 (?:playlist|watch)\?.*?\blist=
3224 )|
3225 (?!(?:%s)\b) # Direct URLs
3226 )
3227 (?P<id>[^/?\#&]+)
3228 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3229 IE_NAME = 'youtube:tab'
3230
3231 _TESTS = [{
3232 'note': 'playlists, multipage',
3233 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3234 'playlist_mincount': 94,
3235 'info_dict': {
3236 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3237 'title': 'Игорь Клейнер - Playlists',
3238 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3239 'uploader': 'Игорь Клейнер',
3240 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3241 },
3242 }, {
3243 'note': 'playlists, multipage, different order',
3244 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3245 'playlist_mincount': 94,
3246 'info_dict': {
3247 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3248 'title': 'Игорь Клейнер - Playlists',
3249 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3250 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3251 'uploader': 'Игорь Клейнер',
3252 },
3253 }, {
3254 'note': 'playlists, series',
3255 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3256 'playlist_mincount': 5,
3257 'info_dict': {
3258 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3259 'title': '3Blue1Brown - Playlists',
3260 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3261 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3262 'uploader': '3Blue1Brown',
3263 },
3264 }, {
3265 'note': 'playlists, singlepage',
3266 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3267 'playlist_mincount': 4,
3268 'info_dict': {
3269 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3270 'title': 'ThirstForScience - Playlists',
3271 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3272 'uploader': 'ThirstForScience',
3273 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3274 }
3275 }, {
3276 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3277 'only_matching': True,
3278 }, {
3279 'note': 'basic, single video playlist',
3280 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3281 'info_dict': {
3282 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3283 'uploader': 'Sergey M.',
3284 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3285 'title': 'youtube-dl public playlist',
3286 },
3287 'playlist_count': 1,
3288 }, {
3289 'note': 'empty playlist',
3290 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3291 'info_dict': {
3292 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3293 'uploader': 'Sergey M.',
3294 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3295 'title': 'youtube-dl empty playlist',
3296 },
3297 'playlist_count': 0,
3298 }, {
3299 'note': 'Home tab',
3300 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3301 'info_dict': {
3302 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3303 'title': 'lex will - Home',
3304 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3305 'uploader': 'lex will',
3306 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3307 },
3308 'playlist_mincount': 2,
3309 }, {
3310 'note': 'Videos tab',
3311 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3312 'info_dict': {
3313 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3314 'title': 'lex will - Videos',
3315 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3316 'uploader': 'lex will',
3317 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3318 },
3319 'playlist_mincount': 975,
3320 }, {
3321 'note': 'Videos tab, sorted by popular',
3322 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3323 'info_dict': {
3324 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3325 'title': 'lex will - Videos',
3326 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3327 'uploader': 'lex will',
3328 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3329 },
3330 'playlist_mincount': 199,
3331 }, {
3332 'note': 'Playlists tab',
3333 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3334 'info_dict': {
3335 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3336 'title': 'lex will - Playlists',
3337 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3338 'uploader': 'lex will',
3339 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3340 },
3341 'playlist_mincount': 17,
3342 }, {
3343 'note': 'Community tab',
3344 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3345 'info_dict': {
3346 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3347 'title': 'lex will - Community',
3348 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3349 'uploader': 'lex will',
3350 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3351 },
3352 'playlist_mincount': 18,
3353 }, {
3354 'note': 'Channels tab',
3355 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3356 'info_dict': {
3357 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3358 'title': 'lex will - Channels',
3359 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3360 'uploader': 'lex will',
3361 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3362 },
3363 'playlist_mincount': 12,
3364 }, {
3365 'note': 'Search tab',
3366 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3367 'playlist_mincount': 40,
3368 'info_dict': {
3369 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3370 'title': '3Blue1Brown - Search - linear algebra',
3371 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3372 'uploader': '3Blue1Brown',
3373 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3374 },
3375 }, {
3376 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3377 'only_matching': True,
3378 }, {
3379 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3380 'only_matching': True,
3381 }, {
3382 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3383 'only_matching': True,
3384 }, {
3385 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3386 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3387 'info_dict': {
3388 'title': '29C3: Not my department',
3389 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3390 'uploader': 'Christiaan008',
3391 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3392 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3393 },
3394 'playlist_count': 96,
3395 }, {
3396 'note': 'Large playlist',
3397 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3398 'info_dict': {
3399 'title': 'Uploads from Cauchemar',
3400 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3401 'uploader': 'Cauchemar',
3402 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3403 },
3404 'playlist_mincount': 1123,
3405 }, {
3406 'note': 'even larger playlist, 8832 videos',
3407 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3408 'only_matching': True,
3409 }, {
3410 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3411 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3412 'info_dict': {
3413 'title': 'Uploads from Interstellar Movie',
3414 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3415 'uploader': 'Interstellar Movie',
3416 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3417 },
3418 'playlist_mincount': 21,
3419 }, {
3420 'note': 'Playlist with "show unavailable videos" button',
3421 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3422 'info_dict': {
3423 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3424 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3425 'uploader': 'Phim Siêu Nhân Nhật Bản',
3426 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3427 },
3428 'playlist_mincount': 200,
3429 }, {
3430 'note': 'Playlist with unavailable videos in page 7',
3431 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3432 'info_dict': {
3433 'title': 'Uploads from BlankTV',
3434 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3435 'uploader': 'BlankTV',
3436 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3437 },
3438 'playlist_mincount': 1000,
3439 }, {
3440 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3441 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3442 'info_dict': {
3443 'title': 'Data Analysis with Dr Mike Pound',
3444 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3445 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3446 'uploader': 'Computerphile',
3447 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3448 },
3449 'playlist_mincount': 11,
3450 }, {
3451 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3452 'only_matching': True,
3453 }, {
3454 'note': 'Playlist URL that does not actually serve a playlist',
3455 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3456 'info_dict': {
3457 'id': 'FqZTN594JQw',
3458 'ext': 'webm',
3459 'title': "Smiley's People 01 detective, Adventure Series, Action",
3460 'uploader': 'STREEM',
3461 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3462 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3463 'upload_date': '20150526',
3464 'license': 'Standard YouTube License',
3465 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3466 'categories': ['People & Blogs'],
3467 'tags': list,
3468 'view_count': int,
3469 'like_count': int,
3470 'dislike_count': int,
3471 },
3472 'params': {
3473 'skip_download': True,
3474 },
3475 'skip': 'This video is not available.',
3476 'add_ie': [YoutubeIE.ie_key()],
3477 }, {
3478 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3479 'only_matching': True,
3480 }, {
3481 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3482 'only_matching': True,
3483 }, {
3484 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3485 'info_dict': {
3486 'id': '3yImotZU3tw', # This will keep changing
3487 'ext': 'mp4',
3488 'title': compat_str,
3489 'uploader': 'Sky News',
3490 'uploader_id': 'skynews',
3491 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3492 'upload_date': r're:\d{8}',
3493 'description': compat_str,
3494 'categories': ['News & Politics'],
3495 'tags': list,
3496 'like_count': int,
3497 'dislike_count': int,
3498 },
3499 'params': {
3500 'skip_download': True,
3501 },
3502 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3503 }, {
3504 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3505 'info_dict': {
3506 'id': 'a48o2S1cPoo',
3507 'ext': 'mp4',
3508 'title': 'The Young Turks - Live Main Show',
3509 'uploader': 'The Young Turks',
3510 'uploader_id': 'TheYoungTurks',
3511 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3512 'upload_date': '20150715',
3513 'license': 'Standard YouTube License',
3514 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3515 'categories': ['News & Politics'],
3516 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3517 'like_count': int,
3518 'dislike_count': int,
3519 },
3520 'params': {
3521 'skip_download': True,
3522 },
3523 'only_matching': True,
3524 }, {
3525 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3526 'only_matching': True,
3527 }, {
3528 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3529 'only_matching': True,
3530 }, {
3531 'note': 'A channel that is not live. Should raise error',
3532 'url': 'https://www.youtube.com/user/numberphile/live',
3533 'only_matching': True,
3534 }, {
3535 'url': 'https://www.youtube.com/feed/trending',
3536 'only_matching': True,
3537 }, {
3538 'url': 'https://www.youtube.com/feed/library',
3539 'only_matching': True,
3540 }, {
3541 'url': 'https://www.youtube.com/feed/history',
3542 'only_matching': True,
3543 }, {
3544 'url': 'https://www.youtube.com/feed/subscriptions',
3545 'only_matching': True,
3546 }, {
3547 'url': 'https://www.youtube.com/feed/watch_later',
3548 'only_matching': True,
3549 }, {
3550 'note': 'Recommended - redirects to home page',
3551 'url': 'https://www.youtube.com/feed/recommended',
3552 'only_matching': True,
3553 }, {
3554 'note': 'inline playlist with not always working continuations',
3555 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3556 'only_matching': True,
3557 }, {
3558 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3559 'only_matching': True,
3560 }, {
3561 'url': 'https://www.youtube.com/course',
3562 'only_matching': True,
3563 }, {
3564 'url': 'https://www.youtube.com/zsecurity',
3565 'only_matching': True,
3566 }, {
3567 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3568 'only_matching': True,
3569 }, {
3570 'url': 'https://www.youtube.com/TheYoungTurks/live',
3571 'only_matching': True,
3572 }, {
3573 'url': 'https://www.youtube.com/hashtag/cctv9',
3574 'info_dict': {
3575 'id': 'cctv9',
3576 'title': '#cctv9',
3577 },
3578 'playlist_mincount': 350,
3579 }, {
3580 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3581 'only_matching': True,
3582 }, {
3583 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3584 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3585 'only_matching': True
3586 }, {
3587 'note': '/browse/ should redirect to /channel/',
3588 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3589 'only_matching': True
3590 }, {
3591 'note': 'VLPL, should redirect to playlist?list=PL...',
3592 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3593 'info_dict': {
3594 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3595 'uploader': 'NoCopyrightSounds',
3596 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3597 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3598 'title': 'NCS Releases',
3599 },
3600 'playlist_mincount': 166,
3601 }, {
3602 'note': 'Topic, should redirect to playlist?list=UU...',
3603 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3604 'info_dict': {
3605 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3606 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3607 'title': 'Uploads from Royalty Free Music - Topic',
3608 'uploader': 'Royalty Free Music - Topic',
3609 },
3610 'expected_warnings': [
3611 'A channel/user page was given',
3612 'The URL does not have a videos tab',
3613 ],
3614 'playlist_mincount': 101,
3615 }, {
3616 'note': 'Topic without a UU playlist',
3617 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3618 'info_dict': {
3619 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3620 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3621 },
3622 'expected_warnings': [
3623 'A channel/user page was given',
3624 'The URL does not have a videos tab',
3625 'Falling back to channel URL',
3626 ],
3627 'playlist_mincount': 9,
3628 }, {
3629 'note': 'Youtube music Album',
3630 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3631 'info_dict': {
3632 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3633 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3634 },
3635 'playlist_count': 50,
3636 }, {
3637 'note': 'unlisted single video playlist',
3638 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3639 'info_dict': {
3640 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3641 'uploader': 'colethedj',
3642 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3643 'title': 'yt-dlp unlisted playlist test',
3644 'availability': 'unlisted'
3645 },
3646 'playlist_count': 1,
3647 }]
3648
3649 @classmethod
3650 def suitable(cls, url):
3651 return False if YoutubeIE.suitable(url) else super(
3652 YoutubeTabIE, cls).suitable(url)
3653
3654 def _extract_channel_id(self, webpage):
3655 channel_id = self._html_search_meta(
3656 'channelId', webpage, 'channel id', default=None)
3657 if channel_id:
3658 return channel_id
3659 channel_url = self._html_search_meta(
3660 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3661 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3662 'twitter:app:url:googleplay'), webpage, 'channel url')
3663 return self._search_regex(
3664 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3665 channel_url, 'channel id')
3666
3667 @staticmethod
3668 def _extract_basic_item_renderer(item):
3669 # Modified from _extract_grid_item_renderer
3670 known_basic_renderers = (
3671 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3672 )
3673 for key, renderer in item.items():
3674 if not isinstance(renderer, dict):
3675 continue
3676 elif key in known_basic_renderers:
3677 return renderer
3678 elif key.startswith('grid') and key.endswith('Renderer'):
3679 return renderer
3680
3681 def _grid_entries(self, grid_renderer):
3682 for item in grid_renderer['items']:
3683 if not isinstance(item, dict):
3684 continue
3685 renderer = self._extract_basic_item_renderer(item)
3686 if not isinstance(renderer, dict):
3687 continue
3688 title = self._get_text(renderer, 'title')
3689
3690 # playlist
3691 playlist_id = renderer.get('playlistId')
3692 if playlist_id:
3693 yield self.url_result(
3694 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3695 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3696 video_title=title)
3697 continue
3698 # video
3699 video_id = renderer.get('videoId')
3700 if video_id:
3701 yield self._extract_video(renderer)
3702 continue
3703 # channel
3704 channel_id = renderer.get('channelId')
3705 if channel_id:
3706 yield self.url_result(
3707 'https://www.youtube.com/channel/%s' % channel_id,
3708 ie=YoutubeTabIE.ie_key(), video_title=title)
3709 continue
3710 # generic endpoint URL support
3711 ep_url = urljoin('https://www.youtube.com/', try_get(
3712 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3713 compat_str))
3714 if ep_url:
3715 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3716 if ie.suitable(ep_url):
3717 yield self.url_result(
3718 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3719 break
3720
3721 def _shelf_entries_from_content(self, shelf_renderer):
3722 content = shelf_renderer.get('content')
3723 if not isinstance(content, dict):
3724 return
3725 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3726 if renderer:
3727 # TODO: add support for nested playlists so each shelf is processed
3728 # as separate playlist
3729 # TODO: this includes only first N items
3730 for entry in self._grid_entries(renderer):
3731 yield entry
3732 renderer = content.get('horizontalListRenderer')
3733 if renderer:
3734 # TODO
3735 pass
3736
3737 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3738 ep = try_get(
3739 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3740 compat_str)
3741 shelf_url = urljoin('https://www.youtube.com', ep)
3742 if shelf_url:
3743 # Skipping links to another channels, note that checking for
3744 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3745 # will not work
3746 if skip_channels and '/channels?' in shelf_url:
3747 return
3748 title = self._get_text(shelf_renderer, 'title')
3749 yield self.url_result(shelf_url, video_title=title)
3750 # Shelf may not contain shelf URL, fallback to extraction from content
3751 for entry in self._shelf_entries_from_content(shelf_renderer):
3752 yield entry
3753
3754 def _playlist_entries(self, video_list_renderer):
3755 for content in video_list_renderer['contents']:
3756 if not isinstance(content, dict):
3757 continue
3758 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3759 if not isinstance(renderer, dict):
3760 continue
3761 video_id = renderer.get('videoId')
3762 if not video_id:
3763 continue
3764 yield self._extract_video(renderer)
3765
3766 def _rich_entries(self, rich_grid_renderer):
3767 renderer = try_get(
3768 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3769 video_id = renderer.get('videoId')
3770 if not video_id:
3771 return
3772 yield self._extract_video(renderer)
3773
3774 def _video_entry(self, video_renderer):
3775 video_id = video_renderer.get('videoId')
3776 if video_id:
3777 return self._extract_video(video_renderer)
3778
3779 def _post_thread_entries(self, post_thread_renderer):
3780 post_renderer = try_get(
3781 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3782 if not post_renderer:
3783 return
3784 # video attachment
3785 video_renderer = try_get(
3786 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3787 video_id = video_renderer.get('videoId')
3788 if video_id:
3789 entry = self._extract_video(video_renderer)
3790 if entry:
3791 yield entry
3792 # playlist attachment
3793 playlist_id = try_get(
3794 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3795 if playlist_id:
3796 yield self.url_result(
3797 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3798 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3799 # inline video links
3800 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3801 for run in runs:
3802 if not isinstance(run, dict):
3803 continue
3804 ep_url = try_get(
3805 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3806 if not ep_url:
3807 continue
3808 if not YoutubeIE.suitable(ep_url):
3809 continue
3810 ep_video_id = YoutubeIE._match_id(ep_url)
3811 if video_id == ep_video_id:
3812 continue
3813 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3814
3815 def _post_thread_continuation_entries(self, post_thread_continuation):
3816 contents = post_thread_continuation.get('contents')
3817 if not isinstance(contents, list):
3818 return
3819 for content in contents:
3820 renderer = content.get('backstagePostThreadRenderer')
3821 if not isinstance(renderer, dict):
3822 continue
3823 for entry in self._post_thread_entries(renderer):
3824 yield entry
3825
3826 r''' # unused
3827 def _rich_grid_entries(self, contents):
3828 for content in contents:
3829 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3830 if video_renderer:
3831 entry = self._video_entry(video_renderer)
3832 if entry:
3833 yield entry
3834 '''
    def _entries(self, tab, item_id, account_syncid, ytcfg):
        """Generate all entries of a tab, following API continuations until
        no further continuation token can be extracted.

        @param tab            the selected tab renderer (see _extract_selected_tab)
        @param item_id        id used in progress messages for API pages
        @param account_syncid forwarded to generate_api_headers for auth'd requests
        @param ytcfg          page ytcfg used for API headers and context
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            # Walks parent_renderer['contents']: rich items are yielded
            # directly; itemSectionRenderers are dispatched by renderer type.
            # As a side effect it stores the next continuation token into
            # continuation_list[0] (mutable cell, see below).
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # Map of renderer key -> entry generator for that renderer
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                # Fall back to the section renderer's own continuation
                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            # Last resort: continuation of the parent renderer itself
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # One-element list acts as a mutable cell shared with the inner
        # generator above (Python 2 does not support nonlocal)
        continuation_list = [None]
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        # Continuation loop: keep requesting API pages until no token remains
        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(
                ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Carry the visitorData forward so subsequent requests stay in
            # the same session context
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Old-style continuation: response['continuationContents']
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # New-style continuation: onResponseReceived* -> continuationItems;
            # the items are wrapped back into a fake renderer dict keyed by the
            # attribute name the handler expects ('items' or 'contents')
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3951
3952 @staticmethod
3953 def _extract_selected_tab(tabs):
3954 for tab in tabs:
3955 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3956 if renderer.get('selected') is True:
3957 return renderer
3958 else:
3959 raise ExtractorError('Unable to find selected tab')
3960
3961 @classmethod
3962 def _extract_uploader(cls, data):
3963 uploader = {}
3964 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3965 owner = try_get(
3966 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3967 if owner:
3968 uploader['uploader'] = owner.get('text')
3969 uploader['uploader_id'] = try_get(
3970 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3971 uploader['uploader_url'] = urljoin(
3972 'https://www.youtube.com/',
3973 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3974 return {k: v for k, v in uploader.items() if v is not None}
3975
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build a playlist result for a tabbed page (channel/playlist/hashtag).

        Collects channel/playlist metadata from the page's metadata renderers,
        then delegates entry extraction of the selected tab to _entries.
        """
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        # Both names start as the same empty list but are only ever rebound,
        # never mutated, so sharing is harmless
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        # NOTE: 'renderer' here is either the channel or the playlist metadata
        # renderer, whichever was found above
        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            # For channel pages this is the channel id; for pure playlist
            # pages channel_id is still None and playlist_id falls back to
            # item_id below
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        # Normalize raw thumbnail dicts, dropping entries without a valid URL
        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # Hashtag pages carry their title in the hashtag header
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        # Append the tab name(s), e.g. "channel - Videos"
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # Playlist pages: take uploader info from the sidebar instead
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        ytcfg = self.extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
4049
4050 def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
4051 first_id = last_id = None
4052 ytcfg = self.extract_ytcfg(playlist_id, webpage)
4053 headers = self.generate_api_headers(
4054 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data))
4055 for page_num in itertools.count(1):
4056 videos = list(self._playlist_entries(playlist))
4057 if not videos:
4058 return
4059 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
4060 if start >= len(videos):
4061 return
4062 for video in videos[start:]:
4063 if video['id'] == first_id:
4064 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
4065 return
4066 yield video
4067 first_id = first_id or videos[0]['id']
4068 last_id = videos[-1]['id']
4069 watch_endpoint = try_get(
4070 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
4071 query = {
4072 'playlistId': playlist_id,
4073 'videoId': watch_endpoint.get('videoId') or last_id,
4074 'index': watch_endpoint.get('index') or len(videos),
4075 'params': watch_endpoint.get('params') or 'OAE%3D'
4076 }
4077 response = self._extract_response(
4078 item_id='%s page %d' % (playlist_id, page_num),
4079 query=query, ep='next', headers=headers, ytcfg=ytcfg,
4080 check_get_keys='contents'
4081 )
4082 playlist = try_get(
4083 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4084
4085 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
4086 title = playlist.get('title') or try_get(
4087 data, lambda x: x['titleText']['simpleText'], compat_str)
4088 playlist_id = playlist.get('playlistId') or item_id
4089
4090 # Delegating everything except mix playlists to regular tab-based playlist URL
4091 playlist_url = urljoin(url, try_get(
4092 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4093 compat_str))
4094 if playlist_url and playlist_url != url:
4095 return self.url_result(
4096 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4097 video_title=title)
4098
4099 return self.playlist_result(
4100 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4101 playlist_id=playlist_id, playlist_title=title)
4102
4103 def _extract_availability(self, data):
4104 """
4105 Gets the availability of a given playlist/tab.
4106 Note: Unless YouTube tells us explicitly, we do not assume it is public
4107 @param data: response
4108 """
4109 is_private = is_unlisted = None
4110 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4111 badge_labels = self._extract_badges(renderer)
4112
4113 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4114 privacy_dropdown_entries = try_get(
4115 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4116 for renderer_dict in privacy_dropdown_entries:
4117 is_selected = try_get(
4118 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4119 if not is_selected:
4120 continue
4121 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4122 if label:
4123 badge_labels.add(label.lower())
4124 break
4125
4126 for badge_label in badge_labels:
4127 if badge_label == 'unlisted':
4128 is_unlisted = True
4129 elif badge_label == 'private':
4130 is_private = True
4131 elif badge_label == 'public':
4132 is_unlisted = is_private = False
4133 return self._availability(is_private, False, False, False, is_unlisted)
4134
4135 @staticmethod
4136 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4137 sidebar_renderer = try_get(
4138 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4139 for item in sidebar_renderer:
4140 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4141 if renderer:
4142 return renderer
4143
4144 def _reload_with_unavailable_videos(self, item_id, data, webpage):
4145 """
4146 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
4147 """
4148 browse_id = params = None
4149 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4150 if not renderer:
4151 return
4152 menu_renderer = try_get(
4153 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
4154 for menu_item in menu_renderer:
4155 if not isinstance(menu_item, dict):
4156 continue
4157 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
4158 text = try_get(
4159 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
4160 if not text or text.lower() != 'show unavailable videos':
4161 continue
4162 browse_endpoint = try_get(
4163 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
4164 browse_id = browse_endpoint.get('browseId')
4165 params = browse_endpoint.get('params')
4166 break
4167
4168 ytcfg = self.extract_ytcfg(item_id, webpage)
4169 headers = self.generate_api_headers(
4170 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4171 visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
4172 query = {
4173 'params': params or 'wgYCCAA=',
4174 'browseId': browse_id or 'VL%s' % item_id
4175 }
4176 return self._extract_response(
4177 item_id=item_id, headers=headers, query=query,
4178 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
4179 note='Downloading API JSON with unavailable videos')
4180
4181 def _extract_webpage(self, url, item_id):
4182 retries = self.get_param('extractor_retries', 3)
4183 count = -1
4184 last_error = 'Incomplete yt initial data recieved'
4185 while count < retries:
4186 count += 1
4187 # Sometimes youtube returns a webpage with incomplete ytInitialData
4188 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4189 if count:
4190 self.report_warning('%s. Retrying ...' % last_error)
4191 webpage = self._download_webpage(
4192 url, item_id,
4193 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4194 data = self.extract_yt_initial_data(item_id, webpage)
4195 if data.get('contents') or data.get('currentVideoEndpoint'):
4196 break
4197 # Extract alerts here only when there is error
4198 self._extract_and_report_alerts(data)
4199 if count >= retries:
4200 raise ExtractorError(last_error)
4201 return webpage, data
4202
4203 @staticmethod
4204 def _smuggle_data(entries, data):
4205 for entry in entries:
4206 if data:
4207 entry['url'] = smuggle_url(entry['url'], data)
4208 yield entry
4209
4210 def _real_extract(self, url):
4211 url, smuggled_data = unsmuggle_url(url, {})
4212 if self.is_music_url(url):
4213 smuggled_data['is_music_url'] = True
4214 info_dict = self.__real_extract(url, smuggled_data)
4215 if info_dict.get('entries'):
4216 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4217 return info_dict
4218
    # Splits an already-validated URL into pre (base), tab (e.g. '/videos') and
    # post parts; the (?(channel_type)...) conditional group only captures a tab
    # when the channel_type group (defined in _VALID_URL) matched
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4220
    def __real_extract(self, url, smuggled_data):
        """
        Resolve a tab/channel/playlist URL and dispatch to the appropriate
        extraction path (browse tabs, watch-page playlist, or a single video).

        smuggled_data may contain 'is_music_url', which triggers the
        music.youtube.com specific URL rewriting below.
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        # compat options can disable some of the URL redirections below
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Re-match the (possibly rewritten) URL; missing groups become ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Rebuild the URL from the (possibly rewritten) parts and re-parse it
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data, only_once=True)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4335
4336
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to YoutubeTabIE and reject watch URLs carrying a video id."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # Decide music handling from the original URL before rewriting it
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4421
4422
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite youtu.be short links with a list param into full watch URLs."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4461
4462
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite 'ytuser:NAME' into the canonical /user/ channel URL."""
        user_id = self._match_id(url)
        return self.url_result(
            f'https://www.youtube.com/user/{user_id}',
            ie=YoutubeTabIE.ie_key(), video_id=user_id)
4476
4477
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos are exposed via the special 'LL' playlist
        return self.url_result(
            'https://www.youtube.com/playlist?list=LL', ie=YoutubeTabIE.ie_key())
4495
4496
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Extractor for 'ytsearch' keyword searches via the Innertube search endpoint."""
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None  # extra search 'params' payload; overridden by subclasses
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n video results for `query`, following continuations."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            # Merge the continuation token (if any) into the request payload
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page and continuation pages nest the results differently
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token found anywhere => no more pages
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
4564
4565
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor variant that returns the newest videos first."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # URL-encoded search params selecting upload-date ordering (see IE_DESC)
    _SEARCH_PARAMS = 'CAI%3D'
4571
4572
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """The search-URL variant matches real URLs, not the 'ytsearch' prefix."""
        return cls._VALID_URL

    def _real_extract(self, url):
        params = parse_qs(url)
        # Either 'search_query' or 'q' may carry the query string
        query = (params.get('search_query') or params.get('q'))[0]
        # Pass through YouTube's own filter/sort parameter when present
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4599
4600
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        # All feeds live under /feed/<name>; delegate to the tab extractor
        return self.url_result(
            f'https://www.youtube.com/feed/{self._FEED_NAME}', ie=YoutubeTabIE.ie_key())
4617
4618
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch later is exposed via the special 'WL' playlist
        return self.url_result(
            'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4631
4632
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extractor for the recommended-videos feed (':ytrec' / youtube.com home)."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # unlike other feeds, recommendations work anonymously
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4648
4649
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extractor for the subscriptions feed (':ytsubs')."""
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4661
4662
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Extractor for the watch-history feed (':ythis')."""
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4671
4672
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catches watch/attribution URLs whose video id is missing (typically because
    an unquoted '&' truncated the command line) and raises a helpful error.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: these URLs can never yield a video
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
4720
4721
class YoutubeClipIE(InfoExtractor):
    """Stub for /clip/ URLs: warns and downloads the full video via Generic."""
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
        return self.url_result(url, 'Generic')
4730
4731
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video id is shorter than the required 11 chars."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Matches ids of 1-10 chars; valid ids are exactly 11, so this only fires on truncation
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: a truncated id can never resolve to a video
        video_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
            expected=True)