1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 is_html,
42 mimetype2ext,
43 network_exceptions,
44 orderedSet,
45 parse_codecs,
46 parse_count,
47 parse_duration,
48 parse_iso8601,
49 parse_qs,
50 qualities,
51 remove_end,
52 remove_start,
53 smuggle_url,
54 str_or_none,
55 str_to_int,
56 traverse_obj,
57 try_get,
58 unescapeHTML,
59 unified_strdate,
60 unsmuggle_url,
61 update_url_query,
62 url_or_none,
63 urljoin,
64 variadic,
65 )
66
67
68 # any clients starting with _ cannot be explicitly requested by the user
69 INNERTUBE_CLIENTS = {
70 'web': {
71 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
72 'INNERTUBE_CONTEXT': {
73 'client': {
74 'clientName': 'WEB',
75 'clientVersion': '2.20210622.10.00',
76 }
77 },
78 'INNERTUBE_CONTEXT_CLIENT_NAME': 1
79 },
80 'web_embedded': {
81 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
82 'INNERTUBE_CONTEXT': {
83 'client': {
84 'clientName': 'WEB_EMBEDDED_PLAYER',
85 'clientVersion': '1.20210620.0.1',
86 },
87 },
88 'INNERTUBE_CONTEXT_CLIENT_NAME': 56
89 },
90 'web_music': {
91 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
92 'INNERTUBE_HOST': 'music.youtube.com',
93 'INNERTUBE_CONTEXT': {
94 'client': {
95 'clientName': 'WEB_REMIX',
96 'clientVersion': '1.20210621.00.00',
97 }
98 },
99 'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
100 },
101 'web_creator': {
102 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
103 'INNERTUBE_CONTEXT': {
104 'client': {
105 'clientName': 'WEB_CREATOR',
106 'clientVersion': '1.20210621.00.00',
107 }
108 },
109 'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
110 },
111 'android': {
112 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
113 'INNERTUBE_CONTEXT': {
114 'client': {
115 'clientName': 'ANDROID',
116 'clientVersion': '16.20',
117 }
118 },
119 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
120 'REQUIRE_JS_PLAYER': False
121 },
122 'android_embedded': {
123 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
124 'INNERTUBE_CONTEXT': {
125 'client': {
126 'clientName': 'ANDROID_EMBEDDED_PLAYER',
127 'clientVersion': '16.20',
128 },
129 },
130 'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
131 'REQUIRE_JS_PLAYER': False
132 },
133 'android_music': {
134 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
135 'INNERTUBE_HOST': 'music.youtube.com',
136 'INNERTUBE_CONTEXT': {
137 'client': {
138 'clientName': 'ANDROID_MUSIC',
139 'clientVersion': '4.32',
140 }
141 },
142 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
143 'REQUIRE_JS_PLAYER': False
144 },
145 'android_creator': {
146 'INNERTUBE_CONTEXT': {
147 'client': {
148 'clientName': 'ANDROID_CREATOR',
149 'clientVersion': '21.24.100',
150 },
151 },
152 'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
153 'REQUIRE_JS_PLAYER': False
154 },
155 # ios has HLS live streams
156 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
157 'ios': {
158 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
159 'INNERTUBE_CONTEXT': {
160 'client': {
161 'clientName': 'IOS',
162 'clientVersion': '16.20',
163 }
164 },
165 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
166 'REQUIRE_JS_PLAYER': False
167 },
168 'ios_embedded': {
169 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
170 'INNERTUBE_CONTEXT': {
171 'client': {
172 'clientName': 'IOS_MESSAGES_EXTENSION',
173 'clientVersion': '16.20',
174 },
175 },
176 'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
177 'REQUIRE_JS_PLAYER': False
178 },
179 'ios_music': {
180 'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
181 'INNERTUBE_HOST': 'music.youtube.com',
182 'INNERTUBE_CONTEXT': {
183 'client': {
184 'clientName': 'IOS_MUSIC',
185 'clientVersion': '4.32',
186 },
187 },
188 'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
189 'REQUIRE_JS_PLAYER': False
190 },
191 'ios_creator': {
192 'INNERTUBE_CONTEXT': {
193 'client': {
194 'clientName': 'IOS_CREATOR',
195 'clientVersion': '21.24.100',
196 },
197 },
198 'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
199 'REQUIRE_JS_PLAYER': False
200 },
201 # mweb has 'ultralow' formats
202 # See: https://github.com/yt-dlp/yt-dlp/pull/557
203 'mweb': {
204 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
205 'INNERTUBE_CONTEXT': {
206 'client': {
207 'clientName': 'MWEB',
208 'clientVersion': '2.20210721.07.00',
209 }
210 },
211 'INNERTUBE_CONTEXT_CLIENT_NAME': 2
212 },
213 }
214
215
216 def build_innertube_clients():
217 third_party = {
218 'embedUrl': 'https://google.com', # Can be any valid URL
219 }
220 base_clients = ('android', 'web', 'ios', 'mweb')
221 priority = qualities(base_clients[::-1])
222
223 for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
224 ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
225 ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
226 ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
227 ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
228 ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
229
230 if client in base_clients:
231 INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
232 agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
233 agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
234 agegate_ytcfg['priority'] -= 1
235 elif client.endswith('_embedded'):
236 ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
237 ytcfg['priority'] -= 2
238 else:
239 ytcfg['priority'] -= 3
240
241
242 build_innertube_clients()
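# An illustrative sketch only (not used at runtime): once build_innertube_clients() has
# run, every entry carries the shared defaults and each base client additionally gains
# an '_agegate' variant. Based on the code above, INNERTUBE_CLIENTS['web_agegate'] should
# look roughly like the following; the 'priority' value is an assumption derived from
# qualities(('mweb', 'ios', 'web', 'android')):
#
#   {
#       'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
#       'INNERTUBE_HOST': 'www.youtube.com',
#       'REQUIRE_JS_PLAYER': True,
#       'INNERTUBE_CONTEXT': {
#           'client': {
#               'clientName': 'WEB',
#               'clientVersion': '2.20210622.10.00',
#               'hl': 'en',
#               'clientScreen': 'EMBED',
#           },
#           'thirdParty': {'embedUrl': 'https://google.com'},
#       },
#       'INNERTUBE_CONTEXT_CLIENT_NAME': 1,
#       'priority': 19,  # 10 * 2 for 'web', minus 1 for the agegate variant
#   }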
243
244
245 class YoutubeBaseInfoExtractor(InfoExtractor):
246 """Provide base functions for Youtube extractors"""
247
248 _RESERVED_NAMES = (
249 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
250 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
251 r'browse|oembed|get_video_info|iframe_api|s/player|'
252 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
253
254 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
255
256 _NETRC_MACHINE = 'youtube'
257
258 # If True, an error will be raised if no login info is provided
259 _LOGIN_REQUIRED = False
260
261 r''' # Unused since login is broken
262 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
263 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
264
265 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
266 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
267 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
268 '''
269
270 def _login(self):
271 """
272 Attempt to log in to YouTube.
273 True is returned if successful or skipped.
274 False is returned if login failed.
275
276 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
277 """
278
279 def warn(message):
280 self.report_warning(message)
281
282 # username+password login is broken
283 if (self._LOGIN_REQUIRED
284 and self.get_param('cookiefile') is None
285 and self.get_param('cookiesfrombrowser') is None):
286 self.raise_login_required(
287 'Login details are needed to download this content', method='cookies')
288 username, password = self._get_login_info()
289 if username:
290 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
291 return
292
293 # Everything below this is broken!
294 r'''
295 # No authentication to be performed
296 if username is None:
297 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
298 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
299 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
300 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
301 return True
302
303 login_page = self._download_webpage(
304 self._LOGIN_URL, None,
305 note='Downloading login page',
306 errnote='unable to fetch login page', fatal=False)
307 if login_page is False:
308 return
309
310 login_form = self._hidden_inputs(login_page)
311
312 def req(url, f_req, note, errnote):
313 data = login_form.copy()
314 data.update({
315 'pstMsg': 1,
316 'checkConnection': 'youtube',
317 'checkedDomains': 'youtube',
318 'hl': 'en',
319 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
320 'f.req': json.dumps(f_req),
321 'flowName': 'GlifWebSignIn',
322 'flowEntry': 'ServiceLogin',
323 # TODO: reverse actual botguard identifier generation algo
324 'bgRequest': '["identifier",""]',
325 })
326 return self._download_json(
327 url, None, note=note, errnote=errnote,
328 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
329 fatal=False,
330 data=urlencode_postdata(data), headers={
331 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
332 'Google-Accounts-XSRF': 1,
333 })
334
335 lookup_req = [
336 username,
337 None, [], None, 'US', None, None, 2, False, True,
338 [
339 None, None,
340 [2, 1, None, 1,
341 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
342 None, [], 4],
343 1, [None, None, []], None, None, None, True
344 ],
345 username,
346 ]
347
348 lookup_results = req(
349 self._LOOKUP_URL, lookup_req,
350 'Looking up account info', 'Unable to look up account info')
351
352 if lookup_results is False:
353 return False
354
355 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
356 if not user_hash:
357 warn('Unable to extract user hash')
358 return False
359
360 challenge_req = [
361 user_hash,
362 None, 1, None, [1, None, None, None, [password, None, True]],
363 [
364 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
365 1, [None, None, []], None, None, None, True
366 ]]
367
368 challenge_results = req(
369 self._CHALLENGE_URL, challenge_req,
370 'Logging in', 'Unable to log in')
371
372 if challenge_results is False:
373 return
374
375 login_res = try_get(challenge_results, lambda x: x[0][5], list)
376 if login_res:
377 login_msg = try_get(login_res, lambda x: x[5], compat_str)
378 warn(
379 'Unable to login: %s' % 'Invalid password'
380 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
381 return False
382
383 res = try_get(challenge_results, lambda x: x[0][-1], list)
384 if not res:
385 warn('Unable to extract result entry')
386 return False
387
388 login_challenge = try_get(res, lambda x: x[0][0], list)
389 if login_challenge:
390 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
391 if challenge_str == 'TWO_STEP_VERIFICATION':
392 # SEND_SUCCESS - TFA code has been successfully sent to phone
393 # QUOTA_EXCEEDED - reached the limit of TFA codes
394 status = try_get(login_challenge, lambda x: x[5], compat_str)
395 if status == 'QUOTA_EXCEEDED':
396 warn('Exceeded the limit of TFA codes, try later')
397 return False
398
399 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
400 if not tl:
401 warn('Unable to extract TL')
402 return False
403
404 tfa_code = self._get_tfa_info('2-step verification code')
405
406 if not tfa_code:
407 warn(
408 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
409 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
410 return False
411
412 tfa_code = remove_start(tfa_code, 'G-')
413
414 tfa_req = [
415 user_hash, None, 2, None,
416 [
417 9, None, None, None, None, None, None, None,
418 [None, tfa_code, True, 2]
419 ]]
420
421 tfa_results = req(
422 self._TFA_URL.format(tl), tfa_req,
423 'Submitting TFA code', 'Unable to submit TFA code')
424
425 if tfa_results is False:
426 return False
427
428 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
429 if tfa_res:
430 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
431 warn(
432 'Unable to finish TFA: %s' % 'Invalid TFA code'
433 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
434 return False
435
436 check_cookie_url = try_get(
437 tfa_results, lambda x: x[0][-1][2], compat_str)
438 else:
439 CHALLENGES = {
440 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
441 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
442 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
443 }
444 challenge = CHALLENGES.get(
445 challenge_str,
446 '%s returned error %s.' % (self.IE_NAME, challenge_str))
447 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
448 return False
449 else:
450 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
451
452 if not check_cookie_url:
453 warn('Unable to extract CheckCookie URL')
454 return False
455
456 check_cookie_results = self._download_webpage(
457 check_cookie_url, None, 'Checking cookie', fatal=False)
458
459 if check_cookie_results is False:
460 return False
461
462 if 'https://myaccount.google.com/' not in check_cookie_results:
463 warn('Unable to log in')
464 return False
465
466 return True
467 '''
468
469 def _initialize_consent(self):
470 cookies = self._get_cookies('https://www.youtube.com/')
471 if cookies.get('__Secure-3PSID'):
472 return
473 consent_id = None
474 consent = cookies.get('CONSENT')
475 if consent:
476 if 'YES' in consent.value:
477 return
478 consent_id = self._search_regex(
479 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
480 if not consent_id:
481 consent_id = random.randint(100, 999)
482 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
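# For illustration: if YouTube sent a cookie like CONSENT=PENDING+383, the code above
# re-uses the 383 id and sets CONSENT=YES+cb.20210328-17-p0.en+FX+383; when no PENDING
# id is found, a random three-digit id is used instead (383 is a made-up value here).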
483
484 def _real_initialize(self):
485 self._initialize_consent()
486 if self._downloader is None:
487 return
488 if not self._login():
489 return
490
491 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
492 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
493 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
494
495 def _get_default_ytcfg(self, client='web'):
496 return copy.deepcopy(INNERTUBE_CLIENTS[client])
497
498 def _get_innertube_host(self, client='web'):
499 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
500
501 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
502 # try_get but with fallback to default ytcfg client values when present
503 _func = lambda y: try_get(y, getter, expected_type)
504 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
505
506 def _extract_client_name(self, ytcfg, default_client='web'):
507 return self._ytcfg_get_safe(
508 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
509 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
510
511 def _extract_client_version(self, ytcfg, default_client='web'):
512 return self._ytcfg_get_safe(
513 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
514 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
515
516 def _extract_api_key(self, ytcfg=None, default_client='web'):
517 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
518
519 def _extract_context(self, ytcfg=None, default_client='web'):
520 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
521 context = _get_context(ytcfg)
522 if context:
523 return context
524
525 context = _get_context(self._get_default_ytcfg(default_client))
526 if not ytcfg:
527 return context
528
529 # Recreate the client context (required)
530 context['client'].update({
531 'clientVersion': self._extract_client_version(ytcfg, default_client),
532 'clientName': self._extract_client_name(ytcfg, default_client),
533 })
534 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
535 if visitor_data:
536 context['client']['visitorData'] = visitor_data
537 return context
538
539 _SAPISID = None
540
541 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
542 time_now = round(time.time())
543 if self._SAPISID is None:
544 yt_cookies = self._get_cookies('https://www.youtube.com')
545 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
546 # See: https://github.com/yt-dlp/yt-dlp/issues/393
547 sapisid_cookie = dict_get(
548 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
549 if sapisid_cookie and sapisid_cookie.value:
550 self._SAPISID = sapisid_cookie.value
551 self.write_debug('Extracted SAPISID cookie')
552 # The SAPISID cookie is required, so copy the __Secure-3PAPISID value over if SAPISID is not already present
553 if not yt_cookies.get('SAPISID'):
554 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
555 self._set_cookie(
556 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
557 else:
558 self._SAPISID = False
559 if not self._SAPISID:
560 return None
561 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
562 sapisidhash = hashlib.sha1(
563 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
564 return f'SAPISIDHASH {time_now}_{sapisidhash}'
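# A minimal standalone sketch of the SAPISIDHASH scheme used above; the cookie value
# and timestamp are fabricated and only illustrate the resulting header format:
#
#   import hashlib
#   import time
#
#   sapisid = 'AbCdEfGhIjKlMnOpQr_Example'   # value of the SAPISID cookie (made up)
#   origin = 'https://www.youtube.com'
#   time_now = round(time.time())            # e.g. 1626300000
#   sapisidhash = hashlib.sha1(f'{time_now} {sapisid} {origin}'.encode('utf-8')).hexdigest()
#   authorization = f'SAPISIDHASH {time_now}_{sapisidhash}'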
565
566 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
567 note='Downloading API JSON', errnote='Unable to download API page',
568 context=None, api_key=None, api_hostname=None, default_client='web'):
569
570 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
571 data.update(query)
572 real_headers = self.generate_api_headers(default_client=default_client)
573 real_headers.update({'content-type': 'application/json'})
574 if headers:
575 real_headers.update(headers)
576 return self._download_json(
577 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
578 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
579 data=json.dumps(data).encode('utf8'), headers=real_headers,
580 query={'key': api_key or self._extract_api_key()})
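# Rough shape of the request _call_api() sends, assembled from the 'web' defaults above
# (the endpoint placeholder and the query items shown are examples, not captured traffic):
#
#   POST https://www.youtube.com/youtubei/v1/<ep>?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8
#   content-type: application/json
#
#   {"context": {"client": {"clientName": "WEB", "clientVersion": "2.20210622.10.00", "hl": "en"}},
#    ...query items such as "videoId" or "continuation"...}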
581
582 def extract_yt_initial_data(self, item_id, webpage, fatal=True):
583 data = self._search_regex(
584 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
585 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
586 if data:
587 return self._parse_json(data, item_id, fatal=fatal)
588
589 @staticmethod
590 def _extract_session_index(*data):
591 """
592 Index of current account in account list.
593 See: https://github.com/yt-dlp/yt-dlp/pull/519
594 """
595 for ytcfg in data:
596 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
597 if session_index is not None:
598 return session_index
599
600 # Deprecated?
601 def _extract_identity_token(self, ytcfg=None, webpage=None):
602 if ytcfg:
603 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
604 if token:
605 return token
606 if webpage:
607 return self._search_regex(
608 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
609 'identity token', default=None, fatal=False)
610
611 @staticmethod
612 def _extract_account_syncid(*args):
613 """
614 Extract syncId required to download private playlists of secondary channels
615 @params response and/or ytcfg
616 """
617 for data in args:
618 # ytcfg includes channel_syncid if on secondary channel
619 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
620 if delegated_sid:
621 return delegated_sid
622 sync_ids = (try_get(
623 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
624 lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
625 if len(sync_ids) >= 2 and sync_ids[1]:
626 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
627 # and just "user_syncid||" for primary channel. We only want the channel_syncid
628 return sync_ids[0]
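# Example of the two datasyncId shapes handled above (ids fabricated):
#   'channel-syncid||user-syncid'  ->  returns 'channel-syncid' (secondary channel)
#   'user-syncid||'                ->  returns nothing (primary channel)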
629
630 @staticmethod
631 def _extract_visitor_data(*args):
632 """
633 Extracts visitorData from an API response or ytcfg
634 Appears to be used to track session state
635 """
636 return traverse_obj(
637 args, (..., ('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
638 expected_type=compat_str, get_all=False)
639
640 @property
641 def is_authenticated(self):
642 return bool(self._generate_sapisidhash_header())
643
644 def extract_ytcfg(self, video_id, webpage):
645 if not webpage:
646 return {}
647 return self._parse_json(
648 self._search_regex(
649 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
650 default='{}'), video_id, fatal=False) or {}
651
652 def generate_api_headers(
653 self, *, ytcfg=None, account_syncid=None, session_index=None,
654 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
655
656 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
657 headers = {
658 'X-YouTube-Client-Name': compat_str(
659 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
660 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
661 'Origin': origin,
662 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
663 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
664 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
665 }
666 if session_index is None:
667 session_index = self._extract_session_index(ytcfg)
668 if account_syncid or session_index is not None:
669 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
670
671 auth = self._generate_sapisidhash_header(origin)
672 if auth is not None:
673 headers['Authorization'] = auth
674 headers['X-Origin'] = origin
675 return {h: v for h, v in headers.items() if v is not None}
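# Typical output of generate_api_headers() for an unauthenticated 'web' request,
# sketched from the defaults above (not a recorded request):
#
#   {
#       'X-YouTube-Client-Name': '1',
#       'X-YouTube-Client-Version': '2.20210622.10.00',
#       'Origin': 'https://www.youtube.com',
#   }
#
# When cookies are available, 'Authorization' (SAPISIDHASH), 'X-Origin' and, given a
# session index, 'X-Goog-AuthUser' are added as well.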
676
677 @staticmethod
678 def _build_api_continuation_query(continuation, ctp=None):
679 query = {
680 'continuation': continuation
681 }
682 # TODO: Inconsistency with clickTrackingParams.
683 # Currently we have a fixed ctp contained within context (from ytcfg)
684 # and a ctp in root query for continuation.
685 if ctp:
686 query['clickTracking'] = {'clickTrackingParams': ctp}
687 return query
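# Shape of the continuation query built above (token and ctp values are dummies):
#   {
#       'continuation': '4qmFsgKcARIYVUNf...',
#       'clickTracking': {'clickTrackingParams': 'CBQQ7zsYACITCP...'},
#   }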
688
689 @classmethod
690 def _extract_next_continuation_data(cls, renderer):
691 next_continuation = try_get(
692 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
693 lambda x: x['continuation']['reloadContinuationData']), dict)
694 if not next_continuation:
695 return
696 continuation = next_continuation.get('continuation')
697 if not continuation:
698 return
699 ctp = next_continuation.get('clickTrackingParams')
700 return cls._build_api_continuation_query(continuation, ctp)
701
702 @classmethod
703 def _extract_continuation_ep_data(cls, continuation_ep: dict):
704 if isinstance(continuation_ep, dict):
705 continuation = try_get(
706 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
707 if not continuation:
708 return
709 ctp = continuation_ep.get('clickTrackingParams')
710 return cls._build_api_continuation_query(continuation, ctp)
711
712 @classmethod
713 def _extract_continuation(cls, renderer):
714 next_continuation = cls._extract_next_continuation_data(renderer)
715 if next_continuation:
716 return next_continuation
717
718 contents = []
719 for key in ('contents', 'items'):
720 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
721
722 for content in contents:
723 if not isinstance(content, dict):
724 continue
725 continuation_ep = try_get(
726 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
727 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
728 dict)
729 continuation = cls._extract_continuation_ep_data(continuation_ep)
730 if continuation:
731 return continuation
732
733 @classmethod
734 def _extract_alerts(cls, data):
735 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
736 if not isinstance(alert_dict, dict):
737 continue
738 for alert in alert_dict.values():
739 alert_type = alert.get('type')
740 if not alert_type:
741 continue
742 message = cls._get_text(alert, 'text')
743 if message:
744 yield alert_type, message
745
746 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
747 errors = []
748 warnings = []
749 for alert_type, alert_message in alerts:
750 if alert_type.lower() == 'error' and fatal:
751 errors.append([alert_type, alert_message])
752 else:
753 warnings.append([alert_type, alert_message])
754
755 for alert_type, alert_message in (warnings + errors[:-1]):
756 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
757 if errors:
758 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
759
760 def _extract_and_report_alerts(self, data, *args, **kwargs):
761 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
762
763 def _extract_badges(self, renderer: dict):
764 badges = set()
765 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
766 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
767 if label:
768 badges.add(label.lower())
769 return badges
770
771 @staticmethod
772 def _get_text(data, *path_list, max_runs=None):
773 for path in path_list or [None]:
774 if path is None:
775 obj = [data]
776 else:
777 obj = traverse_obj(data, path, default=[])
778 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
779 obj = [obj]
780 for item in obj:
781 text = try_get(item, lambda x: x['simpleText'], compat_str)
782 if text:
783 return text
784 runs = try_get(item, lambda x: x['runs'], list) or []
785 if not runs and isinstance(item, list):
786 runs = item
787
788 runs = runs[:min(len(runs), max_runs or len(runs))]
789 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
790 if text:
791 return text
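# The renderer text shapes _get_text() understands, with made-up payloads:
#   {'simpleText': 'Example title'}                        ->  'Example title'
#   {'runs': [{'text': 'Example '}, {'text': 'title'}]}    ->  'Example title'
# (with max_runs=1 only the first run, 'Example ', would be joined)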
792
793 def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
794 ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
795 default_client='web'):
796 response = None
797 last_error = None
798 count = -1
799 retries = self.get_param('extractor_retries', 3)
800 if check_get_keys is None:
801 check_get_keys = []
802 while count < retries:
803 count += 1
804 if last_error:
805 self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
806 try:
807 response = self._call_api(
808 ep=ep, fatal=True, headers=headers,
809 video_id=item_id, query=query,
810 context=self._extract_context(ytcfg, default_client),
811 api_key=self._extract_api_key(ytcfg, default_client),
812 api_hostname=api_hostname, default_client=default_client,
813 note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
814 except ExtractorError as e:
815 if isinstance(e.cause, network_exceptions):
816 if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
817 e.cause.seek(0)
818 yt_error = try_get(
819 self._parse_json(e.cause.read().decode(), item_id, fatal=False),
820 lambda x: x['error']['message'], compat_str)
821 if yt_error:
822 self._report_alerts([('ERROR', yt_error)], fatal=False)
823 # Downloading page may result in intermittent 5xx HTTP error
824 # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
825 # We also want to catch all other network exceptions since errors in later pages can be troublesome
826 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
827 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
828 last_error = error_to_compat_str(e.cause or e.msg)
829 if count < retries:
830 continue
831 if fatal:
832 raise
833 else:
834 self.report_warning(error_to_compat_str(e))
835 return
836
837 else:
838 try:
839 self._extract_and_report_alerts(response, only_once=True)
840 except ExtractorError as e:
841 # YouTube servers may return errors we want to retry on in a 200 OK response
842 # See: https://github.com/yt-dlp/yt-dlp/issues/839
843 if 'unknown error' in e.msg.lower():
844 last_error = e.msg
845 continue
846 if fatal:
847 raise
848 self.report_warning(error_to_compat_str(e))
849 return
850 if not check_get_keys or dict_get(response, check_get_keys):
851 break
852 # YouTube sometimes sends incomplete data
853 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
854 last_error = 'Incomplete data received'
855 if count >= retries:
856 if fatal:
857 raise ExtractorError(last_error)
858 else:
859 self.report_warning(last_error)
860 return
861 return response
862
863 @staticmethod
864 def is_music_url(url):
865 return re.match(r'https?://music\.youtube\.com/', url) is not None
866
867 def _extract_video(self, renderer):
868 video_id = renderer.get('videoId')
869 title = self._get_text(renderer, 'title')
870 description = self._get_text(renderer, 'descriptionSnippet')
871 duration = parse_duration(self._get_text(
872 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
873 view_count_text = self._get_text(renderer, 'viewCountText') or ''
874 view_count = str_to_int(self._search_regex(
875 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
876 'view count', default=None))
877
878 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
879
880 return {
881 '_type': 'url',
882 'ie_key': YoutubeIE.ie_key(),
883 'id': video_id,
884 'url': f'https://www.youtube.com/watch?v={video_id}',
885 'title': title,
886 'description': description,
887 'duration': duration,
888 'view_count': view_count,
889 'uploader': uploader,
890 }
891
892
893 class YoutubeIE(YoutubeBaseInfoExtractor):
894 IE_DESC = 'YouTube.com'
895 _INVIDIOUS_SITES = (
896 # invidious-redirect websites
897 r'(?:www\.)?redirect\.invidious\.io',
898 r'(?:(?:www|dev)\.)?invidio\.us',
899 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
900 r'(?:www\.)?invidious\.pussthecat\.org',
901 r'(?:www\.)?invidious\.zee\.li',
902 r'(?:www\.)?invidious\.ethibox\.fr',
903 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
904 # youtube-dl invidious instances list
905 r'(?:(?:www|no)\.)?invidiou\.sh',
906 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
907 r'(?:www\.)?invidious\.kabi\.tk',
908 r'(?:www\.)?invidious\.mastodon\.host',
909 r'(?:www\.)?invidious\.zapashcanon\.fr',
910 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
911 r'(?:www\.)?invidious\.tinfoil-hat\.net',
912 r'(?:www\.)?invidious\.himiko\.cloud',
913 r'(?:www\.)?invidious\.reallyancient\.tech',
914 r'(?:www\.)?invidious\.tube',
915 r'(?:www\.)?invidiou\.site',
916 r'(?:www\.)?invidious\.site',
917 r'(?:www\.)?invidious\.xyz',
918 r'(?:www\.)?invidious\.nixnet\.xyz',
919 r'(?:www\.)?invidious\.048596\.xyz',
920 r'(?:www\.)?invidious\.drycat\.fr',
921 r'(?:www\.)?inv\.skyn3t\.in',
922 r'(?:www\.)?tube\.poal\.co',
923 r'(?:www\.)?tube\.connect\.cafe',
924 r'(?:www\.)?vid\.wxzm\.sx',
925 r'(?:www\.)?vid\.mint\.lgbt',
926 r'(?:www\.)?vid\.puffyan\.us',
927 r'(?:www\.)?yewtu\.be',
928 r'(?:www\.)?yt\.elukerio\.org',
929 r'(?:www\.)?yt\.lelux\.fi',
930 r'(?:www\.)?invidious\.ggc-project\.de',
931 r'(?:www\.)?yt\.maisputain\.ovh',
932 r'(?:www\.)?ytprivate\.com',
933 r'(?:www\.)?invidious\.13ad\.de',
934 r'(?:www\.)?invidious\.toot\.koeln',
935 r'(?:www\.)?invidious\.fdn\.fr',
936 r'(?:www\.)?watch\.nettohikari\.com',
937 r'(?:www\.)?invidious\.namazso\.eu',
938 r'(?:www\.)?invidious\.silkky\.cloud',
939 r'(?:www\.)?invidious\.exonip\.de',
940 r'(?:www\.)?invidious\.riverside\.rocks',
941 r'(?:www\.)?invidious\.blamefran\.net',
942 r'(?:www\.)?invidious\.moomoo\.de',
943 r'(?:www\.)?ytb\.trom\.tf',
944 r'(?:www\.)?yt\.cyberhost\.uk',
945 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
946 r'(?:www\.)?qklhadlycap4cnod\.onion',
947 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
948 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
949 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
950 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
951 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
952 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
953 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
954 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
955 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
956 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
957 )
958 _VALID_URL = r"""(?x)^
959 (
960 (?:https?://|//) # http(s):// or protocol-independent URL
961 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
962 (?:www\.)?deturl\.com/www\.youtube\.com|
963 (?:www\.)?pwnyoutube\.com|
964 (?:www\.)?hooktube\.com|
965 (?:www\.)?yourepeat\.com|
966 tube\.majestyc\.net|
967 %(invidious)s|
968 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
969 (?:.*?\#/)? # handle anchor (#/) redirect urls
970 (?: # the various things that can precede the ID:
971 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
972 |(?: # or the v= param in all its forms
973 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
974 (?:\?|\#!?) # the params delimiter ? or # or #!
975 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
976 v=
977 )
978 ))
979 |(?:
980 youtu\.be| # just youtu.be/xxxx
981 vid\.plus| # or vid.plus/xxxx
982 zwearz\.com/watch| # or zwearz.com/watch/xxxx
983 %(invidious)s
984 )/
985 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
986 )
987 )? # all until now is optional -> you can pass the naked ID
988 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
989 (?(1).+)? # if we found the ID, everything can follow
990 (?:\#|$)""" % {
991 'invidious': '|'.join(_INVIDIOUS_SITES),
992 }
993 _PLAYER_INFO_RE = (
994 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
995 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
996 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
997 )
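# Example player JS paths the patterns above are meant to match (the hash is made up):
#   /s/player/f1ca6900/player_ias.vflset/en_US/base.js        ->  id 'f1ca6900'
#   /f1ca6900/player-plasma-ias-phone-en_US.vflset/base.js    ->  id 'f1ca6900'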
998 _formats = {
999 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1000 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
1001 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
1002 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
1003 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
1004 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1005 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1006 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1007 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
1008 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
1009 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1010 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
1011 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1012 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
1013 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1014 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
1015 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1016 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
1017
1018
1019 # 3D videos
1020 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1021 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
1022 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1023 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
1024 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
1025 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1026 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1027
1028 # Apple HTTP Live Streaming
1029 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1030 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1031 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1032 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1033 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1034 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1035 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1036 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1037
1038 # DASH mp4 video
1039 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1040 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1041 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1042 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1043 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1044 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1045 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1046 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1047 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1048 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1049 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1050 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1051
1052 # Dash mp4 audio
1053 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1054 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1055 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1056 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1057 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1058 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1059 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1060
1061 # Dash webm
1062 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1063 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1064 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1065 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1066 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1067 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1068 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1069 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1070 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1071 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1072 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1073 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1074 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1075 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1076 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1077 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1078 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1079 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1080 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1081 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1082 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1083 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1084
1085 # Dash webm audio
1086 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1087 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1088
1089 # Dash webm audio with opus inside
1090 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1091 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1092 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1093
1094 # RTMP (unnamed)
1095 '_rtmp': {'protocol': 'rtmp'},
1096
1097 # av01 video only formats sometimes served with "unknown" codecs
1098 '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1099 '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1100 '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1101 '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1102 '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1103 '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1104 '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1105 '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1106 }
1107 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1108
1109 _GEO_BYPASS = False
1110
1111 IE_NAME = 'youtube'
1112 _TESTS = [
1113 {
1114 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1115 'info_dict': {
1116 'id': 'BaW_jenozKc',
1117 'ext': 'mp4',
1118 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1119 'uploader': 'Philipp Hagemeister',
1120 'uploader_id': 'phihag',
1121 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1122 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1123 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1124 'upload_date': '20121002',
1125 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1126 'categories': ['Science & Technology'],
1127 'tags': ['youtube-dl'],
1128 'duration': 10,
1129 'view_count': int,
1130 'like_count': int,
1131 'dislike_count': int,
1132 'start_time': 1,
1133 'end_time': 9,
1134 }
1135 },
1136 {
1137 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1138 'note': 'Embed-only video (#1746)',
1139 'info_dict': {
1140 'id': 'yZIXLfi8CZQ',
1141 'ext': 'mp4',
1142 'upload_date': '20120608',
1143 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1144 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1145 'uploader': 'SET India',
1146 'uploader_id': 'setindia',
1147 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1148 'age_limit': 18,
1149 },
1150 'skip': 'Private video',
1151 },
1152 {
1153 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1154 'note': 'Use the first video ID in the URL',
1155 'info_dict': {
1156 'id': 'BaW_jenozKc',
1157 'ext': 'mp4',
1158 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1159 'uploader': 'Philipp Hagemeister',
1160 'uploader_id': 'phihag',
1161 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1162 'upload_date': '20121002',
1163 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1164 'categories': ['Science & Technology'],
1165 'tags': ['youtube-dl'],
1166 'duration': 10,
1167 'view_count': int,
1168 'like_count': int,
1169 'dislike_count': int,
1170 },
1171 'params': {
1172 'skip_download': True,
1173 },
1174 },
1175 {
1176 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1177 'note': '256k DASH audio (format 141) via DASH manifest',
1178 'info_dict': {
1179 'id': 'a9LDPn-MO4I',
1180 'ext': 'm4a',
1181 'upload_date': '20121002',
1182 'uploader_id': '8KVIDEO',
1183 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1184 'description': '',
1185 'uploader': '8KVIDEO',
1186 'title': 'UHDTV TEST 8K VIDEO.mp4'
1187 },
1188 'params': {
1189 'youtube_include_dash_manifest': True,
1190 'format': '141',
1191 },
1192 'skip': 'format 141 not served anymore',
1193 },
1194 # DASH manifest with encrypted signature
1195 {
1196 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1197 'info_dict': {
1198 'id': 'IB3lcPjvWLA',
1199 'ext': 'm4a',
1200 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1201 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1202 'duration': 244,
1203 'uploader': 'AfrojackVEVO',
1204 'uploader_id': 'AfrojackVEVO',
1205 'upload_date': '20131011',
1206 'abr': 129.495,
1207 },
1208 'params': {
1209 'youtube_include_dash_manifest': True,
1210 'format': '141/bestaudio[ext=m4a]',
1211 },
1212 },
1213 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1214 {
1215 'note': 'Embed allowed age-gate video',
1216 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1217 'info_dict': {
1218 'id': 'HtVdAasjOgU',
1219 'ext': 'mp4',
1220 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1221 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1222 'duration': 142,
1223 'uploader': 'The Witcher',
1224 'uploader_id': 'WitcherGame',
1225 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1226 'upload_date': '20140605',
1227 'age_limit': 18,
1228 },
1229 },
1230 {
1231 'note': 'Age-gate video with embed allowed in public site',
1232 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1233 'info_dict': {
1234 'id': 'HsUATh_Nc2U',
1235 'ext': 'mp4',
1236 'title': 'Godzilla 2 (Official Video)',
1237 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1238 'upload_date': '20200408',
1239 'uploader_id': 'FlyingKitty900',
1240 'uploader': 'FlyingKitty',
1241 'age_limit': 18,
1242 },
1243 },
1244 {
1245 'note': 'Age-gate video embeddable only with clientScreen=EMBED',
1246 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1247 'info_dict': {
1248 'id': 'Tq92D6wQ1mg',
1249 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1250 'ext': 'mp4',
1251 'upload_date': '20191227',
1252 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1253 'uploader': 'Projekt Melody',
1254 'description': 'md5:17eccca93a786d51bc67646756894066',
1255 'age_limit': 18,
1256 },
1257 },
1258 {
1259 'note': 'Non-age-gated non-embeddable video',
1260 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1261 'info_dict': {
1262 'id': 'MeJVWBSsPAY',
1263 'ext': 'mp4',
1264 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1265 'uploader': 'Herr Lurik',
1266 'uploader_id': 'st3in234',
1267 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1268 'upload_date': '20130730',
1269 },
1270 },
1271 {
1272 'note': 'Non-bypassable age-gated video',
1273 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1274 'only_matching': True,
1275 },
1276 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1277 # YouTube Red ad is not captured for creator
1278 {
1279 'url': '__2ABJjxzNo',
1280 'info_dict': {
1281 'id': '__2ABJjxzNo',
1282 'ext': 'mp4',
1283 'duration': 266,
1284 'upload_date': '20100430',
1285 'uploader_id': 'deadmau5',
1286 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1287 'creator': 'deadmau5',
1288 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1289 'uploader': 'deadmau5',
1290 'title': 'Deadmau5 - Some Chords (HD)',
1291 'alt_title': 'Some Chords',
1292 },
1293 'expected_warnings': [
1294 'DASH manifest missing',
1295 ]
1296 },
1297 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1298 {
1299 'url': 'lqQg6PlCWgI',
1300 'info_dict': {
1301 'id': 'lqQg6PlCWgI',
1302 'ext': 'mp4',
1303 'duration': 6085,
1304 'upload_date': '20150827',
1305 'uploader_id': 'olympic',
1306 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1307 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1308 'uploader': 'Olympics',
1309 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1310 },
1311 'params': {
1312 'skip_download': 'requires avconv',
1313 }
1314 },
1315 # Non-square pixels
1316 {
1317 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1318 'info_dict': {
1319 'id': '_b-2C3KPAM0',
1320 'ext': 'mp4',
1321 'stretched_ratio': 16 / 9.,
1322 'duration': 85,
1323 'upload_date': '20110310',
1324 'uploader_id': 'AllenMeow',
1325 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1326 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1327 'uploader': '孫ᄋᄅ',
1328 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1329 },
1330 },
1331 # url_encoded_fmt_stream_map is empty string
1332 {
1333 'url': 'qEJwOuvDf7I',
1334 'info_dict': {
1335 'id': 'qEJwOuvDf7I',
1336 'ext': 'webm',
1337 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1338 'description': '',
1339 'upload_date': '20150404',
1340 'uploader_id': 'spbelect',
1341 'uploader': 'Наблюдатели Петербурга',
1342 },
1343 'params': {
1344 'skip_download': 'requires avconv',
1345 },
1346 'skip': 'This live event has ended.',
1347 },
1348 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1349 {
1350 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1351 'info_dict': {
1352 'id': 'FIl7x6_3R5Y',
1353 'ext': 'webm',
1354 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1355 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1356 'duration': 220,
1357 'upload_date': '20150625',
1358 'uploader_id': 'dorappi2000',
1359 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1360 'uploader': 'dorappi2000',
1361 'formats': 'mincount:31',
1362 },
1363 'skip': 'not actual anymore',
1364 },
1365 # DASH manifest with segment_list
1366 {
1367 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1368 'md5': '8ce563a1d667b599d21064e982ab9e31',
1369 'info_dict': {
1370 'id': 'CsmdDsKjzN8',
1371 'ext': 'mp4',
1372 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1373 'uploader': 'Airtek',
1374 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1375 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1376 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1377 },
1378 'params': {
1379 'youtube_include_dash_manifest': True,
1380 'format': '135', # bestvideo
1381 },
1382 'skip': 'This live event has ended.',
1383 },
1384 {
1385 # Multifeed videos (multiple cameras), URL is for Main Camera
1386 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1387 'info_dict': {
1388 'id': 'jvGDaLqkpTg',
1389 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1390 'description': 'md5:e03b909557865076822aa169218d6a5d',
1391 },
1392 'playlist': [{
1393 'info_dict': {
1394 'id': 'jvGDaLqkpTg',
1395 'ext': 'mp4',
1396 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1397 'description': 'md5:e03b909557865076822aa169218d6a5d',
1398 'duration': 10643,
1399 'upload_date': '20161111',
1400 'uploader': 'Team PGP',
1401 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1402 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1403 },
1404 }, {
1405 'info_dict': {
1406 'id': '3AKt1R1aDnw',
1407 'ext': 'mp4',
1408 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1409 'description': 'md5:e03b909557865076822aa169218d6a5d',
1410 'duration': 10991,
1411 'upload_date': '20161111',
1412 'uploader': 'Team PGP',
1413 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1414 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1415 },
1416 }, {
1417 'info_dict': {
1418 'id': 'RtAMM00gpVc',
1419 'ext': 'mp4',
1420 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1421 'description': 'md5:e03b909557865076822aa169218d6a5d',
1422 'duration': 10995,
1423 'upload_date': '20161111',
1424 'uploader': 'Team PGP',
1425 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1426 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1427 },
1428 }, {
1429 'info_dict': {
1430 'id': '6N2fdlP3C5U',
1431 'ext': 'mp4',
1432 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1433 'description': 'md5:e03b909557865076822aa169218d6a5d',
1434 'duration': 10990,
1435 'upload_date': '20161111',
1436 'uploader': 'Team PGP',
1437 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1438 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1439 },
1440 }],
1441 'params': {
1442 'skip_download': True,
1443 },
1444 'skip': 'Not multifeed anymore',
1445 },
1446 {
1447 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1448 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1449 'info_dict': {
1450 'id': 'gVfLd0zydlo',
1451 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1452 },
1453 'playlist_count': 2,
1454 'skip': 'Not multifeed anymore',
1455 },
1456 {
1457 'url': 'https://vid.plus/FlRa-iH7PGw',
1458 'only_matching': True,
1459 },
1460 {
1461 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1462 'only_matching': True,
1463 },
1464 {
1465 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1466 # Also tests cut-off URL expansion in video description (see
1467 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1468 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1469 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1470 'info_dict': {
1471 'id': 'lsguqyKfVQg',
1472 'ext': 'mp4',
1473 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1474 'alt_title': 'Dark Walk',
1475 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1476 'duration': 133,
1477 'upload_date': '20151119',
1478 'uploader_id': 'IronSoulElf',
1479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1480 'uploader': 'IronSoulElf',
1481 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1482 'track': 'Dark Walk',
1483 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1484 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1485 },
1486 'params': {
1487 'skip_download': True,
1488 },
1489 },
1490 {
1491 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1492 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1493 'only_matching': True,
1494 },
1495 {
1496 # Video with yt:stretch=17:0
1497 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1498 'info_dict': {
1499 'id': 'Q39EVAstoRM',
1500 'ext': 'mp4',
1501 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1502 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1503 'upload_date': '20151107',
1504 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1505 'uploader': 'CH GAMER DROID',
1506 },
1507 'params': {
1508 'skip_download': True,
1509 },
1510 'skip': 'This video does not exist.',
1511 },
1512 {
1513 # Video with incomplete 'yt:stretch=16:'
1514 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1515 'only_matching': True,
1516 },
1517 {
1518 # Video licensed under Creative Commons
1519 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1520 'info_dict': {
1521 'id': 'M4gD1WSo5mA',
1522 'ext': 'mp4',
1523 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1524 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1525 'duration': 721,
1526 'upload_date': '20150127',
1527 'uploader_id': 'BerkmanCenter',
1528 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1529 'uploader': 'The Berkman Klein Center for Internet & Society',
1530 'license': 'Creative Commons Attribution license (reuse allowed)',
1531 },
1532 'params': {
1533 'skip_download': True,
1534 },
1535 },
1536 {
1537 # Channel-like uploader_url
1538 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1539 'info_dict': {
1540 'id': 'eQcmzGIKrzg',
1541 'ext': 'mp4',
1542 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1543 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1544 'duration': 4060,
1545 'upload_date': '20151119',
1546 'uploader': 'Bernie Sanders',
1547 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1548 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1549 'license': 'Creative Commons Attribution license (reuse allowed)',
1550 },
1551 'params': {
1552 'skip_download': True,
1553 },
1554 },
1555 {
1556 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1557 'only_matching': True,
1558 },
1559 {
1560 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1561 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1562 'only_matching': True,
1563 },
1564 {
1565 # Rental video preview
1566 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1567 'info_dict': {
1568 'id': 'uGpuVWrhIzE',
1569 'ext': 'mp4',
1570 'title': 'Piku - Trailer',
1571 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1572 'upload_date': '20150811',
1573 'uploader': 'FlixMatrix',
1574 'uploader_id': 'FlixMatrixKaravan',
1575 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1576 'license': 'Standard YouTube License',
1577 },
1578 'params': {
1579 'skip_download': True,
1580 },
1581 'skip': 'This video is not available.',
1582 },
1583 {
1584 # YouTube Red video with episode data
1585 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1586 'info_dict': {
1587 'id': 'iqKdEhx-dD4',
1588 'ext': 'mp4',
1589 'title': 'Isolation - Mind Field (Ep 1)',
1590 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1591 'duration': 2085,
1592 'upload_date': '20170118',
1593 'uploader': 'Vsauce',
1594 'uploader_id': 'Vsauce',
1595 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1596 'series': 'Mind Field',
1597 'season_number': 1,
1598 'episode_number': 1,
1599 },
1600 'params': {
1601 'skip_download': True,
1602 },
1603 'expected_warnings': [
1604 'Skipping DASH manifest',
1605 ],
1606 },
1607 {
1608 # The following content has been identified by the YouTube community
1609 # as inappropriate or offensive to some audiences.
1610 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1611 'info_dict': {
1612 'id': '6SJNVb0GnPI',
1613 'ext': 'mp4',
1614 'title': 'Race Differences in Intelligence',
1615 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1616 'duration': 965,
1617 'upload_date': '20140124',
1618 'uploader': 'New Century Foundation',
1619 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1620 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1621 },
1622 'params': {
1623 'skip_download': True,
1624 },
1625 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1626 },
1627 {
1628 # itag 212
1629 'url': '1t24XAntNCY',
1630 'only_matching': True,
1631 },
1632 {
1633 # geo restricted to JP
1634 'url': 'sJL6WA-aGkQ',
1635 'only_matching': True,
1636 },
1637 {
1638 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1639 'only_matching': True,
1640 },
1641 {
1642 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1643 'only_matching': True,
1644 },
1645 {
1646 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1647 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1648 'only_matching': True,
1649 },
1650 {
1651 # DRM protected
1652 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1653 'only_matching': True,
1654 },
1655 {
1656 # Video with unsupported adaptive stream type formats
1657 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1658 'info_dict': {
1659 'id': 'Z4Vy8R84T1U',
1660 'ext': 'mp4',
1661 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1662 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1663 'duration': 433,
1664 'upload_date': '20130923',
1665 'uploader': 'Amelia Putri Harwita',
1666 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1667 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1668 'formats': 'maxcount:10',
1669 },
1670 'params': {
1671 'skip_download': True,
1672 'youtube_include_dash_manifest': False,
1673 },
1674 'skip': 'not actual anymore',
1675 },
1676 {
1677 # Youtube Music Auto-generated description
1678 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1679 'info_dict': {
1680 'id': 'MgNrAu2pzNs',
1681 'ext': 'mp4',
1682 'title': 'Voyeur Girl',
1683 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1684 'upload_date': '20190312',
1685 'uploader': 'Stephen - Topic',
1686 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1687 'artist': 'Stephen',
1688 'track': 'Voyeur Girl',
1689 'album': 'it\'s too much love to know my dear',
1690 'release_date': '20190313',
1691 'release_year': 2019,
1692 },
1693 'params': {
1694 'skip_download': True,
1695 },
1696 },
1697 {
1698 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1699 'only_matching': True,
1700 },
1701 {
1702 # invalid -> valid video id redirection
1703 'url': 'DJztXj2GPfl',
1704 'info_dict': {
1705 'id': 'DJztXj2GPfk',
1706 'ext': 'mp4',
1707 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1708 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1709 'upload_date': '20090125',
1710 'uploader': 'Prochorowka',
1711 'uploader_id': 'Prochorowka',
1712 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1713 'artist': 'Panjabi MC',
1714 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1715 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1716 },
1717 'params': {
1718 'skip_download': True,
1719 },
1720 'skip': 'Video unavailable',
1721 },
1722 {
1723 # empty description results in an empty string
1724 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1725 'info_dict': {
1726 'id': 'x41yOUIvK2k',
1727 'ext': 'mp4',
1728 'title': 'IMG 3456',
1729 'description': '',
1730 'upload_date': '20170613',
1731 'uploader_id': 'ElevageOrVert',
1732 'uploader': 'ElevageOrVert',
1733 },
1734 'params': {
1735 'skip_download': True,
1736 },
1737 },
1738 {
1739 # with '};' inside yt initial data (see [1])
1740 # see [2] for an example with '};' inside ytInitialPlayerResponse
1741 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1742 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1743 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1744 'info_dict': {
1745 'id': 'CHqg6qOn4no',
1746 'ext': 'mp4',
1747 'title': 'Part 77 Sort a list of simple types in c#',
1748 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1749 'upload_date': '20130831',
1750 'uploader_id': 'kudvenkat',
1751 'uploader': 'kudvenkat',
1752 },
1753 'params': {
1754 'skip_download': True,
1755 },
1756 },
1757 {
1758 # another example of '};' in ytInitialData
1759 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1760 'only_matching': True,
1761 },
1762 {
1763 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1764 'only_matching': True,
1765 },
1766 {
1767 # https://github.com/ytdl-org/youtube-dl/pull/28094
1768 'url': 'OtqTfy26tG0',
1769 'info_dict': {
1770 'id': 'OtqTfy26tG0',
1771 'ext': 'mp4',
1772 'title': 'Burn Out',
1773 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1774 'upload_date': '20141120',
1775 'uploader': 'The Cinematic Orchestra - Topic',
1776 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1777 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1778 'artist': 'The Cinematic Orchestra',
1779 'track': 'Burn Out',
1780 'album': 'Every Day',
1781 'release_date': None,
1782 'release_year': None,
1783 },
1784 'params': {
1785 'skip_download': True,
1786 },
1787 },
1788 {
1789 # controversial video, only works with bpctr when authenticated with cookies
1790 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1791 'only_matching': True,
1792 },
1793 {
1794 # controversial video, requires bpctr/contentCheckOk
1795 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1796 'info_dict': {
1797 'id': 'SZJvDhaSDnc',
1798 'ext': 'mp4',
1799 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1800 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1801 'uploader': 'CBS This Morning',
1802 'uploader_id': 'CBSThisMorning',
1803 'upload_date': '20140716',
1804 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7',
1805 },
1806 },
1807 {
1808 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1809 'url': 'cBvYw8_A0vQ',
1810 'info_dict': {
1811 'id': 'cBvYw8_A0vQ',
1812 'ext': 'mp4',
1813 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1814 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1815 'upload_date': '20201120',
1816 'uploader': 'Walk around Japan',
1817 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1818 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1819 },
1820 'params': {
1821 'skip_download': True,
1822 },
1823 }, {
1824 # Has multiple audio streams
1825 'url': 'WaOKSUlf4TM',
1826 'only_matching': True
1827 }, {
1828 # Requires Premium: has format 141 when requested using YTM url
1829 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1830 'only_matching': True
1831 }, {
1832 # multiple subtitles with same lang_code
1833 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1834 'only_matching': True,
1835 }, {
1836 # Force use android client fallback
1837 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1838 'info_dict': {
1839 'id': 'YOelRv7fMxY',
1840 'title': 'DIGGING A SECRET TUNNEL Part 1',
1841 'ext': '3gp',
1842 'upload_date': '20210624',
1843 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1844 'uploader': 'colinfurze',
1845 'uploader_id': 'colinfurze',
1846 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1847 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1848 },
1849 'params': {
1850 'format': '17', # 3gp format available on android
1851 'extractor_args': {'youtube': {'player_client': ['android']}},
1852 },
1853 },
1854 {
1855 # Skip download of additional client configs (remix client config in this case)
1856 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1857 'only_matching': True,
1858 'params': {
1859 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1860 },
1861 }, {
1862 # shorts
1863 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1864 'only_matching': True,
1865 },
1866 ]
1867
1868 @classmethod
1869 def suitable(cls, url):
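# parse_qs is imported locally (rather than relying on the module-level
# import) presumably so that lazy-extractor builds, which inline this
# method without the surrounding imports, keep working.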
1870 from ..utils import parse_qs
1871
1872 qs = parse_qs(url)
1873 if qs.get('list', [None])[0]:
1874 return False
1875 return super(YoutubeIE, cls).suitable(url)
1876
1877 def __init__(self, *args, **kwargs):
1878 super(YoutubeIE, self).__init__(*args, **kwargs)
1879 self._code_cache = {}
1880 self._player_cache = {}
1881
1882 def _extract_player_url(self, *ytcfgs, webpage=None):
1883 player_url = traverse_obj(
1884 ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1885 get_all=False, expected_type=compat_str)
1886 if not player_url:
1887 return
1888 if player_url.startswith('//'):
1889 player_url = 'https:' + player_url
1890 elif not re.match(r'https?://', player_url):
1891 player_url = compat_urlparse.urljoin(
1892 'https://www.youtube.com', player_url)
1893 return player_url
1894
1895 def _download_player_url(self, video_id, fatal=False):
1896 res = self._download_webpage(
1897 'https://www.youtube.com/iframe_api',
1898 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1899 if res:
1900 player_version = self._search_regex(
1901 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1902 if player_version:
1903 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1904
1905 def _signature_cache_id(self, example_sig):
1906 """ Return a string representation of a signature """
1907 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1908
1909 @classmethod
1910 def _extract_player_info(cls, player_url):
1911 for player_re in cls._PLAYER_INFO_RE:
1912 id_m = re.search(player_re, player_url)
1913 if id_m:
1914 break
1915 else:
1916 raise ExtractorError('Cannot identify player %r' % player_url)
1917 return id_m.group('id')
1918
1919 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1920 player_id = self._extract_player_info(player_url)
1921 if player_id not in self._code_cache:
1922 code = self._download_webpage(
1923 player_url, video_id, fatal=fatal,
1924 note='Downloading player ' + player_id,
1925 errnote='Download of %s failed' % player_url)
1926 if code:
1927 self._code_cache[player_id] = code
1928 return player_id in self._code_cache
1929
1930 def _extract_signature_function(self, video_id, player_url, example_sig):
1931 player_id = self._extract_player_info(player_url)
1932
1933 # Read from filesystem cache
1934 func_id = 'js_%s_%s' % (
1935 player_id, self._signature_cache_id(example_sig))
1936 assert os.path.basename(func_id) == func_id
1937
1938 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1939 if cache_spec is not None:
1940 return lambda s: ''.join(s[i] for i in cache_spec)
1941
1942 if self._load_player(video_id, player_url):
1943 code = self._code_cache[player_id]
1944 res = self._parse_sig_js(code)
1945
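# Run the extracted JS function once on a probe string of unique characters;
# the result records which input index ends up at each output position, so the
# transformation can later be replayed as a plain index permutation without
# re-running the JS interpreter.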
1946 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1947 cache_res = res(test_string)
1948 cache_spec = [ord(c) for c in cache_res]
1949
1950 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1951 return res
1952
1953 def _print_sig_code(self, func, example_sig):
1954 def gen_sig_code(idxs):
1955 def _genslice(start, end, step):
1956 starts = '' if start == 0 else str(start)
1957 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1958 steps = '' if step == 1 else (':%d' % step)
1959 return 's[%s%s%s]' % (starts, ends, steps)
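# e.g. _genslice(2, 6, 1) -> 's[2:7]' and _genslice(5, 1, -1) -> 's[5:0:-1]'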
1960
1961 step = None
1962 # Squelch pyflakes warnings - start will be set when step is set
1963 start = '(Never used)'
1964 for i, prev in zip(idxs[1:], idxs[:-1]):
1965 if step is not None:
1966 if i - prev == step:
1967 continue
1968 yield _genslice(start, prev, step)
1969 step = None
1970 continue
1971 if i - prev in [-1, 1]:
1972 step = i - prev
1973 start = prev
1974 continue
1975 else:
1976 yield 's[%d]' % prev
1977 if step is None:
1978 yield 's[%d]' % i
1979 else:
1980 yield _genslice(start, i, step)
1981
1982 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1983 cache_res = func(test_string)
1984 cache_spec = [ord(c) for c in cache_res]
1985 expr_code = ' + '.join(gen_sig_code(cache_spec))
1986 signature_id_tuple = '(%s)' % (
1987 ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1988 code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1989 ' return %s\n') % (signature_id_tuple, expr_code)
1990 self.to_screen('Extracted signature function:\n' + code)
1991
1992 def _parse_sig_js(self, jscode):
1993 funcname = self._search_regex(
1994 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1995 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1996 r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
1997 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
1998 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1999 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
2000 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
2001 # Obsolete patterns
2002 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2003 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
2004 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2005 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2006 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2007 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2008 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
2009 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
2010 jscode, 'Initial JS player signature function name', group='sig')
2011
2012 jsi = JSInterpreter(jscode)
2013 initial_function = jsi.extract_function(funcname)
2014 return lambda s: initial_function([s])
2015
2016 def _decrypt_signature(self, s, video_id, player_url):
2017 """Turn the encrypted s field into a working signature"""
2018
2019 if player_url is None:
2020 raise ExtractorError('Cannot decrypt signature without player_url')
2021
2022 try:
2023 player_id = (player_url, self._signature_cache_id(s))
2024 if player_id not in self._player_cache:
2025 func = self._extract_signature_function(
2026 video_id, player_url, s
2027 )
2028 self._player_cache[player_id] = func
2029 func = self._player_cache[player_id]
2030 if self.get_param('youtube_print_sig_code'):
2031 self._print_sig_code(func, s)
2032 return func(s)
2033 except Exception as e:
2034 tb = traceback.format_exc()
2035 raise ExtractorError(
2036 'Signature extraction failed: ' + tb, cause=e)
2037
2038 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2039 """
2040 Extract signatureTimestamp (sts)
2041 Required to tell the API which sig/player version is in use.
2042 """
2043 sts = None
2044 if isinstance(ytcfg, dict):
2045 sts = int_or_none(ytcfg.get('STS'))
2046
2047 if not sts:
2048 # Attempt to extract from player
2049 if player_url is None:
2050 error_msg = 'Cannot extract signature timestamp without player_url.'
2051 if fatal:
2052 raise ExtractorError(error_msg)
2053 self.report_warning(error_msg)
2054 return
2055 if self._load_player(video_id, player_url, fatal=fatal):
2056 player_id = self._extract_player_info(player_url)
2057 code = self._code_cache[player_id]
2058 sts = int_or_none(self._search_regex(
2059 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2060 'JS player signature timestamp', group='sts', fatal=fatal))
2061 return sts
2062
2063 def _mark_watched(self, video_id, player_responses):
2064 playback_url = traverse_obj(
2065 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2066 expected_type=url_or_none, get_all=False)
2067 if not playback_url:
2068 self.report_warning('Unable to mark watched')
2069 return
2070 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2071 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2072
2073 # cpn generation algorithm is reverse engineered from base.js.
2074 # In fact it works even with dummy cpn.
2075 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
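# A cpn is 16 characters drawn from the 64-character alphabet above;
# masking with `& 63` keeps the index in range (randint(0, 256) is inclusive,
# so larger values are simply folded back into 0-63).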
2076 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2077
2078 qs.update({
2079 'ver': ['2'],
2080 'cpn': [cpn],
2081 })
2082 playback_url = compat_urlparse.urlunparse(
2083 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2084
2085 self._download_webpage(
2086 playback_url, video_id, 'Marking watched',
2087 'Unable to mark watched', fatal=False)
2088
2089 @staticmethod
2090 def _extract_urls(webpage):
2091 # Embedded YouTube player
2092 entries = [
2093 unescapeHTML(mobj.group('url'))
2094 for mobj in re.finditer(r'''(?x)
2095 (?:
2096 <iframe[^>]+?src=|
2097 data-video-url=|
2098 <embed[^>]+?src=|
2099 embedSWF\(?:\s*|
2100 <object[^>]+data=|
2101 new\s+SWFObject\(
2102 )
2103 (["\'])
2104 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2105 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2106 \1''', webpage)]
2107
2108 # lazyYT YouTube embed
2109 entries.extend(list(map(
2110 unescapeHTML,
2111 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2112
2113 # Wordpress "YouTube Video Importer" plugin
2114 matches = re.findall(r'''(?x)<div[^>]+
2115 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2116 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2117 entries.extend(m[-1] for m in matches)
2118
2119 return entries
2120
2121 @staticmethod
2122 def _extract_url(webpage):
2123 urls = YoutubeIE._extract_urls(webpage)
2124 return urls[0] if urls else None
2125
2126 @classmethod
2127 def extract_id(cls, url):
2128 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2129 if mobj is None:
2130 raise ExtractorError('Invalid URL: %s' % url)
2131 return mobj.group('id')
2132
2133 def _extract_chapters_from_json(self, data, duration):
2134 chapter_list = traverse_obj(
2135 data, (
2136 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2137 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2138 ), expected_type=list)
2139
2140 return self._extract_chapters(
2141 chapter_list,
2142 chapter_time=lambda chapter: float_or_none(
2143 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2144 chapter_title=lambda chapter: traverse_obj(
2145 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2146 duration=duration)
2147
2148 def _extract_chapters_from_engagement_panel(self, data, duration):
2149 content_list = traverse_obj(
2150 data,
2151 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2152 expected_type=list, default=[])
2153 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2154 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2155
2156 return next((
2157 filter(None, (
2158 self._extract_chapters(
2159 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2160 chapter_time, chapter_title, duration)
2161 for contents in content_list
2162 ))), [])
2163
2164 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2165 chapters = []
2166 last_chapter = {'start_time': 0}
2167 for idx, chapter in enumerate(chapter_list or []):
2168 title = chapter_title(chapter)
2169 start_time = chapter_time(chapter)
2170 if start_time is None:
2171 continue
2172 last_chapter['end_time'] = start_time
2173 if start_time < last_chapter['start_time']:
2174 if idx == 1:
2175 chapters.pop()
2176 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2177 else:
2178 self.report_warning(f'Invalid start time for chapter "{title}"')
2179 continue
2180 last_chapter = {'start_time': start_time, 'title': title}
2181 chapters.append(last_chapter)
2182 last_chapter['end_time'] = duration
2183 return chapters
2184
2185 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2186 return self._parse_json(self._search_regex(
2187 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2188 regex), webpage, name, default='{}'), video_id, fatal=False)
2189
2190 @staticmethod
2191 def parse_time_text(time_text):
2192 """
2193 Parse the comment time text
2194 time_text is in the format 'X units ago (edited)'
2195 """
2196 time_text_split = time_text.split(' ')
2197 if len(time_text_split) >= 3:
2198 try:
2199 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2200 except ValueError:
2201 return None
2202
2203 def _extract_comment(self, comment_renderer, parent=None):
2204 comment_id = comment_renderer.get('commentId')
2205 if not comment_id:
2206 return
2207
2208 text = self._get_text(comment_renderer, 'contentText')
2209
2210 # note: timestamp is an estimate calculated from the current time and time_text
2211 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2212 time_text_dt = self.parse_time_text(time_text)
2213 # fall back to None if the time text could not be parsed (avoids an unbound variable below)
2214 timestamp = calendar.timegm(time_text_dt.timetuple()) if isinstance(time_text_dt, datetime.datetime) else None
2215 author = self._get_text(comment_renderer, 'authorText')
2216 author_id = try_get(comment_renderer,
2217 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2218
2219 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2220 lambda x: x['likeCount']), compat_str)) or 0
2221 author_thumbnail = try_get(comment_renderer,
2222 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2223
2224 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2225 is_favorited = 'creatorHeart' in (try_get(
2226 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2227 return {
2228 'id': comment_id,
2229 'text': text,
2230 'timestamp': timestamp,
2231 'time_text': time_text,
2232 'like_count': votes,
2233 'is_favorited': is_favorited,
2234 'author': author,
2235 'author_id': author_id,
2236 'author_thumbnail': author_thumbnail,
2237 'author_is_uploader': author_is_uploader,
2238 'parent': parent or 'root'
2239 }
2240
2241 def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
2242
2243 def extract_header(contents):
2244 _continuation = None
2245 for content in contents:
2246 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
2247 expected_comment_count = parse_count(self._get_text(
2248 comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
2249
2250 if expected_comment_count:
2251 comment_counts[1] = expected_comment_count
2252 self.to_screen('Downloading ~%d comments' % expected_comment_count)
2253 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2254 comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top
2255
2256 sort_menu_item = try_get(
2257 comments_header_renderer,
2258 lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2259 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2260
2261 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2262 if not _continuation:
2263 continue
2264
2265 sort_text = sort_menu_item.get('title')
2266 if isinstance(sort_text, compat_str):
2267 sort_text = sort_text.lower()
2268 else:
2269 sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2270 self.to_screen('Sorting comments by %s' % sort_text)
2271 break
2272 return _continuation
2273
2274 def extract_thread(contents):
2275 if not parent:
2276 comment_counts[2] = 0
2277 for content in contents:
2278 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2279 comment_renderer = try_get(
2280 comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
2281 content, (lambda x: x['commentRenderer'], dict))
2282
2283 if not comment_renderer:
2284 continue
2285 comment = self._extract_comment(comment_renderer, parent)
2286 if not comment:
2287 continue
2288 comment_counts[0] += 1
2289 yield comment
2290 # Attempt to get the replies
2291 comment_replies_renderer = try_get(
2292 comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2293
2294 if comment_replies_renderer:
2295 comment_counts[2] += 1
2296 comment_entries_iter = self._comment_entries(
2297 comment_replies_renderer, ytcfg, video_id,
2298 parent=comment.get('id'), comment_counts=comment_counts)
2299
2300 for reply_comment in comment_entries_iter:
2301 yield reply_comment
2302
2303 # YouTube comments have a max depth of 2
2304 max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
2305 if max_depth == 1 and parent:
2306 return
2307 if not comment_counts:
2308 # [comments downloaded so far, estimated total comments, current reply thread #]
2309 comment_counts = [0, 0, 0]
2310
2311 continuation = self._extract_continuation(root_continuation_data)
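# Heuristic: continuation tokens produced by the old API are noticeably
# shorter than new-style ones, so anything under 27 characters is replaced
# with a locally generated new-style token (see _generate_comment_continuation).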
2312 if continuation and len(continuation['continuation']) < 27:
2313 self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2314 continuation_token = self._generate_comment_continuation(video_id)
2315 continuation = self._build_api_continuation_query(continuation_token, None)
2316
2317 visitor_data = None
2318 is_first_continuation = parent is None
2319
2320 for page_num in itertools.count(0):
2321 if not continuation:
2322 break
2323 headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
2324 comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2325 if page_num == 0:
2326 if is_first_continuation:
2327 note_prefix = 'Downloading comment section API JSON'
2328 else:
2329 note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
2330 comment_counts[2], comment_prog_str)
2331 else:
2332 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2333 ' ' if parent else '', ' replies' if parent else '',
2334 page_num, comment_prog_str)
2335
2336 response = self._extract_response(
2337 item_id=None, query=continuation,
2338 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2339 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
2340 if not response:
2341 break
2342 visitor_data = try_get(
2343 response,
2344 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2345 compat_str) or visitor_data
2346
2347 continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
2348
2349 continuation = None
2350 if isinstance(continuation_contents, list):
2351 for continuation_section in continuation_contents:
2352 if not isinstance(continuation_section, dict):
2353 continue
2354 continuation_items = try_get(
2355 continuation_section,
2356 (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2357 lambda x: x['appendContinuationItemsAction']['continuationItems']),
2358 list) or []
2359 if is_first_continuation:
2360 continuation = extract_header(continuation_items)
2361 is_first_continuation = False
2362 if continuation:
2363 break
2364 continue
2365 count = 0
2366 for count, entry in enumerate(extract_thread(continuation_items)):
2367 yield entry
2368 continuation = self._extract_continuation({'contents': continuation_items})
2369 if continuation:
2370 # Sometimes YouTube provides a continuation without any comments
2371 # In most cases we end up just downloading these with very few comments left to come.
2372 if count == 0:
2373 if not parent:
2374 self.report_warning('No comments received - assuming end of comments')
2375 continuation = None
2376 break
2377
2378 # Deprecated response structure
2379 elif isinstance(continuation_contents, dict):
2380 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2381 for key, continuation_renderer in continuation_contents.items():
2382 if key not in known_continuation_renderers:
2383 continue
2384 if not isinstance(continuation_renderer, dict):
2385 continue
2386 if is_first_continuation:
2387 header_continuation_items = [continuation_renderer.get('header') or {}]
2388 continuation = extract_header(header_continuation_items)
2389 is_first_continuation = False
2390 if continuation:
2391 break
2392
2393 # Sometimes YouTube provides a continuation without any comments
2394 # In most cases we end up just downloading these with very few comments left to come.
2395 count = 0
2396 for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2397 yield entry
2398 continuation = self._extract_continuation(continuation_renderer)
2399 if count == 0:
2400 if not parent:
2401 self.report_warning('No comments received - assuming end of comments')
2402 continuation = None
2403 break
2404
2405 @staticmethod
2406 def _generate_comment_continuation(video_id):
2407 """
2408 Generates initial comment section continuation token from given video id
2409 """
2410 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
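# The hard-coded strings below appear to be base64 fragments of the protobuf
# envelope the web client sends; the base64-encoded video id is spliced in
# twice, and the decoded byte sequences are concatenated and re-encoded to
# form the final continuation token.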
2411 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2412 new_continuation_intlist = list(itertools.chain.from_iterable(
2413 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2414 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2415
2416 def _get_comments(self, ytcfg, video_id, contents, webpage):
2417 """Entry for comment extraction"""
2418 def _real_comment_extract(contents):
2419 yield from self._comment_entries(
2420 traverse_obj(contents, (..., 'itemSectionRenderer'), get_all=False), ytcfg, video_id)
2421
2422 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
2423 # Force English regardless of account setting to prevent parsing issues
2424 # See: https://github.com/yt-dlp/yt-dlp/issues/532
2425 ytcfg = copy.deepcopy(ytcfg)
2426 traverse_obj(
2427 ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
2428 return itertools.islice(_real_comment_extract(contents), 0, max_comments)
2429
2430 @staticmethod
2431 def _get_checkok_params():
2432 return {'contentCheckOk': True, 'racyCheckOk': True}
2433
2434 @classmethod
2435 def _generate_player_context(cls, sts=None):
2436 context = {
2437 'html5Preference': 'HTML5_PREF_WANTS',
2438 }
2439 if sts is not None:
2440 context['signatureTimestamp'] = sts
2441 return {
2442 'playbackContext': {
2443 'contentPlaybackContext': context
2444 },
2445 **cls._get_checkok_params()
2446 }
2447
2448 @staticmethod
2449 def _is_agegated(player_response):
2450 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2451 return True
2452
2453 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2454 AGE_GATE_REASONS = (
2455 'confirm your age', 'age-restricted', 'inappropriate', # reason
2456 'age_verification_required', 'age_check_required', # status
2457 )
2458 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2459
2460 @staticmethod
2461 def _is_unplayable(player_response):
2462 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2463
2464 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2465
2466 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2467 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2468 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2469 headers = self.generate_api_headers(
2470 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2471
2472 yt_query = {'videoId': video_id}
2473 yt_query.update(self._generate_player_context(sts))
2474 return self._extract_response(
2475 item_id=video_id, ep='player', query=yt_query,
2476 ytcfg=player_ytcfg, headers=headers, fatal=True,
2477 default_client=client,
2478 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2479 ) or None
2480
2481 def _get_requested_clients(self, url, smuggled_data):
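# Clients are selected via the 'player_client' extractor argument, e.g.
# --extractor-args "youtube:player_client=android,web"; 'all' expands to
# every public client and unrecognised names are skipped with a warning.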
2482 requested_clients = []
2483 allowed_clients = sorted(
2484 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2485 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2486 for client in self._configuration_arg('player_client'):
2487 if client in allowed_clients:
2488 requested_clients.append(client)
2489 elif client == 'all':
2490 requested_clients.extend(allowed_clients)
2491 else:
2492 self.report_warning(f'Skipping unsupported client {client}')
2493 if not requested_clients:
2494 requested_clients = ['android', 'web']
2495
2496 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2497 requested_clients.extend(
2498 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2499
2500 return orderedSet(requested_clients)
2501
2502 def _extract_player_ytcfg(self, client, video_id):
2503 url = {
2504 'web_music': 'https://music.youtube.com',
2505 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2506 }.get(client)
2507 if not url:
2508 return {}
2509 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2510 return self.extract_ytcfg(video_id, webpage) or {}
2511
2512 def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
2513 initial_pr = None
2514 if webpage:
2515 initial_pr = self._extract_yt_initial_variable(
2516 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2517 video_id, 'initial player response')
2518
2519 original_clients = clients
2520 clients = clients[::-1]
2521 prs = []
2522
2523 def append_client(client_name):
2524 if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
2525 clients.append(client_name)
2526
2527 # Android player_response does not have microFormats which are needed for
2528 # extraction of some data. So we return the initial_pr with formats
2529 # stripped out even if not requested by the user
2530 # See: https://github.com/yt-dlp/yt-dlp/issues/501
2531 if initial_pr:
2532 pr = dict(initial_pr)
2533 pr['streamingData'] = None
2534 prs.append(pr)
2535
2536 last_error = None
2537 tried_iframe_fallback = False
2538 player_url = None
2539 while clients:
2540 client = clients.pop()
2541 player_ytcfg = master_ytcfg if client == 'web' else {}
2542 if 'configs' not in self._configuration_arg('player_skip'):
2543 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
2544
2545 player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
2546 require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
2547 if 'js' in self._configuration_arg('player_skip'):
2548 require_js_player = False
2549 player_url = None
2550
2551 if not player_url and not tried_iframe_fallback and require_js_player:
2552 player_url = self._download_player_url(video_id)
2553 tried_iframe_fallback = True
2554
2555 try:
2556 pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
2557 client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
2558 except ExtractorError as e:
2559 if last_error:
2560 self.report_warning(last_error)
2561 last_error = e
2562 continue
2563
2564 if pr:
2565 prs.append(pr)
2566
2567 # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
2568 if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
2569 append_client(client.replace('_agegate', '_creator'))
2570 elif self._is_agegated(pr):
2571 append_client(f'{client}_agegate')
2572
2573 if last_error:
2574 if not len(prs):
2575 raise last_error
2576 self.report_warning(last_error)
2577 return prs, player_url
2578
2579 def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2580 itags, stream_ids = [], []
2581 itag_qualities, res_qualities = {}, {}
2582 q = qualities([
2583 # Normally tiny is the smallest video-only format. But
2584 # audio-only formats with unknown quality may get tagged as tiny
2585 'tiny',
2586 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2587 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2588 ])
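# q() maps a quality name to its position in the list above, so later
# (higher-quality) entries compare as better; unknown names compare lowest.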
2589 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2590
2591 for fmt in streaming_formats:
2592 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2593 continue
2594
2595 itag = str_or_none(fmt.get('itag'))
2596 audio_track = fmt.get('audioTrack') or {}
2597 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2598 if stream_id in stream_ids:
2599 continue
2600
2601 quality = fmt.get('quality')
2602 height = int_or_none(fmt.get('height'))
2603 if quality == 'tiny' or not quality:
2604 quality = fmt.get('audioQuality', '').lower() or quality
2605 # The 3gp format (17) in android client has a quality of "small",
2606 # but is actually worse than other formats
2607 if itag == '17':
2608 quality = 'tiny'
2609 if quality:
2610 if itag:
2611 itag_qualities[itag] = quality
2612 if height:
2613 res_qualities[height] = quality
2614 # FORMAT_STREAM_TYPE_OTF (otf=1) requires downloading the init fragment
2615 # (adding `&sq=0` to the URL) and parsing the emsg box to determine the
2616 # number of fragments that would subsequently be requested with `&sq=N`
2617 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2618 continue
2619
2620 fmt_url = fmt.get('url')
2621 if not fmt_url:
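# signatureCipher is itself a URL-encoded query string carrying the stream
# url ('url'), the scrambled signature ('s') and the name of the query
# parameter the decrypted signature must be appended under ('sp',
# defaulting to 'signature').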
2622 sc = compat_parse_qs(fmt.get('signatureCipher'))
2623 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2624 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2625 if not (sc and fmt_url and encrypted_sig):
2626 continue
2627 if not player_url:
2628 continue
2629 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2630 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2631 fmt_url += '&' + sp + '=' + signature
2632
2633 if itag:
2634 itags.append(itag)
2635 stream_ids.append(stream_id)
2636
2637 tbr = float_or_none(
2638 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2639 dct = {
2640 'asr': int_or_none(fmt.get('audioSampleRate')),
2641 'filesize': int_or_none(fmt.get('contentLength')),
2642 'format_id': itag,
2643 'format_note': ', '.join(filter(None, (
2644 '%s%s' % (audio_track.get('displayName') or '',
2645 ' (default)' if audio_track.get('audioIsDefault') else ''),
2646 fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
2647 'fps': int_or_none(fmt.get('fps')),
2648 'height': height,
2649 'quality': q(quality),
2650 'tbr': tbr,
2651 'url': fmt_url,
2652 'width': int_or_none(fmt.get('width')),
2653 'language': audio_track.get('id', '').split('.')[0],
2654 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2655 }
2656 mime_mobj = re.match(
2657 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2658 if mime_mobj:
2659 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2660 dct.update(parse_codecs(mime_mobj.group(2)))
2661 no_audio = dct.get('acodec') == 'none'
2662 no_video = dct.get('vcodec') == 'none'
2663 if no_audio:
2664 dct['vbr'] = tbr
2665 if no_video:
2666 dct['abr'] = tbr
2667 if no_audio or no_video:
2668 dct['downloader_options'] = {
2669 # Youtube throttles chunks >~10M
2670 'http_chunk_size': 10485760,
2671 }
2672 if dct.get('ext'):
2673 dct['container'] = dct['ext'] + '_dash'
2674 yield dct
2675
2676 skip_manifests = self._configuration_arg('skip')
2677 get_dash = (
2678 (not is_live or self._configuration_arg('include_live_dash'))
2679 and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
2680 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2681
2682 def guess_quality(f):
2683 for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
2684 if val in qdict:
2685 return q(qdict[val])
2686 return -1
2687
2688 for sd in streaming_data:
2689 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2690 if hls_manifest_url:
2691 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2692 itag = self._search_regex(
2693 r'/itag/(\d+)', f['url'], 'itag', default=None)
2694 if itag in itags:
2695 continue
2696 if itag:
2697 f['format_id'] = itag
2698 itags.append(itag)
2699 f['quality'] = guess_quality(f)
2700 yield f
2701
2702 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2703 if dash_manifest_url:
2704 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2705 itag = f['format_id']
2706 if itag in itags:
2707 continue
2708 if itag:
2709 itags.append(itag)
2710 f['quality'] = guess_quality(f)
2711 filesize = int_or_none(self._search_regex(
2712 r'/clen/(\d+)', f.get('fragment_base_url')
2713 or f['url'], 'file size', default=None))
2714 if filesize:
2715 f['filesize'] = filesize
2716 yield f
2717
2718 def _real_extract(self, url):
2719 url, smuggled_data = unsmuggle_url(url, {})
2720 video_id = self._match_id(url)
2721
2722 base_url = self.http_scheme() + '//www.youtube.com/'
2723 webpage_url = base_url + 'watch?v=' + video_id
2724 webpage = None
2725 if 'webpage' not in self._configuration_arg('player_skip'):
2726 webpage = self._download_webpage(
2727 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2728
2729 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2730
2731 player_responses, player_url = self._extract_player_responses(
2732 self._get_requested_clients(url, smuggled_data),
2733 video_id, webpage, master_ytcfg)
2734
2735 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2736
2737 playability_statuses = traverse_obj(
2738 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2739
2740 trailer_video_id = get_first(
2741 playability_statuses,
2742 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2743 expected_type=str)
2744 if trailer_video_id:
2745 return self.url_result(
2746 trailer_video_id, self.ie_key(), trailer_video_id)
2747
2748 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2749 if webpage else (lambda x: None))
2750
2751 video_details = traverse_obj(
2752 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2753 microformats = traverse_obj(
2754 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2755 expected_type=dict, default=[])
2756 video_title = (
2757 get_first(video_details, 'title')
2758 or self._get_text(microformats, (..., 'title'))
2759 or search_meta(['og:title', 'twitter:title', 'title']))
2760 video_description = get_first(video_details, 'shortDescription')
2761
2762 if not smuggled_data.get('force_singlefeed', False):
2763 if not self.get_param('noplaylist'):
2764 multifeed_metadata_list = get_first(
2765 player_responses,
2766 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2767 expected_type=str)
2768 if multifeed_metadata_list:
2769 entries = []
2770 feed_ids = []
2771 for feed in multifeed_metadata_list.split(','):
2772 # Unquote should take place before split on comma (,) since textual
2773 # fields may contain comma as well (see
2774 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2775 feed_data = compat_parse_qs(
2776 compat_urllib_parse_unquote_plus(feed))
2777
2778 def feed_entry(name):
2779 return try_get(
2780 feed_data, lambda x: x[name][0], compat_str)
2781
2782 feed_id = feed_entry('id')
2783 if not feed_id:
2784 continue
2785 feed_title = feed_entry('title')
2786 title = video_title
2787 if feed_title:
2788 title += ' (%s)' % feed_title
2789 entries.append({
2790 '_type': 'url_transparent',
2791 'ie_key': 'Youtube',
2792 'url': smuggle_url(
2793 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2794 {'force_singlefeed': True}),
2795 'title': title,
2796 })
2797 feed_ids.append(feed_id)
2798 self.to_screen(
2799 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2800 % (', '.join(feed_ids), video_id))
2801 return self.playlist_result(
2802 entries, video_id, video_title, video_description)
2803 else:
2804 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2805
2806 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2807 is_live = get_first(video_details, 'isLive')
2808 if is_live is None:
2809 is_live = get_first(live_broadcast_details, 'isLiveNow')
2810
2811 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2812 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2813
2814 if not formats:
2815 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2816 self.report_drm(video_id)
2817 pemr = get_first(
2818 playability_statuses,
2819 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2820 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2821 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2822 if subreason:
2823 if subreason == 'The uploader has not made this video available in your country.':
2824 countries = get_first(microformats, 'availableCountries')
2825 if not countries:
2826 regions_allowed = search_meta('regionsAllowed')
2827 countries = regions_allowed.split(',') if regions_allowed else None
2828 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2829 reason += f'. {subreason}'
2830 if reason:
2831 self.raise_no_formats(reason, expected=True)
2832
2833 for f in formats:
2834 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
2835 f['source_preference'] = -10
2836 # TODO: this method is not reliable
2837 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
2838
2839 # Source is given priority since formats that throttle are given lower source_preference
2840 # When throttling issue is fully fixed, remove this
2841 self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang'))
2842
2843 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2844 if not keywords and webpage:
2845 keywords = [
2846 unescapeHTML(m.group('content'))
2847 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2848 for keyword in keywords:
2849 if keyword.startswith('yt:stretch='):
2850 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2851 if mobj:
2852 # NB: float is intentional for forcing float division
2853 w, h = (float(v) for v in mobj.groups())
2854 if w > 0 and h > 0:
2855 ratio = w / h
2856 for f in formats:
2857 if f.get('vcodec') != 'none':
2858 f['stretched_ratio'] = ratio
2859 break
2860
2861 thumbnails = []
2862 thumbnail_dicts = traverse_obj(
2863 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2864 expected_type=dict, default=[])
2865 for thumbnail in thumbnail_dicts:
2866 thumbnail_url = thumbnail.get('url')
2867 if not thumbnail_url:
2868 continue
2869 # Sometimes YouTube gives a wrong thumbnail URL. See:
2870 # https://github.com/yt-dlp/yt-dlp/issues/233
2871 # https://github.com/ytdl-org/youtube-dl/issues/28023
2872 if 'maxresdefault' in thumbnail_url:
2873 thumbnail_url = thumbnail_url.split('?')[0]
2874 thumbnails.append({
2875 'url': thumbnail_url,
2876 'height': int_or_none(thumbnail.get('height')),
2877 'width': int_or_none(thumbnail.get('width')),
2878 })
2879 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2880 if thumbnail_url:
2881 thumbnails.append({
2882 'url': thumbnail_url,
2883 })
2884 # The best resolution thumbnail sometimes does not appear in the webpage
2885 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2886 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2887 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2888 # TODO: Test them also? - For some videos, even these don't exist
2889 guaranteed_thumbnail_names = [
2890 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2891 'mqdefault', 'mq1', 'mq2', 'mq3',
2892 'default', '1', '2', '3'
2893 ]
2894 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2895 n_thumbnail_names = len(thumbnail_names)
2896
2897 thumbnails.extend({
2898 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2899 video_id=video_id, name=name, ext=ext,
2900 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2901 '_test_url': name in hq_thumbnail_names,
2902 } for name in thumbnail_names for ext in ('webp', 'jpg'))
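# Rank the thumbnails: names earlier in thumbnail_names (higher quality) get
# a higher preference, and webp is preferred over jpg for the same name;
# URLs not matching any known name sort last.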
2903 for thumb in thumbnails:
2904 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2905 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2906 self._remove_duplicate_formats(thumbnails)
2907
2908 category = get_first(microformats, 'category') or search_meta('genre')
2909 channel_id = str_or_none(
2910 get_first(video_details, 'channelId')
2911 or get_first(microformats, 'externalChannelId')
2912 or search_meta('channelId'))
2913 duration = int_or_none(
2914 get_first(video_details, 'lengthSeconds')
2915 or get_first(microformats, 'lengthSeconds')
2916 or parse_duration(search_meta('duration'))) or None
2917 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2918
2919 live_content = get_first(video_details, 'isLiveContent')
2920 is_upcoming = get_first(video_details, 'isUpcoming')
2921 if is_live is None:
2922 if is_upcoming or live_content is False:
2923 is_live = False
2924 if is_upcoming is None and (live_content or is_live):
2925 is_upcoming = False
2926 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2927 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2928 if not duration and live_endtime and live_starttime:
2929 duration = live_endtime - live_starttime
2930
2931 info = {
2932 'id': video_id,
2933 'title': self._live_title(video_title) if is_live else video_title,
2934 'formats': formats,
2935 'thumbnails': thumbnails,
2936 'description': video_description,
2937 'upload_date': unified_strdate(
2938 get_first(microformats, 'uploadDate')
2939 or search_meta('uploadDate')),
2940 'uploader': get_first(video_details, 'author'),
2941 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2942 'uploader_url': owner_profile_url,
2943 'channel_id': channel_id,
2944 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2945 'duration': duration,
2946 'view_count': int_or_none(
2947 get_first((video_details, microformats), (..., 'viewCount'))
2948 or search_meta('interactionCount')),
2949 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2950 'age_limit': 18 if (
2951 get_first(microformats, 'isFamilySafe') is False
2952 or search_meta('isFamilyFriendly') == 'false'
2953 or search_meta('og:restrictions:age') == '18+') else 0,
2954 'webpage_url': webpage_url,
2955 'categories': [category] if category else None,
2956 'tags': keywords,
2957 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2958 'is_live': is_live,
2959 'was_live': (False if is_live or is_upcoming or live_content is False
2960 else None if is_live is None or is_upcoming is None
2961 else live_content),
2962 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2963 'release_timestamp': live_starttime,
2964 }
2965
2966 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2967 # Converted into dicts to remove duplicates
2968 captions = {
2969 sub.get('baseUrl'): sub
2970 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2971 translation_languages = {
2972 lang.get('languageCode'): lang.get('languageName')
2973 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2974 subtitles = {}
2975 if pctr:
2976 def process_language(container, base_url, lang_code, sub_name, query):
2977 lang_subs = container.setdefault(lang_code, [])
2978 for fmt in self._SUBTITLE_FORMATS:
2979 query.update({
2980 'fmt': fmt,
2981 })
2982 lang_subs.append({
2983 'ext': fmt,
2984 'url': update_url_query(base_url, query),
2985 'name': sub_name,
2986 })
2987
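# Manually created caption tracks are added to 'subtitles' under their own language
# code. ASR (auto-generated) tracks instead serve as the base URL for
# 'automatic_captions', requested once per available translation language via 'tlang'.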
2988 for base_url, caption_track in captions.items():
2989 if not base_url:
2990 continue
2991 if caption_track.get('kind') != 'asr':
2992 lang_code = (
2993 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2994 or caption_track.get('languageCode'))
2995 if not lang_code:
2996 continue
2997 process_language(
2998 subtitles, base_url, lang_code,
2999 traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False),
3000 {})
3001 continue
3002 automatic_captions = {}
3003 for trans_code, trans_name in translation_languages.items():
3004 if not trans_code:
3005 continue
3006 process_language(
3007 automatic_captions, base_url, trans_code,
3008 self._get_text(trans_name, max_runs=1),
3009 {'tlang': trans_code})
3010 info['automatic_captions'] = automatic_captions
3011 info['subtitles'] = subtitles
3012
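# Pick up start/end times given as 'start', 't' or 'end' parameters in either the
# query string or the fragment of the original URL.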
3013 parsed_url = compat_urllib_parse_urlparse(url)
3014 for component in [parsed_url.fragment, parsed_url.query]:
3015 query = compat_parse_qs(component)
3016 for k, v in query.items():
3017 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3018 d_k += '_time'
3019 if d_k not in info and k in s_ks:
3020 info[d_k] = parse_duration(query[k][0])
3021
3022 # Youtube Music Auto-generated description
3023 if video_description:
3024 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3025 if mobj:
3026 release_year = mobj.group('release_year')
3027 release_date = mobj.group('release_date')
3028 if release_date:
3029 release_date = release_date.replace('-', '')
3030 if not release_year:
3031 release_year = release_date[:4]
3032 info.update({
3033 'album': mobj.group('album').strip(),
3034 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3035 'track': mobj.group('track').strip(),
3036 'release_date': release_date,
3037 'release_year': int_or_none(release_year),
3038 })
3039
3040 initial_data = None
3041 if webpage:
3042 initial_data = self._extract_yt_initial_variable(
3043 webpage, self._YT_INITIAL_DATA_RE, video_id,
3044 'yt initial data')
3045 if not initial_data:
3046 query = {'videoId': video_id}
3047 query.update(self._get_checkok_params())
3048 initial_data = self._extract_response(
3049 item_id=video_id, ep='next', fatal=False,
3050 ytcfg=master_ytcfg, query=query,
3051 headers=self.generate_api_headers(ytcfg=master_ytcfg),
3052 note='Downloading initial data API JSON')
3053
3054 try:
3055 # This will error if there is no livechat
3056 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3057 info['subtitles']['live_chat'] = [{
3058 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3059 'video_id': video_id,
3060 'ext': 'json',
3061 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3062 }]
3063 except (KeyError, IndexError, TypeError):
3064 pass
3065
3066 if initial_data:
3067 info['chapters'] = (
3068 self._extract_chapters_from_json(initial_data, duration)
3069 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3070 or None)
3071
3072 contents = try_get(
3073 initial_data,
3074 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3075 list) or []
3076 for content in contents:
3077 vpir = content.get('videoPrimaryInfoRenderer')
3078 if vpir:
3079 stl = vpir.get('superTitleLink')
3080 if stl:
3081 stl = self._get_text(stl)
3082 if try_get(
3083 vpir,
3084 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3085 info['location'] = stl
3086 else:
3087 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3088 if mobj:
3089 info.update({
3090 'series': mobj.group(1),
3091 'season_number': int(mobj.group(2)),
3092 'episode_number': int(mobj.group(3)),
3093 })
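# Like/dislike counts are recovered from the accessibility labels of the
# like/dislike toggle buttons; two different label formats are tried below.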
3094 for tlb in (try_get(
3095 vpir,
3096 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3097 list) or []):
3098 tbr = tlb.get('toggleButtonRenderer') or {}
3099 for getter, regex in [(
3100 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3101 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3102 lambda x: x['accessibility'],
3103 lambda x: x['accessibilityData']['accessibilityData'],
3104 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3105 label = (try_get(tbr, getter, dict) or {}).get('label')
3106 if label:
3107 mobj = re.match(regex, label)
3108 if mobj:
3109 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3110 break
3111 sbr_tooltip = try_get(
3112 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3113 if sbr_tooltip:
3114 like_count, dislike_count = sbr_tooltip.split(' / ')
3115 info.update({
3116 'like_count': str_to_int(like_count),
3117 'dislike_count': str_to_int(dislike_count),
3118 })
3119 vsir = content.get('videoSecondaryInfoRenderer')
3120 if vsir:
3121 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3122 rows = try_get(
3123 vsir,
3124 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3125 list) or []
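# A divider line between metadata rows indicates the video contains multiple songs,
# in which case album/artist/track would be ambiguous and are not extracted.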
3126 multiple_songs = False
3127 for row in rows:
3128 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3129 multiple_songs = True
3130 break
3131 for row in rows:
3132 mrr = row.get('metadataRowRenderer') or {}
3133 mrr_title = mrr.get('title')
3134 if not mrr_title:
3135 continue
3136 mrr_title = self._get_text(mrr, 'title')
3137 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3138 if mrr_title == 'License':
3139 info['license'] = mrr_contents_text
3140 elif not multiple_songs:
3141 if mrr_title == 'Album':
3142 info['album'] = mrr_contents_text
3143 elif mrr_title == 'Artist':
3144 info['artist'] = mrr_contents_text
3145 elif mrr_title == 'Song':
3146 info['track'] = mrr_contents_text
3147
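# Fall back to the uploader fields when the corresponding channel fields are missing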
3148 fallbacks = {
3149 'channel': 'uploader',
3150 'channel_id': 'uploader_id',
3151 'channel_url': 'uploader_url',
3152 }
3153 for to, frm in fallbacks.items():
3154 if not info.get(to):
3155 info[to] = info.get(frm)
3156
3157 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3158 v = info.get(s_k)
3159 if v:
3160 info[d_k] = v
3161
3162 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3163 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3164 is_membersonly = None
3165 is_premium = None
3166 if initial_data and is_private is not None:
3167 is_membersonly = False
3168 is_premium = False
3169 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3170 badge_labels = set()
3171 for content in contents:
3172 if not isinstance(content, dict):
3173 continue
3174 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3175 for badge_label in badge_labels:
3176 if badge_label.lower() == 'members only':
3177 is_membersonly = True
3178 elif badge_label.lower() == 'premium':
3179 is_premium = True
3180 elif badge_label.lower() == 'unlisted':
3181 is_unlisted = True
3182
3183 info['availability'] = self._availability(
3184 is_private=is_private,
3185 needs_premium=is_premium,
3186 needs_subscription=is_membersonly,
3187 needs_auth=info['age_limit'] >= 18,
3188 is_unlisted=None if is_private is None else is_unlisted)
3189
3190 info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
3191
3192 self.mark_watched(video_id, player_responses)
3193
3194 return info
3195
3196
3197 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3198 IE_DESC = 'YouTube.com tab'
3199 _VALID_URL = r'''(?x)
3200 https?://
3201 (?:\w+\.)?
3202 (?:
3203 youtube(?:kids)?\.com|
3204 invidio\.us
3205 )/
3206 (?:
3207 (?P<channel_type>channel|c|user|browse)/|
3208 (?P<not_channel>
3209 feed/|hashtag/|
3210 (?:playlist|watch)\?.*?\blist=
3211 )|
3212 (?!(?:%s)\b) # Direct URLs
3213 )
3214 (?P<id>[^/?\#&]+)
3215 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3216 IE_NAME = 'youtube:tab'
3217
3218 _TESTS = [{
3219 'note': 'playlists, multipage',
3220 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3221 'playlist_mincount': 94,
3222 'info_dict': {
3223 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3224 'title': 'Игорь Клейнер - Playlists',
3225 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3226 'uploader': 'Игорь Клейнер',
3227 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3228 },
3229 }, {
3230 'note': 'playlists, multipage, different order',
3231 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3232 'playlist_mincount': 94,
3233 'info_dict': {
3234 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3235 'title': 'Игорь Клейнер - Playlists',
3236 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3237 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3238 'uploader': 'Игорь Клейнер',
3239 },
3240 }, {
3241 'note': 'playlists, series',
3242 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3243 'playlist_mincount': 5,
3244 'info_dict': {
3245 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3246 'title': '3Blue1Brown - Playlists',
3247 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3248 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3249 'uploader': '3Blue1Brown',
3250 },
3251 }, {
3252 'note': 'playlists, singlepage',
3253 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3254 'playlist_mincount': 4,
3255 'info_dict': {
3256 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3257 'title': 'ThirstForScience - Playlists',
3258 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3259 'uploader': 'ThirstForScience',
3260 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3261 }
3262 }, {
3263 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3264 'only_matching': True,
3265 }, {
3266 'note': 'basic, single video playlist',
3267 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3268 'info_dict': {
3269 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3270 'uploader': 'Sergey M.',
3271 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3272 'title': 'youtube-dl public playlist',
3273 },
3274 'playlist_count': 1,
3275 }, {
3276 'note': 'empty playlist',
3277 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3278 'info_dict': {
3279 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3280 'uploader': 'Sergey M.',
3281 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3282 'title': 'youtube-dl empty playlist',
3283 },
3284 'playlist_count': 0,
3285 }, {
3286 'note': 'Home tab',
3287 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3288 'info_dict': {
3289 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3290 'title': 'lex will - Home',
3291 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3292 'uploader': 'lex will',
3293 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3294 },
3295 'playlist_mincount': 2,
3296 }, {
3297 'note': 'Videos tab',
3298 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3299 'info_dict': {
3300 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3301 'title': 'lex will - Videos',
3302 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3303 'uploader': 'lex will',
3304 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3305 },
3306 'playlist_mincount': 975,
3307 }, {
3308 'note': 'Videos tab, sorted by popular',
3309 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3310 'info_dict': {
3311 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3312 'title': 'lex will - Videos',
3313 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3314 'uploader': 'lex will',
3315 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3316 },
3317 'playlist_mincount': 199,
3318 }, {
3319 'note': 'Playlists tab',
3320 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3321 'info_dict': {
3322 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3323 'title': 'lex will - Playlists',
3324 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3325 'uploader': 'lex will',
3326 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3327 },
3328 'playlist_mincount': 17,
3329 }, {
3330 'note': 'Community tab',
3331 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3332 'info_dict': {
3333 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3334 'title': 'lex will - Community',
3335 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3336 'uploader': 'lex will',
3337 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3338 },
3339 'playlist_mincount': 18,
3340 }, {
3341 'note': 'Channels tab',
3342 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3343 'info_dict': {
3344 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3345 'title': 'lex will - Channels',
3346 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3347 'uploader': 'lex will',
3348 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3349 },
3350 'playlist_mincount': 12,
3351 }, {
3352 'note': 'Search tab',
3353 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3354 'playlist_mincount': 40,
3355 'info_dict': {
3356 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3357 'title': '3Blue1Brown - Search - linear algebra',
3358 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3359 'uploader': '3Blue1Brown',
3360 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3361 },
3362 }, {
3363 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3364 'only_matching': True,
3365 }, {
3366 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3367 'only_matching': True,
3368 }, {
3369 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3370 'only_matching': True,
3371 }, {
3372 'note': 'Playlist with deleted videos (#651). As a bonus, video #51 also appears twice in this list.',
3373 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3374 'info_dict': {
3375 'title': '29C3: Not my department',
3376 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3377 'uploader': 'Christiaan008',
3378 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3379 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3380 },
3381 'playlist_count': 96,
3382 }, {
3383 'note': 'Large playlist',
3384 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3385 'info_dict': {
3386 'title': 'Uploads from Cauchemar',
3387 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3388 'uploader': 'Cauchemar',
3389 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3390 },
3391 'playlist_mincount': 1123,
3392 }, {
3393 'note': 'even larger playlist, 8832 videos',
3394 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3395 'only_matching': True,
3396 }, {
3397 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3398 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3399 'info_dict': {
3400 'title': 'Uploads from Interstellar Movie',
3401 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3402 'uploader': 'Interstellar Movie',
3403 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3404 },
3405 'playlist_mincount': 21,
3406 }, {
3407 'note': 'Playlist with "show unavailable videos" button',
3408 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3409 'info_dict': {
3410 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3411 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3412 'uploader': 'Phim Siêu Nhân Nhật Bản',
3413 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3414 },
3415 'playlist_mincount': 200,
3416 }, {
3417 'note': 'Playlist with unavailable videos in page 7',
3418 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3419 'info_dict': {
3420 'title': 'Uploads from BlankTV',
3421 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3422 'uploader': 'BlankTV',
3423 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3424 },
3425 'playlist_mincount': 1000,
3426 }, {
3427 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3428 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3429 'info_dict': {
3430 'title': 'Data Analysis with Dr Mike Pound',
3431 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3432 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3433 'uploader': 'Computerphile',
3434 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3435 },
3436 'playlist_mincount': 11,
3437 }, {
3438 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3439 'only_matching': True,
3440 }, {
3441 'note': 'Playlist URL that does not actually serve a playlist',
3442 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3443 'info_dict': {
3444 'id': 'FqZTN594JQw',
3445 'ext': 'webm',
3446 'title': "Smiley's People 01 detective, Adventure Series, Action",
3447 'uploader': 'STREEM',
3448 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3449 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3450 'upload_date': '20150526',
3451 'license': 'Standard YouTube License',
3452 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3453 'categories': ['People & Blogs'],
3454 'tags': list,
3455 'view_count': int,
3456 'like_count': int,
3457 'dislike_count': int,
3458 },
3459 'params': {
3460 'skip_download': True,
3461 },
3462 'skip': 'This video is not available.',
3463 'add_ie': [YoutubeIE.ie_key()],
3464 }, {
3465 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3466 'only_matching': True,
3467 }, {
3468 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3469 'only_matching': True,
3470 }, {
3471 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3472 'info_dict': {
3473 'id': '3yImotZU3tw', # This will keep changing
3474 'ext': 'mp4',
3475 'title': compat_str,
3476 'uploader': 'Sky News',
3477 'uploader_id': 'skynews',
3478 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3479 'upload_date': r're:\d{8}',
3480 'description': compat_str,
3481 'categories': ['News & Politics'],
3482 'tags': list,
3483 'like_count': int,
3484 'dislike_count': int,
3485 },
3486 'params': {
3487 'skip_download': True,
3488 },
3489 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3490 }, {
3491 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3492 'info_dict': {
3493 'id': 'a48o2S1cPoo',
3494 'ext': 'mp4',
3495 'title': 'The Young Turks - Live Main Show',
3496 'uploader': 'The Young Turks',
3497 'uploader_id': 'TheYoungTurks',
3498 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3499 'upload_date': '20150715',
3500 'license': 'Standard YouTube License',
3501 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3502 'categories': ['News & Politics'],
3503 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3504 'like_count': int,
3505 'dislike_count': int,
3506 },
3507 'params': {
3508 'skip_download': True,
3509 },
3510 'only_matching': True,
3511 }, {
3512 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3513 'only_matching': True,
3514 }, {
3515 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3516 'only_matching': True,
3517 }, {
3518 'note': 'A channel that is not live. Should raise error',
3519 'url': 'https://www.youtube.com/user/numberphile/live',
3520 'only_matching': True,
3521 }, {
3522 'url': 'https://www.youtube.com/feed/trending',
3523 'only_matching': True,
3524 }, {
3525 'url': 'https://www.youtube.com/feed/library',
3526 'only_matching': True,
3527 }, {
3528 'url': 'https://www.youtube.com/feed/history',
3529 'only_matching': True,
3530 }, {
3531 'url': 'https://www.youtube.com/feed/subscriptions',
3532 'only_matching': True,
3533 }, {
3534 'url': 'https://www.youtube.com/feed/watch_later',
3535 'only_matching': True,
3536 }, {
3537 'note': 'Recommended - redirects to home page.',
3538 'url': 'https://www.youtube.com/feed/recommended',
3539 'only_matching': True,
3540 }, {
3541 'note': 'inline playlist whose continuations do not always work',
3542 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3543 'only_matching': True,
3544 }, {
3545 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3546 'only_matching': True,
3547 }, {
3548 'url': 'https://www.youtube.com/course',
3549 'only_matching': True,
3550 }, {
3551 'url': 'https://www.youtube.com/zsecurity',
3552 'only_matching': True,
3553 }, {
3554 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3555 'only_matching': True,
3556 }, {
3557 'url': 'https://www.youtube.com/TheYoungTurks/live',
3558 'only_matching': True,
3559 }, {
3560 'url': 'https://www.youtube.com/hashtag/cctv9',
3561 'info_dict': {
3562 'id': 'cctv9',
3563 'title': '#cctv9',
3564 },
3565 'playlist_mincount': 350,
3566 }, {
3567 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3568 'only_matching': True,
3569 }, {
3570 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3571 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3572 'only_matching': True
3573 }, {
3574 'note': '/browse/ should redirect to /channel/',
3575 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3576 'only_matching': True
3577 }, {
3578 'note': 'VLPL, should redirect to playlist?list=PL...',
3579 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3580 'info_dict': {
3581 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3582 'uploader': 'NoCopyrightSounds',
3583 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3584 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3585 'title': 'NCS Releases',
3586 },
3587 'playlist_mincount': 166,
3588 }, {
3589 'note': 'Topic, should redirect to playlist?list=UU...',
3590 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3591 'info_dict': {
3592 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3593 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3594 'title': 'Uploads from Royalty Free Music - Topic',
3595 'uploader': 'Royalty Free Music - Topic',
3596 },
3597 'expected_warnings': [
3598 'A channel/user page was given',
3599 'The URL does not have a videos tab',
3600 ],
3601 'playlist_mincount': 101,
3602 }, {
3603 'note': 'Topic without a UU playlist',
3604 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3605 'info_dict': {
3606 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3607 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3608 },
3609 'expected_warnings': [
3610 'A channel/user page was given',
3611 'The URL does not have a videos tab',
3612 'Falling back to channel URL',
3613 ],
3614 'playlist_mincount': 9,
3615 }, {
3616 'note': 'Youtube music Album',
3617 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3618 'info_dict': {
3619 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3620 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3621 },
3622 'playlist_count': 50,
3623 }, {
3624 'note': 'unlisted single video playlist',
3625 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3626 'info_dict': {
3627 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3628 'uploader': 'colethedj',
3629 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3630 'title': 'yt-dlp unlisted playlist test',
3631 'availability': 'unlisted'
3632 },
3633 'playlist_count': 1,
3634 }, {
3635 'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
3636 'url': 'https://www.youtube.com/feed/recommended',
3637 'info_dict': {
3638 'id': 'recommended',
3639 'title': 'recommended',
3640 },
3641 'playlist_mincount': 50,
3642 'params': {
3643 'skip_download': True,
3644 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3645 },
3646 }, {
3647 'note': 'API Fallback: /videos tab, sorted by oldest first',
3648 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
3649 'info_dict': {
3650 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3651 'title': 'Cody\'sLab - Videos',
3652 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
3653 'uploader': 'Cody\'sLab',
3654 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
3655 },
3656 'playlist_mincount': 650,
3657 'params': {
3658 'skip_download': True,
3659 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3660 },
3661 }, {
3662 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
3663 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3664 'info_dict': {
3665 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3666 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3667 'title': 'Uploads from Royalty Free Music - Topic',
3668 'uploader': 'Royalty Free Music - Topic',
3669 },
3670 'expected_warnings': [
3671 'A channel/user page was given',
3672 'The URL does not have a videos tab',
3673 ],
3674 'playlist_mincount': 101,
3675 'params': {
3676 'skip_download': True,
3677 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
3678 },
3679 }]
3680
3681 @classmethod
3682 def suitable(cls, url):
3683 return False if YoutubeIE.suitable(url) else super(
3684 YoutubeTabIE, cls).suitable(url)
3685
3686 def _extract_channel_id(self, webpage):
3687 channel_id = self._html_search_meta(
3688 'channelId', webpage, 'channel id', default=None)
3689 if channel_id:
3690 return channel_id
3691 channel_url = self._html_search_meta(
3692 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3693 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3694 'twitter:app:url:googleplay'), webpage, 'channel url')
3695 return self._search_regex(
3696 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
3697 channel_url, 'channel id')
3698
3699 @staticmethod
3700 def _extract_basic_item_renderer(item):
3701 # Modified from _extract_grid_item_renderer
3702 known_basic_renderers = (
3703 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3704 )
3705 for key, renderer in item.items():
3706 if not isinstance(renderer, dict):
3707 continue
3708 elif key in known_basic_renderers:
3709 return renderer
3710 elif key.startswith('grid') and key.endswith('Renderer'):
3711 return renderer
3712
3713 def _grid_entries(self, grid_renderer):
3714 for item in grid_renderer['items']:
3715 if not isinstance(item, dict):
3716 continue
3717 renderer = self._extract_basic_item_renderer(item)
3718 if not isinstance(renderer, dict):
3719 continue
3720 title = self._get_text(renderer, 'title')
3721
3722 # playlist
3723 playlist_id = renderer.get('playlistId')
3724 if playlist_id:
3725 yield self.url_result(
3726 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3727 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3728 video_title=title)
3729 continue
3730 # video
3731 video_id = renderer.get('videoId')
3732 if video_id:
3733 yield self._extract_video(renderer)
3734 continue
3735 # channel
3736 channel_id = renderer.get('channelId')
3737 if channel_id:
3738 yield self.url_result(
3739 'https://www.youtube.com/channel/%s' % channel_id,
3740 ie=YoutubeTabIE.ie_key(), video_title=title)
3741 continue
3742 # generic endpoint URL support
3743 ep_url = urljoin('https://www.youtube.com/', try_get(
3744 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3745 compat_str))
3746 if ep_url:
3747 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3748 if ie.suitable(ep_url):
3749 yield self.url_result(
3750 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3751 break
3752
3753 def _shelf_entries_from_content(self, shelf_renderer):
3754 content = shelf_renderer.get('content')
3755 if not isinstance(content, dict):
3756 return
3757 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3758 if renderer:
3759 # TODO: add support for nested playlists so each shelf is processed
3760 # as separate playlist
3761 # TODO: this includes only first N items
3762 for entry in self._grid_entries(renderer):
3763 yield entry
3764 renderer = content.get('horizontalListRenderer')
3765 if renderer:
3766 # TODO
3767 pass
3768
3769 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3770 ep = try_get(
3771 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3772 compat_str)
3773 shelf_url = urljoin('https://www.youtube.com', ep)
3774 if shelf_url:
3775 # Skipping links to other channels; note that checking for
3776 # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
3777 # will not work
3778 if skip_channels and '/channels?' in shelf_url:
3779 return
3780 title = self._get_text(shelf_renderer, 'title')
3781 yield self.url_result(shelf_url, video_title=title)
3782 # Shelf may not contain a shelf URL; fall back to extraction from content
3783 for entry in self._shelf_entries_from_content(shelf_renderer):
3784 yield entry
3785
3786 def _playlist_entries(self, video_list_renderer):
3787 for content in video_list_renderer['contents']:
3788 if not isinstance(content, dict):
3789 continue
3790 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3791 if not isinstance(renderer, dict):
3792 continue
3793 video_id = renderer.get('videoId')
3794 if not video_id:
3795 continue
3796 yield self._extract_video(renderer)
3797
3798 def _rich_entries(self, rich_grid_renderer):
3799 renderer = try_get(
3800 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3801 video_id = renderer.get('videoId')
3802 if not video_id:
3803 return
3804 yield self._extract_video(renderer)
3805
3806 def _video_entry(self, video_renderer):
3807 video_id = video_renderer.get('videoId')
3808 if video_id:
3809 return self._extract_video(video_renderer)
3810
3811 def _post_thread_entries(self, post_thread_renderer):
3812 post_renderer = try_get(
3813 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3814 if not post_renderer:
3815 return
3816 # video attachment
3817 video_renderer = try_get(
3818 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3819 video_id = video_renderer.get('videoId')
3820 if video_id:
3821 entry = self._extract_video(video_renderer)
3822 if entry:
3823 yield entry
3824 # playlist attachment
3825 playlist_id = try_get(
3826 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3827 if playlist_id:
3828 yield self.url_result(
3829 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3830 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3831 # inline video links
3832 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3833 for run in runs:
3834 if not isinstance(run, dict):
3835 continue
3836 ep_url = try_get(
3837 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3838 if not ep_url:
3839 continue
3840 if not YoutubeIE.suitable(ep_url):
3841 continue
3842 ep_video_id = YoutubeIE._match_id(ep_url)
3843 if video_id == ep_video_id:
3844 continue
3845 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3846
3847 def _post_thread_continuation_entries(self, post_thread_continuation):
3848 contents = post_thread_continuation.get('contents')
3849 if not isinstance(contents, list):
3850 return
3851 for content in contents:
3852 renderer = content.get('backstagePostThreadRenderer')
3853 if not isinstance(renderer, dict):
3854 continue
3855 for entry in self._post_thread_entries(renderer):
3856 yield entry
3857
3858 r''' # unused
3859 def _rich_grid_entries(self, contents):
3860 for content in contents:
3861 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3862 if video_renderer:
3863 entry = self._video_entry(video_renderer)
3864 if entry:
3865 yield entry
3866 '''
3867 def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
3868
3869 def extract_entries(parent_renderer): # this needs to be called again for continuations to work with feeds
3870 contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3871 for content in contents:
3872 if not isinstance(content, dict):
3873 continue
3874 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3875 if not is_renderer:
3876 renderer = content.get('richItemRenderer')
3877 if renderer:
3878 for entry in self._rich_entries(renderer):
3879 yield entry
3880 continuation_list[0] = self._extract_continuation(parent_renderer)
3881 continue
3882 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3883 for isr_content in isr_contents:
3884 if not isinstance(isr_content, dict):
3885 continue
3886
3887 known_renderers = {
3888 'playlistVideoListRenderer': self._playlist_entries,
3889 'gridRenderer': self._grid_entries,
3890 'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
3891 'backstagePostThreadRenderer': self._post_thread_entries,
3892 'videoRenderer': lambda x: [self._video_entry(x)],
3893 }
3894 for key, renderer in isr_content.items():
3895 if key not in known_renderers:
3896 continue
3897 for entry in known_renderers[key](renderer):
3898 if entry:
3899 yield entry
3900 continuation_list[0] = self._extract_continuation(renderer)
3901 break
3902
3903 if not continuation_list[0]:
3904 continuation_list[0] = self._extract_continuation(is_renderer)
3905
3906 if not continuation_list[0]:
3907 continuation_list[0] = self._extract_continuation(parent_renderer)
3908
3909 continuation_list = [None] # Python 2 does not support nonlocal
3910 tab_content = try_get(tab, lambda x: x['content'], dict)
3911 if not tab_content:
3912 return
3913 parent_renderer = (
3914 try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3915 or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
3916 for entry in extract_entries(parent_renderer):
3917 yield entry
3918 continuation = continuation_list[0]
3919
3920 for page_num in itertools.count(1):
3921 if not continuation:
3922 break
3923 headers = self.generate_api_headers(
3924 ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
3925 response = self._extract_response(
3926 item_id='%s page %s' % (item_id, page_num),
3927 query=continuation, headers=headers, ytcfg=ytcfg,
3928 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3929
3930 if not response:
3931 break
3932 # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
3933 # See: https://github.com/ytdl-org/youtube-dl/issues/28702
3934 visitor_data = self._extract_visitor_data(response) or visitor_data
3935
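# Two continuation response formats are handled: the legacy 'continuationContents'
# object (below), and the newer onResponseReceivedActions/onResponseReceivedEndpoints
# list of appended continuation items (further down).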
3936 known_continuation_renderers = {
3937 'playlistVideoListContinuation': self._playlist_entries,
3938 'gridContinuation': self._grid_entries,
3939 'itemSectionContinuation': self._post_thread_continuation_entries,
3940 'sectionListContinuation': extract_entries, # for feeds
3941 }
3942 continuation_contents = try_get(
3943 response, lambda x: x['continuationContents'], dict) or {}
3944 continuation_renderer = None
3945 for key, value in continuation_contents.items():
3946 if key not in known_continuation_renderers:
3947 continue
3948 continuation_renderer = value
3949 continuation_list = [None]
3950 for entry in known_continuation_renderers[key](continuation_renderer):
3951 yield entry
3952 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3953 break
3954 if continuation_renderer:
3955 continue
3956
3957 known_renderers = {
3958 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3959 'gridVideoRenderer': (self._grid_entries, 'items'),
3960 'gridChannelRenderer': (self._grid_entries, 'items'),
3961 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
3962 'itemSectionRenderer': (extract_entries, 'contents'), # for feeds
3963 'richItemRenderer': (extract_entries, 'contents'), # for hashtag
3964 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
3965 }
3966 on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3967 continuation_items = try_get(
3968 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
3969 continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3970 video_items_renderer = None
3971 for key, value in continuation_item.items():
3972 if key not in known_renderers:
3973 continue
3974 video_items_renderer = {known_renderers[key][1]: continuation_items}
3975 continuation_list = [None]
3976 for entry in known_renderers[key][0](video_items_renderer):
3977 yield entry
3978 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
3979 break
3980 if video_items_renderer:
3981 continue
3982 break
3983
3984 @staticmethod
3985 def _extract_selected_tab(tabs):
3986 for tab in tabs:
3987 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3988 if renderer.get('selected') is True:
3989 return renderer
3990 else:
3991 raise ExtractorError('Unable to find selected tab')
3992
3993 @classmethod
3994 def _extract_uploader(cls, data):
3995 uploader = {}
3996 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3997 owner = try_get(
3998 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3999 if owner:
4000 uploader['uploader'] = owner.get('text')
4001 uploader['uploader_id'] = try_get(
4002 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
4003 uploader['uploader_url'] = urljoin(
4004 'https://www.youtube.com/',
4005 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
4006 return {k: v for k, v in uploader.items() if v is not None}
4007
4008 def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
4009 playlist_id = title = description = channel_url = channel_name = channel_id = None
4010 thumbnails_list = []
4011 tags = []
4012
4013 selected_tab = self._extract_selected_tab(tabs)
4014 renderer = try_get(
4015 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
4016 if renderer:
4017 channel_name = renderer.get('title')
4018 channel_url = renderer.get('channelUrl')
4019 channel_id = renderer.get('externalId')
4020 else:
4021 renderer = try_get(
4022 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
4023
4024 if renderer:
4025 title = renderer.get('title')
4026 description = renderer.get('description', '')
4027 playlist_id = channel_id
4028 tags = renderer.get('keywords', '').split()
4029 thumbnails_list = (
4030 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
4031 or try_get(
4032 self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
4033 lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
4034 list)
4035 or [])
4036
4037 thumbnails = []
4038 for t in thumbnails_list:
4039 if not isinstance(t, dict):
4040 continue
4041 thumbnail_url = url_or_none(t.get('url'))
4042 if not thumbnail_url:
4043 continue
4044 thumbnails.append({
4045 'url': thumbnail_url,
4046 'width': int_or_none(t.get('width')),
4047 'height': int_or_none(t.get('height')),
4048 })
4049 if playlist_id is None:
4050 playlist_id = item_id
4051 if title is None:
4052 title = (
4053 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
4054 or playlist_id)
4055 title += format_field(selected_tab, 'title', ' - %s')
4056 title += format_field(selected_tab, 'expandedText', ' - %s')
4057 metadata = {
4058 'playlist_id': playlist_id,
4059 'playlist_title': title,
4060 'playlist_description': description,
4061 'uploader': channel_name,
4062 'uploader_id': channel_id,
4063 'uploader_url': channel_url,
4064 'thumbnails': thumbnails,
4065 'tags': tags,
4066 }
4067 availability = self._extract_availability(data)
4068 if availability:
4069 metadata['availability'] = availability
4070 if not channel_id:
4071 metadata.update(self._extract_uploader(data))
4072 metadata.update({
4073 'channel': metadata['uploader'],
4074 'channel_id': metadata['uploader_id'],
4075 'channel_url': metadata['uploader_url']})
4076 return self.playlist_result(
4077 self._entries(
4078 selected_tab, playlist_id, ytcfg,
4079 self._extract_account_syncid(ytcfg, data),
4080 self._extract_visitor_data(data, ytcfg)),
4081 **metadata)
4082
4083 def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
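# Mix playlists are generated on the fly and have no fixed end; each watch-next
# response only contains a window of entries. Keep requesting the next page until
# the first video comes around again or no new videos are returned.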
4084 first_id = last_id = response = None
4085 for page_num in itertools.count(1):
4086 videos = list(self._playlist_entries(playlist))
4087 if not videos:
4088 return
4089 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
4090 if start >= len(videos):
4091 return
4092 for video in videos[start:]:
4093 if video['id'] == first_id:
4094 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
4095 return
4096 yield video
4097 first_id = first_id or videos[0]['id']
4098 last_id = videos[-1]['id']
4099 watch_endpoint = try_get(
4100 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
4101 headers = self.generate_api_headers(
4102 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4103 visitor_data=self._extract_visitor_data(response, data, ytcfg))
4104 query = {
4105 'playlistId': playlist_id,
4106 'videoId': watch_endpoint.get('videoId') or last_id,
4107 'index': watch_endpoint.get('index') or len(videos),
4108 'params': watch_endpoint.get('params') or 'OAE%3D'
4109 }
4110 response = self._extract_response(
4111 item_id='%s page %d' % (playlist_id, page_num),
4112 query=query, ep='next', headers=headers, ytcfg=ytcfg,
4113 check_get_keys='contents'
4114 )
4115 playlist = try_get(
4116 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4117
4118 def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
4119 title = playlist.get('title') or try_get(
4120 data, lambda x: x['titleText']['simpleText'], compat_str)
4121 playlist_id = playlist.get('playlistId') or item_id
4122
4123 # Delegating everything except mix playlists to regular tab-based playlist URL
4124 playlist_url = urljoin(url, try_get(
4125 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4126 compat_str))
4127 if playlist_url and playlist_url != url:
4128 return self.url_result(
4129 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4130 video_title=title)
4131
4132 return self.playlist_result(
4133 self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
4134 playlist_id=playlist_id, playlist_title=title)
4135
4136 def _extract_availability(self, data):
4137 """
4138 Gets the availability of a given playlist/tab.
4139 Note: Unless YouTube tells us explicitly, we do not assume it is public
4140 @param data: response
4141 """
4142 is_private = is_unlisted = None
4143 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4144 badge_labels = self._extract_badges(renderer)
4145
4146 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4147 privacy_dropdown_entries = try_get(
4148 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4149 for renderer_dict in privacy_dropdown_entries:
4150 is_selected = try_get(
4151 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4152 if not is_selected:
4153 continue
4154 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4155 if label:
4156 badge_labels.add(label.lower())
4157 break
4158
4159 for badge_label in badge_labels:
4160 if badge_label == 'unlisted':
4161 is_unlisted = True
4162 elif badge_label == 'private':
4163 is_private = True
4164 elif badge_label == 'public':
4165 is_unlisted = is_private = False
4166 return self._availability(is_private, False, False, False, is_unlisted)
4167
4168 @staticmethod
4169 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4170 sidebar_renderer = try_get(
4171 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4172 for item in sidebar_renderer:
4173 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4174 if renderer:
4175 return renderer
4176
4177 def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
4178 """
4179 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
4180 """
4181 browse_id = params = None
4182 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4183 if not renderer:
4184 return
4185 menu_renderer = try_get(
4186 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
4187 for menu_item in menu_renderer:
4188 if not isinstance(menu_item, dict):
4189 continue
4190 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
4191 text = try_get(
4192 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
4193 if not text or text.lower() != 'show unavailable videos':
4194 continue
4195 browse_endpoint = try_get(
4196 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
4197 browse_id = browse_endpoint.get('browseId')
4198 params = browse_endpoint.get('params')
4199 break
4200
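# Use the browse endpoint of the 'show unavailable videos' menu item if it was found;
# otherwise fall back to the hardcoded params and the 'VL<playlist id>' browse id.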
4201 headers = self.generate_api_headers(
4202 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4203 visitor_data=self._extract_visitor_data(data, ytcfg))
4204 query = {
4205 'params': params or 'wgYCCAA=',
4206 'browseId': browse_id or 'VL%s' % item_id
4207 }
4208 return self._extract_response(
4209 item_id=item_id, headers=headers, query=query,
4210 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
4211 note='Downloading API JSON with unavailable videos')
4212
4213 def _extract_webpage(self, url, item_id, fatal=True):
4214 retries = self.get_param('extractor_retries', 3)
4215 count = -1
4216 webpage = data = last_error = None
4217 while count < retries:
4218 count += 1
4219 # Sometimes youtube returns a webpage with incomplete ytInitialData
4220 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4221 if last_error:
4222 self.report_warning('%s. Retrying ...' % last_error)
4223 try:
4224 webpage = self._download_webpage(
4225 url, item_id,
4226 note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
4227 data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
4228 except ExtractorError as e:
4229 if isinstance(e.cause, network_exceptions):
4230 if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
4231 last_error = error_to_compat_str(e.cause or e.msg)
4232 if count < retries:
4233 continue
4234 if fatal:
4235 raise
4236 self.report_warning(error_to_compat_str(e))
4237 break
4238 else:
4239 try:
4240 self._extract_and_report_alerts(data)
4241 except ExtractorError as e:
4242 if fatal:
4243 raise
4244 self.report_warning(error_to_compat_str(e))
4245 break
4246
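# Treat the data as complete once it contains 'contents' or 'currentVideoEndpoint';
# otherwise retry, as YouTube occasionally returns truncated ytInitialData.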
4247 if dict_get(data, ('contents', 'currentVideoEndpoint')):
4248 break
4249
4250 last_error = 'Incomplete yt initial data received'
4251 if count >= retries:
4252 if fatal:
4253 raise ExtractorError(last_error)
4254 self.report_warning(last_error)
4255 break
4256
4257 return webpage, data
4258
4259 def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
4260 data = None
4261 if 'webpage' not in self._configuration_arg('skip'):
4262 webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
4263 ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
4264 if not data:
4265 if not ytcfg and self.is_authenticated:
4266 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
4267 if 'authcheck' not in self._configuration_arg('skip') and fatal:
4268 raise ExtractorError(
4269 msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
4270 ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
4271 expected=True)
4272 self.report_warning(msg, only_once=True)
4273 data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
4274 return data, ytcfg
4275
4276 def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
4277 headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
4278 resolve_response = self._extract_response(
4279 item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
4280 ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
4281 endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
4282 for ep_key, ep in endpoints.items():
4283 params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
4284 if params:
4285 return self._extract_response(
4286 item_id=item_id, query=params, ep=ep, headers=headers,
4287 ytcfg=ytcfg, fatal=fatal, default_client=default_client,
4288 check_get_keys=('contents', 'currentVideoEndpoint'))
4289 err_note = 'Failed to resolve url (does the playlist exist?)'
4290 if fatal:
4291 raise ExtractorError(err_note, expected=True)
4292 self.report_warning(err_note, item_id)
4293
4294 @staticmethod
4295 def _smuggle_data(entries, data):
4296 for entry in entries:
4297 if data:
4298 entry['url'] = smuggle_url(entry['url'], data)
4299 yield entry
4300
4301 def _real_extract(self, url):
4302 url, smuggled_data = unsmuggle_url(url, {})
4303 if self.is_music_url(url):
4304 smuggled_data['is_music_url'] = True
4305 info_dict = self.__real_extract(url, smuggled_data)
4306 if info_dict.get('entries'):
4307 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4308 return info_dict
4309
4310 _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4311
4312 def __real_extract(self, url, smuggled_data):
4313 item_id = self._match_id(url)
4314 url = compat_urlparse.urlunparse(
4315 compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
4316 compat_opts = self.get_param('compat_opts', [])
4317
4318 def get_mobj(url):
4319 mobj = self._url_re.match(url).groupdict()
4320 mobj.update((k, '') for k, v in mobj.items() if v is None)
4321 return mobj
4322
4323 mobj = get_mobj(url)
4324 # Youtube returns incomplete data if tabname is not lower case
4325 pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
4326 if is_channel:
4327 if smuggled_data.get('is_music_url'):
4328 if item_id[:2] == 'VL':
4329 # Youtube music VL channels have an equivalent playlist
4330 item_id = item_id[2:]
4331 pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
4332 elif item_id[:2] == 'MP':
4333 # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
4334 mdata = self._extract_tab_endpoint(
4335 'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
4336 murl = traverse_obj(
4337 mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=compat_str)
4338 if not murl:
4339 raise ExtractorError('Failed to resolve album to playlist.')
4340 return self.url_result(murl, ie=YoutubeTabIE.ie_key())
4341 elif mobj['channel_type'] == 'browse':
4342 # Youtube music /browse/ should be changed to /channel/
4343 pre = 'https://www.youtube.com/channel/%s' % item_id
4344 if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
4345 # Home URLs should redirect to /videos/
4346 self.report_warning(
4347 'A channel/user page was given. All the channel\'s videos will be downloaded. '
4348 'To download only the videos in the home page, add a "/featured" to the URL')
4349 tab = '/videos'
4350
4351 url = ''.join((pre, tab, post))
4352 mobj = get_mobj(url)
4353
4354 # Handle both video/playlist URLs
4355 qs = parse_qs(url)
4356 video_id = qs.get('v', [None])[0]
4357 playlist_id = qs.get('list', [None])[0]
4358
4359 if not video_id and mobj['not_channel'].startswith('watch'):
4360 if not playlist_id:
4361 # If there is neither a video nor a playlist id, youtube redirects to the home page, which is undesirable
4362 raise ExtractorError('Unable to recognize tab page')
4363 # Common mistake: https://www.youtube.com/watch?list=playlist_id
4364 self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
4365 url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
4366 mobj = get_mobj(url)
4367
4368 if video_id and playlist_id:
4369 if self.get_param('noplaylist'):
4370 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
4371 return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
4372 self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))
4373
4374 data, ytcfg = self._extract_data(url, item_id)
4375
4376 tabs = try_get(
4377 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4378 if tabs:
4379 selected_tab = self._extract_selected_tab(tabs)
4380 tab_name = selected_tab.get('title', '')
4381 if 'no-youtube-channel-redirect' not in compat_opts:
4382 if mobj['tab'] == '/live':
4383 # Live tab should have redirected to the video
4384 raise ExtractorError('The channel is not currently live', expected=True)
4385 if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
4386 if not mobj['not_channel'] and item_id[:2] == 'UC':
4387 # Topic channels don't have /videos. Use the equivalent playlist instead
4388 self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
4389 pl_id = 'UU%s' % item_id[2:]
4390 pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
4391 try:
4392 data, ytcfg, item_id, url = *self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True), pl_id, pl_url
4393 except ExtractorError:
4394 self.report_warning('The playlist gave an error. Falling back to the channel URL')
4395 else:
4396 self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))
4397
4398 self.write_debug('Final URL: %s' % url)
4399
4400 # YouTube sometimes provides a button to reload the playlist with the unavailable videos included.
4401 if 'no-youtube-unavailable-videos' not in compat_opts:
4402 data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
4403 self._extract_and_report_alerts(data, only_once=True)
4404 tabs = try_get(
4405 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4406 if tabs:
4407 return self._extract_from_tabs(item_id, ytcfg, data, tabs)
4408
4409 playlist = try_get(
4410 data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4411 if playlist:
4412 return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)
4413
4414 video_id = try_get(
4415 data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
4416 compat_str) or video_id
4417 if video_id:
4418 if mobj['tab'] != '/live': # live tab is expected to redirect to video
4419 self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
4420 return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
4421
4422 raise ExtractorError('Unable to recognize tab page')
4423
4424
4425 class YoutubePlaylistIE(InfoExtractor):
4426 IE_DESC = 'YouTube.com playlists'
4427 _VALID_URL = r'''(?x)(?:
4428 (?:https?://)?
4429 (?:\w+\.)?
4430 (?:
4431 (?:
4432 youtube(?:kids)?\.com|
4433 invidio\.us
4434 )
4435 /.*?\?.*?\blist=
4436 )?
4437 (?P<id>%(playlist_id)s)
4438 )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
4439 IE_NAME = 'youtube:playlist'
4440 _TESTS = [{
4441 'note': 'issue #673',
4442 'url': 'PLBB231211A4F62143',
4443 'info_dict': {
4444 'title': '[OLD]Team Fortress 2 (Class-based LP)',
4445 'id': 'PLBB231211A4F62143',
4446 'uploader': 'Wickydoo',
4447 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
4448 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
4449 },
4450 'playlist_mincount': 29,
4451 }, {
4452 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4453 'info_dict': {
4454 'title': 'YDL_safe_search',
4455 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4456 },
4457 'playlist_count': 2,
4458 'skip': 'This playlist is private',
4459 }, {
4460 'note': 'embedded',
4461 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4462 'playlist_count': 4,
4463 'info_dict': {
4464 'title': 'JODA15',
4465 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4466 'uploader': 'milan',
4467 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
4468 }
4469 }, {
4470 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4471 'playlist_mincount': 654,
4472 'info_dict': {
4473 'title': '2018 Chinese New Singles (11/6 updated)',
4474 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4475 'uploader': 'LBK',
4476 'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
4477 'description': 'md5:da521864744d60a198e3a88af4db0d9d',
4478 }
4479 }, {
4480 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
4481 'only_matching': True,
4482 }, {
4483 # music album playlist
4484 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
4485 'only_matching': True,
4486 }]
4487
4488 @classmethod
4489 def suitable(cls, url):
4490 if YoutubeTabIE.suitable(url):
4491 return False
4492 # Hack for lazy extractors until a more generic solution is implemented
4493 # (see #28780)
4494 from .youtube import parse_qs
4495 qs = parse_qs(url)
4496 if qs.get('v', [None])[0]:
4497 return False
4498 return super(YoutubePlaylistIE, cls).suitable(url)
4499
4500 def _real_extract(self, url):
4501 playlist_id = self._match_id(url)
4502 is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
4503 url = update_url_query(
4504 'https://www.youtube.com/playlist',
4505 parse_qs(url) or {'list': playlist_id})
4506 if is_music_url:
4507 url = smuggle_url(url, {'is_music_url': True})
4508 return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
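# In short: a bare playlist ID such as 'PLBB231211A4F62143' (see the first test above)
# is normalised to https://www.youtube.com/playlist?list=PLBB231211A4F62143 and handed
# to YoutubeTabIE, with 'is_music_url' smuggled in for music URLs.
# A minimal usage sketch of the same flow via the embedding API (assumes yt-dlp is
# importable as yt_dlp; kept in a comment so nothing runs at import time):
#
#   from yt_dlp import YoutubeDL
#   with YoutubeDL({'extract_flat': 'in_playlist'}) as ydl:
#       info = ydl.extract_info('PLBB231211A4F62143', download=False)
#       print(info['title'])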
4509
4510
4511 class YoutubeYtBeIE(InfoExtractor):
4512 IE_DESC = 'youtu.be'
4513 _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
4514 _TESTS = [{
4515 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
4516 'info_dict': {
4517 'id': 'yeWKywCrFtk',
4518 'ext': 'mp4',
4519 'title': 'Small Scale Baler and Braiding Rugs',
4520 'uploader': 'Backus-Page House Museum',
4521 'uploader_id': 'backuspagemuseum',
4522 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
4523 'upload_date': '20161008',
4524 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
4525 'categories': ['Nonprofits & Activism'],
4526 'tags': list,
4527 'like_count': int,
4528 'dislike_count': int,
4529 },
4530 'params': {
4531 'noplaylist': True,
4532 'skip_download': True,
4533 },
4534 }, {
4535 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
4536 'only_matching': True,
4537 }]
4538
4539 def _real_extract(self, url):
4540 mobj = self._match_valid_url(url)
4541 video_id = mobj.group('id')
4542 playlist_id = mobj.group('playlist_id')
4543 return self.url_result(
4544 update_url_query('https://www.youtube.com/watch', {
4545 'v': video_id,
4546 'list': playlist_id,
4547 'feature': 'youtu.be',
4548 }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
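# For example, the first test URL above becomes
# https://www.youtube.com/watch?v=yeWKywCrFtk&list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5&feature=youtu.be
# (parameter order may vary) and is then handled by YoutubeTabIE as a video within a playlist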
4549
4550
4551 class YoutubeYtUserIE(InfoExtractor):
4552 IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
4553 _VALID_URL = r'ytuser:(?P<id>.+)'
4554 _TESTS = [{
4555 'url': 'ytuser:phihag',
4556 'only_matching': True,
4557 }]
4558
4559 def _real_extract(self, url):
4560 user_id = self._match_id(url)
4561 return self.url_result(
4562 'https://www.youtube.com/user/%s' % user_id,
4563 ie=YoutubeTabIE.ie_key(), video_id=user_id)
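# For example, 'ytuser:phihag' (the test above) resolves to
# https://www.youtube.com/user/phihag, which YoutubeTabIE then extracts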
4564
4565
4566 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
4567 IE_NAME = 'youtube:favorites'
4568 IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
4569 _VALID_URL = r':ytfav(?:ou?rite)?s?'
4570 _LOGIN_REQUIRED = True
4571 _TESTS = [{
4572 'url': ':ytfav',
4573 'only_matching': True,
4574 }, {
4575 'url': ':ytfavorites',
4576 'only_matching': True,
4577 }]
4578
4579 def _real_extract(self, url):
4580 return self.url_result(
4581 'https://www.youtube.com/playlist?list=LL',
4582 ie=YoutubeTabIE.ie_key())
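# 'LL' is the auto-generated "liked videos" list, so ':ytfav' is just a shortcut for
# https://www.youtube.com/playlist?list=LL handled by YoutubeTabIE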
4583
4584
4585 class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
4586 IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
4587 # There doesn't appear to be a real limit; for example, if you search for
4588 # 'python' you get more than 8,000,000 results
4589 _MAX_RESULTS = float('inf')
4590 IE_NAME = 'youtube:search'
4591 _SEARCH_KEY = 'ytsearch'
4592 _SEARCH_PARAMS = None
4593 _TESTS = []
4594
4595 def _search_results(self, query):
4596 data = {'query': query}
4597 if self._SEARCH_PARAMS:
4598 data['params'] = self._SEARCH_PARAMS
4599 continuation = {}
4600 for page_num in itertools.count(1):
4601 data.update(continuation)
4602 search = self._extract_response(
4603 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
4604 check_get_keys=('contents', 'onResponseReceivedCommands')
4605 )
4606 if not search:
4607 break
4608 slr_contents = try_get(
4609 search,
4610 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
4611 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
4612 list)
4613 if not slr_contents:
4614 break
4615
4616 # Youtube sometimes adds promoted content to searches,
4617 # changing the index location of videos and of the continuation token,
4618 # so we search through all entries until we find them.
4619 continuation = None
4620 for slr_content in slr_contents:
4621 if not continuation:
4622 continuation = self._extract_continuation({'contents': [slr_content]})
4623
4624 isr_contents = try_get(
4625 slr_content,
4626 lambda x: x['itemSectionRenderer']['contents'],
4627 list)
4628 if not isr_contents:
4629 continue
4630 for content in isr_contents:
4631 if not isinstance(content, dict):
4632 continue
4633 video = content.get('videoRenderer')
4634 if not isinstance(video, dict):
4635 continue
4636 video_id = video.get('videoId')
4637 if not video_id:
4638 continue
4639
4640 yield self._extract_video(video)
4641
4642 if not continuation:
4643 break
4644
4645
4646 class YoutubeSearchDateIE(YoutubeSearchIE):
4647 IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
4648 _SEARCH_KEY = 'ytsearchdate'
4649 IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
4650 _SEARCH_PARAMS = 'CAI%3D'
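# 'CAI%3D' ('CAI=' URL-encoded) is the search filter parameter for sorting results by
# upload date, which is what gives this extractor its "newest videos first" behaviour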
4651
4652
4653 class YoutubeSearchURLIE(YoutubeSearchIE):
4654 IE_DESC = 'YouTube.com search URLs'
4655 IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
4656 _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
4657 # _MAX_RESULTS = 100
4658 _TESTS = [{
4659 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
4660 'playlist_mincount': 5,
4661 'info_dict': {
4662 'id': 'youtube-dl test video',
4663 'title': 'youtube-dl test video',
4664 }
4665 }, {
4666 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
4667 'only_matching': True,
4668 }]
4669
4670 @classmethod
4671 def _make_valid_url(cls):
4672 return cls._VALID_URL
4673
4674 def _real_extract(self, url):
4675 qs = parse_qs(url)
4676 query = (qs.get('search_query') or qs.get('q'))[0]
4677 self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
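# For example, with the test URL https://www.youtube.com/results?q=test&sp=EgQIBBgB
# above, query becomes 'test' and _SEARCH_PARAMS becomes 'EgQIBBgB'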
4678 return self._get_n_results(query, self._MAX_RESULTS)
4679
4680
4681 class YoutubeFeedsInfoExtractor(YoutubeTabIE):
4682 """
4683 Base class for feed extractors
4684 Subclasses must define the _FEED_NAME property.
4685 """
4686 _LOGIN_REQUIRED = True
4687 _TESTS = []
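# The concrete subclasses below (YoutubeRecommendedIE, YoutubeSubscriptionsIE,
# YoutubeHistoryIE) only set _FEED_NAME; IE_NAME and the feed URL
# https://www.youtube.com/feed/<_FEED_NAME> are derived from it here.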
4688
4689 @property
4690 def IE_NAME(self):
4691 return 'youtube:%s' % self._FEED_NAME
4692
4693 def _real_extract(self, url):
4694 return self.url_result(
4695 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
4696 ie=YoutubeTabIE.ie_key())
4697
4698
4699 class YoutubeWatchLaterIE(InfoExtractor):
4700 IE_NAME = 'youtube:watchlater'
4701 IE_DESC = 'YouTube watch later list, ":ytwatchlater" for short (requires authentication)'
4702 _VALID_URL = r':ytwatchlater'
4703 _TESTS = [{
4704 'url': ':ytwatchlater',
4705 'only_matching': True,
4706 }]
4707
4708 def _real_extract(self, url):
4709 return self.url_result(
4710 'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4711
4712
4713 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
4714 IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
4715 _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
4716 _FEED_NAME = 'recommended'
4717 _LOGIN_REQUIRED = False
4718 _TESTS = [{
4719 'url': ':ytrec',
4720 'only_matching': True,
4721 }, {
4722 'url': ':ytrecommended',
4723 'only_matching': True,
4724 }, {
4725 'url': 'https://youtube.com',
4726 'only_matching': True,
4727 }]
4728
4729
4730 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
4731 IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
4732 _VALID_URL = r':ytsub(?:scription)?s?'
4733 _FEED_NAME = 'subscriptions'
4734 _TESTS = [{
4735 'url': ':ytsubs',
4736 'only_matching': True,
4737 }, {
4738 'url': ':ytsubscriptions',
4739 'only_matching': True,
4740 }]
4741
4742
4743 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
4744 IE_DESC = 'YouTube watch history, ":ythis" for short (requires authentication)'
4745 _VALID_URL = r':ythis(?:tory)?'
4746 _FEED_NAME = 'history'
4747 _TESTS = [{
4748 'url': ':ythistory',
4749 'only_matching': True,
4750 }]
4751
4752
4753 class YoutubeTruncatedURLIE(InfoExtractor):
4754 IE_NAME = 'youtube:truncated_url'
4755 IE_DESC = False # Do not list
4756 _VALID_URL = r'''(?x)
4757 (?:https?://)?
4758 (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
4759 (?:watch\?(?:
4760 feature=[a-z_]+|
4761 annotation_id=annotation_[^&]+|
4762 x-yt-cl=[0-9]+|
4763 hl=[^&]*|
4764 t=[0-9]+
4765 )?
4766 |
4767 attribution_link\?a=[^&]+
4768 )
4769 $
4770 '''
4771
4772 _TESTS = [{
4773 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
4774 'only_matching': True,
4775 }, {
4776 'url': 'https://www.youtube.com/watch?',
4777 'only_matching': True,
4778 }, {
4779 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
4780 'only_matching': True,
4781 }, {
4782 'url': 'https://www.youtube.com/watch?feature=foo',
4783 'only_matching': True,
4784 }, {
4785 'url': 'https://www.youtube.com/watch?hl=en-GB',
4786 'only_matching': True,
4787 }, {
4788 'url': 'https://www.youtube.com/watch?t=2372',
4789 'only_matching': True,
4790 }]
4791
4792 def _real_extract(self, url):
4793 raise ExtractorError(
4794 'Did you forget to quote the URL? Remember that & is a meta '
4795 'character in most shells, so you want to put the URL in quotes, '
4796 'like youtube-dl '
4797 '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
4798 ' or simply youtube-dl BaW_jenozKc .',
4799 expected=True)
4800
4801
4802 class YoutubeClipIE(InfoExtractor):
4803 IE_NAME = 'youtube:clip'
4804 IE_DESC = False # Do not list
4805 _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'
4806
4807 def _real_extract(self, url):
4808 self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
4809 return self.url_result(url, 'Generic')
4810
4811
4812 class YoutubeTruncatedIDIE(InfoExtractor):
4813 IE_NAME = 'youtube:truncated_id'
4814 IE_DESC = False # Do not list
4815 _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
4816
4817 _TESTS = [{
4818 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
4819 'only_matching': True,
4820 }]
4821
4822 def _real_extract(self, url):
4823 video_id = self._match_id(url)
4824 raise ExtractorError(
4825 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
4826 expected=True)