]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
56de2ef5915fc850836b6b73c1d103be3ec4bbe1
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 is_html,
42 mimetype2ext,
43 network_exceptions,
44 orderedSet,
45 parse_codecs,
46 parse_count,
47 parse_duration,
48 parse_iso8601,
49 parse_qs,
50 qualities,
51 remove_end,
52 remove_start,
53 smuggle_url,
54 str_or_none,
55 str_to_int,
56 traverse_obj,
57 try_get,
58 unescapeHTML,
59 unified_strdate,
60 unsmuggle_url,
61 update_url_query,
62 url_or_none,
63 urljoin,
64 variadic,
65 )
66
67
# Per-client innertube API configuration.
# Keys are client names usable with --extractor-args; values hold the API key,
# context (sent in every innertube request) and the numeric client-name header.
# Missing keys (API key, host, REQUIRE_JS_PLAYER) are filled in later by
# build_innertube_clients().
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
        'REQUIRE_JS_PLAYER': False
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
        'REQUIRE_JS_PLAYER': False
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },
    'android_creator': {
        # No API key here; the shared default is applied by build_innertube_clients()
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
        'REQUIRE_JS_PLAYER': False
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
        'REQUIRE_JS_PLAYER': False
    },
    'ios_creator': {
        # No API key here; the shared default is applied by build_innertube_clients()
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
        'REQUIRE_JS_PLAYER': False
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
214
215
def build_innertube_clients():
    """Normalize INNERTUBE_CLIENTS in place.

    Fills in missing defaults (API key, host, JS-player requirement, 'hl'),
    assigns a selection priority per client, and derives a '<base>_agegate'
    variant for each base client with an EMBED clientScreen and a thirdParty
    embed URL.
    """
    embed_third_party = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'ios', 'mweb')
    base_priority = qualities(base_clients[::-1])
    shared_defaults = {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_HOST': 'www.youtube.com',
        'REQUIRE_JS_PLAYER': True,
    }

    # Snapshot the items first: agegate variants are inserted while iterating
    for name, cfg in list(INNERTUBE_CLIENTS.items()):
        for key, default in shared_defaults.items():
            cfg.setdefault(key, default)
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        cfg['priority'] = 10 * base_priority(name.split('_', 1)[0])

        if name in base_clients:
            agegate_cfg = INNERTUBE_CLIENTS[f'{name}_agegate'] = copy.deepcopy(cfg)
            agegate_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_third_party
            agegate_cfg['priority'] -= 1
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_third_party
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3


build_innertube_clients()
243
244
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # URL path components that can never be a channel/user name
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Prefixed playlist IDs plus the special lists RDMM/WL/LL/LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    r''' # Unused since login is broken
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
    '''
269
270 def _login(self):
271 """
272 Attempt to log in to YouTube.
273 True is returned if successful or skipped.
274 False is returned if login failed.
275
276 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
277 """
278
279 def warn(message):
280 self.report_warning(message)
281
282 # username+password login is broken
283 if (self._LOGIN_REQUIRED
284 and self.get_param('cookiefile') is None
285 and self.get_param('cookiesfrombrowser') is None):
286 self.raise_login_required(
287 'Login details are needed to download this content', method='cookies')
288 username, password = self._get_login_info()
289 if username:
290 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
291 return
292
293 # Everything below this is broken!
294 r'''
295 # No authentication to be performed
296 if username is None:
297 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
298 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
299 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
300 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
301 return True
302
303 login_page = self._download_webpage(
304 self._LOGIN_URL, None,
305 note='Downloading login page',
306 errnote='unable to fetch login page', fatal=False)
307 if login_page is False:
308 return
309
310 login_form = self._hidden_inputs(login_page)
311
312 def req(url, f_req, note, errnote):
313 data = login_form.copy()
314 data.update({
315 'pstMsg': 1,
316 'checkConnection': 'youtube',
317 'checkedDomains': 'youtube',
318 'hl': 'en',
319 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
320 'f.req': json.dumps(f_req),
321 'flowName': 'GlifWebSignIn',
322 'flowEntry': 'ServiceLogin',
323 # TODO: reverse actual botguard identifier generation algo
324 'bgRequest': '["identifier",""]',
325 })
326 return self._download_json(
327 url, None, note=note, errnote=errnote,
328 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
329 fatal=False,
330 data=urlencode_postdata(data), headers={
331 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
332 'Google-Accounts-XSRF': 1,
333 })
334
335 lookup_req = [
336 username,
337 None, [], None, 'US', None, None, 2, False, True,
338 [
339 None, None,
340 [2, 1, None, 1,
341 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
342 None, [], 4],
343 1, [None, None, []], None, None, None, True
344 ],
345 username,
346 ]
347
348 lookup_results = req(
349 self._LOOKUP_URL, lookup_req,
350 'Looking up account info', 'Unable to look up account info')
351
352 if lookup_results is False:
353 return False
354
355 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
356 if not user_hash:
357 warn('Unable to extract user hash')
358 return False
359
360 challenge_req = [
361 user_hash,
362 None, 1, None, [1, None, None, None, [password, None, True]],
363 [
364 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
365 1, [None, None, []], None, None, None, True
366 ]]
367
368 challenge_results = req(
369 self._CHALLENGE_URL, challenge_req,
370 'Logging in', 'Unable to log in')
371
372 if challenge_results is False:
373 return
374
375 login_res = try_get(challenge_results, lambda x: x[0][5], list)
376 if login_res:
377 login_msg = try_get(login_res, lambda x: x[5], compat_str)
378 warn(
379 'Unable to login: %s' % 'Invalid password'
380 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
381 return False
382
383 res = try_get(challenge_results, lambda x: x[0][-1], list)
384 if not res:
385 warn('Unable to extract result entry')
386 return False
387
388 login_challenge = try_get(res, lambda x: x[0][0], list)
389 if login_challenge:
390 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
391 if challenge_str == 'TWO_STEP_VERIFICATION':
392 # SEND_SUCCESS - TFA code has been successfully sent to phone
393 # QUOTA_EXCEEDED - reached the limit of TFA codes
394 status = try_get(login_challenge, lambda x: x[5], compat_str)
395 if status == 'QUOTA_EXCEEDED':
396 warn('Exceeded the limit of TFA codes, try later')
397 return False
398
399 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
400 if not tl:
401 warn('Unable to extract TL')
402 return False
403
404 tfa_code = self._get_tfa_info('2-step verification code')
405
406 if not tfa_code:
407 warn(
408 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
409 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
410 return False
411
412 tfa_code = remove_start(tfa_code, 'G-')
413
414 tfa_req = [
415 user_hash, None, 2, None,
416 [
417 9, None, None, None, None, None, None, None,
418 [None, tfa_code, True, 2]
419 ]]
420
421 tfa_results = req(
422 self._TFA_URL.format(tl), tfa_req,
423 'Submitting TFA code', 'Unable to submit TFA code')
424
425 if tfa_results is False:
426 return False
427
428 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
429 if tfa_res:
430 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
431 warn(
432 'Unable to finish TFA: %s' % 'Invalid TFA code'
433 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
434 return False
435
436 check_cookie_url = try_get(
437 tfa_results, lambda x: x[0][-1][2], compat_str)
438 else:
439 CHALLENGES = {
440 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
441 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
442 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
443 }
444 challenge = CHALLENGES.get(
445 challenge_str,
446 '%s returned error %s.' % (self.IE_NAME, challenge_str))
447 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
448 return False
449 else:
450 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
451
452 if not check_cookie_url:
453 warn('Unable to extract CheckCookie URL')
454 return False
455
456 check_cookie_results = self._download_webpage(
457 check_cookie_url, None, 'Checking cookie', fatal=False)
458
459 if check_cookie_results is False:
460 return False
461
462 if 'https://myaccount.google.com/' not in check_cookie_results:
463 warn('Unable to log in')
464 return False
465
466 return True
467 '''
468
469 def _initialize_consent(self):
470 cookies = self._get_cookies('https://www.youtube.com/')
471 if cookies.get('__Secure-3PSID'):
472 return
473 consent_id = None
474 consent = cookies.get('CONSENT')
475 if consent:
476 if 'YES' in consent.value:
477 return
478 consent_id = self._search_regex(
479 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
480 if not consent_id:
481 consent_id = random.randint(100, 999)
482 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
483
    def _real_initialize(self):
        # Bypass the consent wall before any further requests are made
        self._initialize_consent()
        if self._downloader is None:
            return
        # _login() never returns a truthy value in its current (live) code paths
        if not self._login():
            return
490
    # Matches the ytInitialData JSON blob assignment in a watch/browse page
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Matches the ytInitialPlayerResponse JSON blob assignment
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Tokens that mark the end of an initial-data blob (used to anchor the regexes above)
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
494
495 def _get_default_ytcfg(self, client='web'):
496 return copy.deepcopy(INNERTUBE_CLIENTS[client])
497
498 def _get_innertube_host(self, client='web'):
499 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
500
501 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
502 # try_get but with fallback to default ytcfg client values when present
503 _func = lambda y: try_get(y, getter, expected_type)
504 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
505
506 def _extract_client_name(self, ytcfg, default_client='web'):
507 return self._ytcfg_get_safe(
508 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
509 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
510
511 def _extract_client_version(self, ytcfg, default_client='web'):
512 return self._ytcfg_get_safe(
513 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
514 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
515
516 def _extract_api_key(self, ytcfg=None, default_client='web'):
517 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
518
519 def _extract_context(self, ytcfg=None, default_client='web'):
520 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
521 context = _get_context(ytcfg)
522 if context:
523 return context
524
525 context = _get_context(self._get_default_ytcfg(default_client))
526 if not ytcfg:
527 return context
528
529 # Recreate the client context (required)
530 context['client'].update({
531 'clientVersion': self._extract_client_version(ytcfg, default_client),
532 'clientName': self._extract_client_name(ytcfg, default_client),
533 })
534 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
535 if visitor_data:
536 context['client']['visitorData'] = visitor_data
537 return context
538
539 _SAPISID = None
540
    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Build the 'SAPISIDHASH <time>_<sha1>' Authorization header value.

        Returns None when no SAPISID/__Secure-3PAPISID cookie is available.
        The cookie lookup result is cached in self._SAPISID (False = absent),
        but the hash itself is recomputed per call since it embeds the time.
        """
        time_now = round(time.time())
        if self._SAPISID is None:
            yt_cookies = self._get_cookies('https://www.youtube.com')
            # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
            # See: https://github.com/yt-dlp/yt-dlp/issues/393
            sapisid_cookie = dict_get(
                yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
            if sapisid_cookie and sapisid_cookie.value:
                self._SAPISID = sapisid_cookie.value
                self.write_debug('Extracted SAPISID cookie')
                # SAPISID cookie is required if not already present
                if not yt_cookies.get('SAPISID'):
                    self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                    self._set_cookie(
                        '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
            else:
                self._SAPISID = False
        if not self._SAPISID:
            return None
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'
565
566 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
567 note='Downloading API JSON', errnote='Unable to download API page',
568 context=None, api_key=None, api_hostname=None, default_client='web'):
569
570 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
571 data.update(query)
572 real_headers = self.generate_api_headers(default_client=default_client)
573 real_headers.update({'content-type': 'application/json'})
574 if headers:
575 real_headers.update(headers)
576 return self._download_json(
577 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
578 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
579 data=json.dumps(data).encode('utf8'), headers=real_headers,
580 query={'key': api_key or self._extract_api_key()})
581
582 def extract_yt_initial_data(self, video_id, webpage):
583 return self._parse_json(
584 self._search_regex(
585 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
586 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
587 video_id)
588
589 @staticmethod
590 def _extract_session_index(*data):
591 """
592 Index of current account in account list.
593 See: https://github.com/yt-dlp/yt-dlp/pull/519
594 """
595 for ytcfg in data:
596 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
597 if session_index is not None:
598 return session_index
599
600 # Deprecated?
601 def _extract_identity_token(self, ytcfg=None, webpage=None):
602 if ytcfg:
603 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
604 if token:
605 return token
606 if webpage:
607 return self._search_regex(
608 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
609 'identity token', default=None, fatal=False)
610
611 @staticmethod
612 def _extract_account_syncid(*args):
613 """
614 Extract syncId required to download private playlists of secondary channels
615 @params response and/or ytcfg
616 """
617 for data in args:
618 # ytcfg includes channel_syncid if on secondary channel
619 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
620 if delegated_sid:
621 return delegated_sid
622 sync_ids = (try_get(
623 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
624 lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
625 if len(sync_ids) >= 2 and sync_ids[1]:
626 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
627 # and just "user_syncid||" for primary channel. We only want the channel_syncid
628 return sync_ids[0]
629
630 @property
631 def is_authenticated(self):
632 return bool(self._generate_sapisidhash_header())
633
634 def extract_ytcfg(self, video_id, webpage):
635 if not webpage:
636 return {}
637 return self._parse_json(
638 self._search_regex(
639 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
640 default='{}'), video_id, fatal=False) or {}
641
642 def generate_api_headers(
643 self, *, ytcfg=None, account_syncid=None, session_index=None,
644 visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
645
646 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
647 headers = {
648 'X-YouTube-Client-Name': compat_str(
649 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
650 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
651 'Origin': origin,
652 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
653 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
654 'X-Goog-Visitor-Id': visitor_data or try_get(
655 self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
656 }
657 if session_index is None:
658 session_index = self._extract_session_index(ytcfg)
659 if account_syncid or session_index is not None:
660 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
661
662 auth = self._generate_sapisidhash_header(origin)
663 if auth is not None:
664 headers['Authorization'] = auth
665 headers['X-Origin'] = origin
666 return {h: v for h, v in headers.items() if v is not None}
667
668 @staticmethod
669 def _build_api_continuation_query(continuation, ctp=None):
670 query = {
671 'continuation': continuation
672 }
673 # TODO: Inconsistency with clickTrackingParams.
674 # Currently we have a fixed ctp contained within context (from ytcfg)
675 # and a ctp in root query for continuation.
676 if ctp:
677 query['clickTracking'] = {'clickTrackingParams': ctp}
678 return query
679
680 @classmethod
681 def _extract_next_continuation_data(cls, renderer):
682 next_continuation = try_get(
683 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
684 lambda x: x['continuation']['reloadContinuationData']), dict)
685 if not next_continuation:
686 return
687 continuation = next_continuation.get('continuation')
688 if not continuation:
689 return
690 ctp = next_continuation.get('clickTrackingParams')
691 return cls._build_api_continuation_query(continuation, ctp)
692
693 @classmethod
694 def _extract_continuation_ep_data(cls, continuation_ep: dict):
695 if isinstance(continuation_ep, dict):
696 continuation = try_get(
697 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
698 if not continuation:
699 return
700 ctp = continuation_ep.get('clickTrackingParams')
701 return cls._build_api_continuation_query(continuation, ctp)
702
703 @classmethod
704 def _extract_continuation(cls, renderer):
705 next_continuation = cls._extract_next_continuation_data(renderer)
706 if next_continuation:
707 return next_continuation
708
709 contents = []
710 for key in ('contents', 'items'):
711 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
712
713 for content in contents:
714 if not isinstance(content, dict):
715 continue
716 continuation_ep = try_get(
717 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
718 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
719 dict)
720 continuation = cls._extract_continuation_ep_data(continuation_ep)
721 if continuation:
722 return continuation
723
724 @classmethod
725 def _extract_alerts(cls, data):
726 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
727 if not isinstance(alert_dict, dict):
728 continue
729 for alert in alert_dict.values():
730 alert_type = alert.get('type')
731 if not alert_type:
732 continue
733 message = cls._get_text(alert, 'text')
734 if message:
735 yield alert_type, message
736
737 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
738 errors = []
739 warnings = []
740 for alert_type, alert_message in alerts:
741 if alert_type.lower() == 'error' and fatal:
742 errors.append([alert_type, alert_message])
743 else:
744 warnings.append([alert_type, alert_message])
745
746 for alert_type, alert_message in (warnings + errors[:-1]):
747 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
748 if errors:
749 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
750
751 def _extract_and_report_alerts(self, data, *args, **kwargs):
752 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
753
754 def _extract_badges(self, renderer: dict):
755 badges = set()
756 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
757 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
758 if label:
759 badges.add(label.lower())
760 return badges
761
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """Extract a display string from YouTube's text renderer structures.

        Each path in *path_list* (or *data* itself when no paths are given) is
        resolved with traverse_obj; the first object that yields a non-empty
        'simpleText' value or a joinable 'runs' list wins. *max_runs* limits
        how many runs are concatenated. Returns None when nothing matches.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # A path with no branching (no ... and no list/tuple keys)
                # resolves to a single object - wrap it for the loop below
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                # A bare list is treated as a runs list itself
                if not runs and isinstance(item, list):
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
783
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the innertube API via _call_api, retrying intermittent failures.

        check_get_keys: key(s) that must be present in the response for it to
        count as complete; incomplete responses are retried.
        Returns the parsed JSON response, or None when not fatal and all
        retries were exhausted or a non-retryable error occurred.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Surface any JSON error message YouTube sent alongside the HTTP error
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False, only_once=True)
                except ExtractorError as e:
                    # YouTube servers may return errors we want to retry on in a 200 OK response
                    # See: https://github.com/yt-dlp/yt-dlp/issues/839
                    if 'unknown error' in e.msg.lower():
                        last_error = e.msg
                        continue
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
854
855 @staticmethod
856 def is_music_url(url):
857 return re.match(r'https?://music\.youtube\.com/', url) is not None
858
859 def _extract_video(self, renderer):
860 video_id = renderer.get('videoId')
861 title = self._get_text(renderer, 'title')
862 description = self._get_text(renderer, 'descriptionSnippet')
863 duration = parse_duration(self._get_text(
864 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
865 view_count_text = self._get_text(renderer, 'viewCountText') or ''
866 view_count = str_to_int(self._search_regex(
867 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
868 'view count', default=None))
869
870 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
871
872 return {
873 '_type': 'url',
874 'ie_key': YoutubeIE.ie_key(),
875 'id': video_id,
876 'url': f'https://www.youtube.com/watch?v={video_id}',
877 'title': title,
878 'description': description,
879 'duration': duration,
880 'view_count': view_count,
881 'uploader': uploader,
882 }
883
884
885 class YoutubeIE(YoutubeBaseInfoExtractor):
886 IE_DESC = 'YouTube.com'
    # Host-matching regexes for Invidious/Piped front-ends. These are joined
    # with '|' and interpolated into _VALID_URL as %(invidious)s below, so any
    # watch URL on these mirrors is handled by this extractor.
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        # Tor (.onion) and I2P (.b32.i2p) hidden-service mirrors
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    )
    # Verbose (?x) regex matching every supported watch-URL shape (youtube.com
    # and friends, youtu.be short links, Invidious mirrors, or a naked 11-char
    # video ID). The inline '#' comments are part of the verbose regex itself.
    # %(invidious)s is filled in from _INVIDIOUS_SITES at class-creation time.
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                            (?:www\.)?deturl\.com/www\.youtube\.com|
                            (?:www\.)?pwnyoutube\.com|
                            (?:www\.)?hooktube\.com|
                            (?:www\.)?yourepeat\.com|
                            tube\.majestyc\.net|
                            %(invidious)s|
                            youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e|shorts)/(?!videoseries))         # v/ or embed/ or e/ or shorts/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                                 v=
                             )
                         ))
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                            %(invidious)s
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     (?:\#|$)""" % {
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    # Regexes (tried in order) that extract a player identifier — named group
    # 'id' — from the URL of YouTube's player JavaScript (e.g. .../base.js).
    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        # legacy 'vfl'-style player ids
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
    )
    # Hardcoded per-itag format characteristics (container, resolution, codecs,
    # bitrate, protocol). Keys are YouTube itag numbers as strings; the special
    # '_rtmp' key carries protocol info for unnamed RTMP formats. Negative
    # 'preference' values de-prioritise a format during format selection.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
        '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
        '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
        '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
        '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
        '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
        '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
        '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    }
    # Subtitle serialisation formats this extractor requests.
    # NOTE(review): presumably listed in order of preference — confirm at the
    # subtitle-extraction call site (outside this chunk).
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Opt out of the framework's default geo-bypass mechanism for this
    # extractor (flag semantics are defined in the base InfoExtractor).
    _GEO_BYPASS = False

    # Short extractor name used for selection and log prefixes
    IE_NAME = 'youtube'
1104 _TESTS = [
1105 {
1106 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1107 'info_dict': {
1108 'id': 'BaW_jenozKc',
1109 'ext': 'mp4',
1110 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1111 'uploader': 'Philipp Hagemeister',
1112 'uploader_id': 'phihag',
1113 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1114 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1115 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1116 'upload_date': '20121002',
1117 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1118 'categories': ['Science & Technology'],
1119 'tags': ['youtube-dl'],
1120 'duration': 10,
1121 'view_count': int,
1122 'like_count': int,
1123 'dislike_count': int,
1124 'start_time': 1,
1125 'end_time': 9,
1126 }
1127 },
1128 {
1129 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1130 'note': 'Embed-only video (#1746)',
1131 'info_dict': {
1132 'id': 'yZIXLfi8CZQ',
1133 'ext': 'mp4',
1134 'upload_date': '20120608',
1135 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1136 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1137 'uploader': 'SET India',
1138 'uploader_id': 'setindia',
1139 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1140 'age_limit': 18,
1141 },
1142 'skip': 'Private video',
1143 },
1144 {
1145 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1146 'note': 'Use the first video ID in the URL',
1147 'info_dict': {
1148 'id': 'BaW_jenozKc',
1149 'ext': 'mp4',
1150 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1151 'uploader': 'Philipp Hagemeister',
1152 'uploader_id': 'phihag',
1153 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1154 'upload_date': '20121002',
1155 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1156 'categories': ['Science & Technology'],
1157 'tags': ['youtube-dl'],
1158 'duration': 10,
1159 'view_count': int,
1160 'like_count': int,
1161 'dislike_count': int,
1162 },
1163 'params': {
1164 'skip_download': True,
1165 },
1166 },
1167 {
1168 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1169 'note': '256k DASH audio (format 141) via DASH manifest',
1170 'info_dict': {
1171 'id': 'a9LDPn-MO4I',
1172 'ext': 'm4a',
1173 'upload_date': '20121002',
1174 'uploader_id': '8KVIDEO',
1175 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1176 'description': '',
1177 'uploader': '8KVIDEO',
1178 'title': 'UHDTV TEST 8K VIDEO.mp4'
1179 },
1180 'params': {
1181 'youtube_include_dash_manifest': True,
1182 'format': '141',
1183 },
1184 'skip': 'format 141 not served anymore',
1185 },
1186 # DASH manifest with encrypted signature
1187 {
1188 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1189 'info_dict': {
1190 'id': 'IB3lcPjvWLA',
1191 'ext': 'm4a',
1192 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1193 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1194 'duration': 244,
1195 'uploader': 'AfrojackVEVO',
1196 'uploader_id': 'AfrojackVEVO',
1197 'upload_date': '20131011',
1198 'abr': 129.495,
1199 },
1200 'params': {
1201 'youtube_include_dash_manifest': True,
1202 'format': '141/bestaudio[ext=m4a]',
1203 },
1204 },
1205 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1206 {
1207 'note': 'Embed allowed age-gate video',
1208 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1209 'info_dict': {
1210 'id': 'HtVdAasjOgU',
1211 'ext': 'mp4',
1212 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1213 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1214 'duration': 142,
1215 'uploader': 'The Witcher',
1216 'uploader_id': 'WitcherGame',
1217 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1218 'upload_date': '20140605',
1219 'age_limit': 18,
1220 },
1221 },
1222 {
1223 'note': 'Age-gate video with embed allowed in public site',
1224 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1225 'info_dict': {
1226 'id': 'HsUATh_Nc2U',
1227 'ext': 'mp4',
1228 'title': 'Godzilla 2 (Official Video)',
1229 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1230 'upload_date': '20200408',
1231 'uploader_id': 'FlyingKitty900',
1232 'uploader': 'FlyingKitty',
1233 'age_limit': 18,
1234 },
1235 },
1236 {
1237 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1238 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1239 'info_dict': {
1240 'id': 'Tq92D6wQ1mg',
1241 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1242 'ext': 'mp4',
1243 'upload_date': '20191227',
1244 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1245 'uploader': 'Projekt Melody',
1246 'description': 'md5:17eccca93a786d51bc67646756894066',
1247 'age_limit': 18,
1248 },
1249 },
1250 {
1251 'note': 'Non-Agegated non-embeddable video',
1252 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1253 'info_dict': {
1254 'id': 'MeJVWBSsPAY',
1255 'ext': 'mp4',
1256 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1257 'uploader': 'Herr Lurik',
1258 'uploader_id': 'st3in234',
1259 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1260 'upload_date': '20130730',
1261 },
1262 },
1263 {
1264 'note': 'Non-bypassable age-gated video',
1265 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1266 'only_matching': True,
1267 },
1268 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1269 # YouTube Red ad is not captured for creator
1270 {
1271 'url': '__2ABJjxzNo',
1272 'info_dict': {
1273 'id': '__2ABJjxzNo',
1274 'ext': 'mp4',
1275 'duration': 266,
1276 'upload_date': '20100430',
1277 'uploader_id': 'deadmau5',
1278 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1279 'creator': 'deadmau5',
1280 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1281 'uploader': 'deadmau5',
1282 'title': 'Deadmau5 - Some Chords (HD)',
1283 'alt_title': 'Some Chords',
1284 },
1285 'expected_warnings': [
1286 'DASH manifest missing',
1287 ]
1288 },
1289 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1290 {
1291 'url': 'lqQg6PlCWgI',
1292 'info_dict': {
1293 'id': 'lqQg6PlCWgI',
1294 'ext': 'mp4',
1295 'duration': 6085,
1296 'upload_date': '20150827',
1297 'uploader_id': 'olympic',
1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1299 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1300 'uploader': 'Olympics',
1301 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1302 },
1303 'params': {
1304 'skip_download': 'requires avconv',
1305 }
1306 },
1307 # Non-square pixels
1308 {
1309 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1310 'info_dict': {
1311 'id': '_b-2C3KPAM0',
1312 'ext': 'mp4',
1313 'stretched_ratio': 16 / 9.,
1314 'duration': 85,
1315 'upload_date': '20110310',
1316 'uploader_id': 'AllenMeow',
1317 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1318 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1319 'uploader': '孫ᄋᄅ',
1320 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1321 },
1322 },
1323 # url_encoded_fmt_stream_map is empty string
1324 {
1325 'url': 'qEJwOuvDf7I',
1326 'info_dict': {
1327 'id': 'qEJwOuvDf7I',
1328 'ext': 'webm',
1329 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1330 'description': '',
1331 'upload_date': '20150404',
1332 'uploader_id': 'spbelect',
1333 'uploader': 'Наблюдатели Петербурга',
1334 },
1335 'params': {
1336 'skip_download': 'requires avconv',
1337 },
1338 'skip': 'This live event has ended.',
1339 },
1340 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1341 {
1342 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1343 'info_dict': {
1344 'id': 'FIl7x6_3R5Y',
1345 'ext': 'webm',
1346 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1347 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1348 'duration': 220,
1349 'upload_date': '20150625',
1350 'uploader_id': 'dorappi2000',
1351 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1352 'uploader': 'dorappi2000',
1353 'formats': 'mincount:31',
1354 },
1355 'skip': 'not actual anymore',
1356 },
1357 # DASH manifest with segment_list
1358 {
1359 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1360 'md5': '8ce563a1d667b599d21064e982ab9e31',
1361 'info_dict': {
1362 'id': 'CsmdDsKjzN8',
1363 'ext': 'mp4',
1364 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1365 'uploader': 'Airtek',
1366 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1367 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1368 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1369 },
1370 'params': {
1371 'youtube_include_dash_manifest': True,
1372 'format': '135', # bestvideo
1373 },
1374 'skip': 'This live event has ended.',
1375 },
1376 {
1377 # Multifeed videos (multiple cameras), URL is for Main Camera
1378 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1379 'info_dict': {
1380 'id': 'jvGDaLqkpTg',
1381 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1382 'description': 'md5:e03b909557865076822aa169218d6a5d',
1383 },
1384 'playlist': [{
1385 'info_dict': {
1386 'id': 'jvGDaLqkpTg',
1387 'ext': 'mp4',
1388 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1389 'description': 'md5:e03b909557865076822aa169218d6a5d',
1390 'duration': 10643,
1391 'upload_date': '20161111',
1392 'uploader': 'Team PGP',
1393 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1394 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1395 },
1396 }, {
1397 'info_dict': {
1398 'id': '3AKt1R1aDnw',
1399 'ext': 'mp4',
1400 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1401 'description': 'md5:e03b909557865076822aa169218d6a5d',
1402 'duration': 10991,
1403 'upload_date': '20161111',
1404 'uploader': 'Team PGP',
1405 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1406 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1407 },
1408 }, {
1409 'info_dict': {
1410 'id': 'RtAMM00gpVc',
1411 'ext': 'mp4',
1412 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1413 'description': 'md5:e03b909557865076822aa169218d6a5d',
1414 'duration': 10995,
1415 'upload_date': '20161111',
1416 'uploader': 'Team PGP',
1417 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1418 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1419 },
1420 }, {
1421 'info_dict': {
1422 'id': '6N2fdlP3C5U',
1423 'ext': 'mp4',
1424 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1425 'description': 'md5:e03b909557865076822aa169218d6a5d',
1426 'duration': 10990,
1427 'upload_date': '20161111',
1428 'uploader': 'Team PGP',
1429 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1430 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1431 },
1432 }],
1433 'params': {
1434 'skip_download': True,
1435 },
1436 'skip': 'Not multifeed anymore',
1437 },
1438 {
1439 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1440 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1441 'info_dict': {
1442 'id': 'gVfLd0zydlo',
1443 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1444 },
1445 'playlist_count': 2,
1446 'skip': 'Not multifeed anymore',
1447 },
1448 {
1449 'url': 'https://vid.plus/FlRa-iH7PGw',
1450 'only_matching': True,
1451 },
1452 {
1453 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1454 'only_matching': True,
1455 },
1456 {
1457 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1458 # Also tests cut-off URL expansion in video description (see
1459 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1460 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1461 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1462 'info_dict': {
1463 'id': 'lsguqyKfVQg',
1464 'ext': 'mp4',
1465 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1466 'alt_title': 'Dark Walk',
1467 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1468 'duration': 133,
1469 'upload_date': '20151119',
1470 'uploader_id': 'IronSoulElf',
1471 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1472 'uploader': 'IronSoulElf',
1473 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1474 'track': 'Dark Walk',
1475 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1476 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1477 },
1478 'params': {
1479 'skip_download': True,
1480 },
1481 },
1482 {
1483 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1484 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1485 'only_matching': True,
1486 },
1487 {
1488 # Video with yt:stretch=17:0
1489 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1490 'info_dict': {
1491 'id': 'Q39EVAstoRM',
1492 'ext': 'mp4',
1493 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1494 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1495 'upload_date': '20151107',
1496 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1497 'uploader': 'CH GAMER DROID',
1498 },
1499 'params': {
1500 'skip_download': True,
1501 },
1502 'skip': 'This video does not exist.',
1503 },
1504 {
1505 # Video with incomplete 'yt:stretch=16:'
1506 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1507 'only_matching': True,
1508 },
1509 {
1510 # Video licensed under Creative Commons
1511 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1512 'info_dict': {
1513 'id': 'M4gD1WSo5mA',
1514 'ext': 'mp4',
1515 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1516 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1517 'duration': 721,
1518 'upload_date': '20150127',
1519 'uploader_id': 'BerkmanCenter',
1520 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1521 'uploader': 'The Berkman Klein Center for Internet & Society',
1522 'license': 'Creative Commons Attribution license (reuse allowed)',
1523 },
1524 'params': {
1525 'skip_download': True,
1526 },
1527 },
1528 {
1529 # Channel-like uploader_url
1530 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1531 'info_dict': {
1532 'id': 'eQcmzGIKrzg',
1533 'ext': 'mp4',
1534 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1535 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1536 'duration': 4060,
1537 'upload_date': '20151119',
1538 'uploader': 'Bernie Sanders',
1539 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1540 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1541 'license': 'Creative Commons Attribution license (reuse allowed)',
1542 },
1543 'params': {
1544 'skip_download': True,
1545 },
1546 },
1547 {
1548 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1549 'only_matching': True,
1550 },
1551 {
1552 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1553 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1554 'only_matching': True,
1555 },
1556 {
1557 # Rental video preview
1558 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1559 'info_dict': {
1560 'id': 'uGpuVWrhIzE',
1561 'ext': 'mp4',
1562 'title': 'Piku - Trailer',
1563 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1564 'upload_date': '20150811',
1565 'uploader': 'FlixMatrix',
1566 'uploader_id': 'FlixMatrixKaravan',
1567 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1568 'license': 'Standard YouTube License',
1569 },
1570 'params': {
1571 'skip_download': True,
1572 },
1573 'skip': 'This video is not available.',
1574 },
1575 {
1576 # YouTube Red video with episode data
1577 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1578 'info_dict': {
1579 'id': 'iqKdEhx-dD4',
1580 'ext': 'mp4',
1581 'title': 'Isolation - Mind Field (Ep 1)',
1582 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1583 'duration': 2085,
1584 'upload_date': '20170118',
1585 'uploader': 'Vsauce',
1586 'uploader_id': 'Vsauce',
1587 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1588 'series': 'Mind Field',
1589 'season_number': 1,
1590 'episode_number': 1,
1591 },
1592 'params': {
1593 'skip_download': True,
1594 },
1595 'expected_warnings': [
1596 'Skipping DASH manifest',
1597 ],
1598 },
1599 {
1600 # The following content has been identified by the YouTube community
1601 # as inappropriate or offensive to some audiences.
1602 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1603 'info_dict': {
1604 'id': '6SJNVb0GnPI',
1605 'ext': 'mp4',
1606 'title': 'Race Differences in Intelligence',
1607 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1608 'duration': 965,
1609 'upload_date': '20140124',
1610 'uploader': 'New Century Foundation',
1611 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1612 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1613 },
1614 'params': {
1615 'skip_download': True,
1616 },
1617 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1618 },
1619 {
1620 # itag 212
1621 'url': '1t24XAntNCY',
1622 'only_matching': True,
1623 },
1624 {
1625 # geo restricted to JP
1626 'url': 'sJL6WA-aGkQ',
1627 'only_matching': True,
1628 },
1629 {
1630 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1631 'only_matching': True,
1632 },
1633 {
1634 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1635 'only_matching': True,
1636 },
1637 {
1638 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1639 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1640 'only_matching': True,
1641 },
1642 {
1643 # DRM protected
1644 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1645 'only_matching': True,
1646 },
1647 {
1648 # Video with unsupported adaptive stream type formats
1649 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1650 'info_dict': {
1651 'id': 'Z4Vy8R84T1U',
1652 'ext': 'mp4',
1653 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1654 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1655 'duration': 433,
1656 'upload_date': '20130923',
1657 'uploader': 'Amelia Putri Harwita',
1658 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1659 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1660 'formats': 'maxcount:10',
1661 },
1662 'params': {
1663 'skip_download': True,
1664 'youtube_include_dash_manifest': False,
1665 },
1666 'skip': 'not actual anymore',
1667 },
1668 {
1669 # Youtube Music Auto-generated description
1670 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1671 'info_dict': {
1672 'id': 'MgNrAu2pzNs',
1673 'ext': 'mp4',
1674 'title': 'Voyeur Girl',
1675 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1676 'upload_date': '20190312',
1677 'uploader': 'Stephen - Topic',
1678 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1679 'artist': 'Stephen',
1680 'track': 'Voyeur Girl',
1681 'album': 'it\'s too much love to know my dear',
1682 'release_date': '20190313',
1683 'release_year': 2019,
1684 },
1685 'params': {
1686 'skip_download': True,
1687 },
1688 },
1689 {
1690 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1691 'only_matching': True,
1692 },
1693 {
1694 # invalid -> valid video id redirection
1695 'url': 'DJztXj2GPfl',
1696 'info_dict': {
1697 'id': 'DJztXj2GPfk',
1698 'ext': 'mp4',
1699 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1700 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1701 'upload_date': '20090125',
1702 'uploader': 'Prochorowka',
1703 'uploader_id': 'Prochorowka',
1704 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1705 'artist': 'Panjabi MC',
1706 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1707 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1708 },
1709 'params': {
1710 'skip_download': True,
1711 },
1712 'skip': 'Video unavailable',
1713 },
1714 {
1715 # empty description results in an empty string
1716 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1717 'info_dict': {
1718 'id': 'x41yOUIvK2k',
1719 'ext': 'mp4',
1720 'title': 'IMG 3456',
1721 'description': '',
1722 'upload_date': '20170613',
1723 'uploader_id': 'ElevageOrVert',
1724 'uploader': 'ElevageOrVert',
1725 },
1726 'params': {
1727 'skip_download': True,
1728 },
1729 },
1730 {
1731 # with '};' inside yt initial data (see [1])
1732 # see [2] for an example with '};' inside ytInitialPlayerResponse
1733 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1734 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1735 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1736 'info_dict': {
1737 'id': 'CHqg6qOn4no',
1738 'ext': 'mp4',
1739 'title': 'Part 77 Sort a list of simple types in c#',
1740 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1741 'upload_date': '20130831',
1742 'uploader_id': 'kudvenkat',
1743 'uploader': 'kudvenkat',
1744 },
1745 'params': {
1746 'skip_download': True,
1747 },
1748 },
1749 {
1750 # another example of '};' in ytInitialData
1751 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1752 'only_matching': True,
1753 },
1754 {
1755 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1756 'only_matching': True,
1757 },
1758 {
1759 # https://github.com/ytdl-org/youtube-dl/pull/28094
1760 'url': 'OtqTfy26tG0',
1761 'info_dict': {
1762 'id': 'OtqTfy26tG0',
1763 'ext': 'mp4',
1764 'title': 'Burn Out',
1765 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1766 'upload_date': '20141120',
1767 'uploader': 'The Cinematic Orchestra - Topic',
1768 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1769 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1770 'artist': 'The Cinematic Orchestra',
1771 'track': 'Burn Out',
1772 'album': 'Every Day',
1773 'release_data': None,
1774 'release_year': None,
1775 },
1776 'params': {
1777 'skip_download': True,
1778 },
1779 },
1780 {
1781 # controversial video, only works with bpctr when authenticated with cookies
1782 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1783 'only_matching': True,
1784 },
1785 {
1786 # controversial video, requires bpctr/contentCheckOk
1787 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1788 'info_dict': {
1789 'id': 'SZJvDhaSDnc',
1790 'ext': 'mp4',
1791 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1792 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1793 'uploader': 'CBS This Morning',
1794 'uploader_id': 'CBSThisMorning',
1795 'upload_date': '20140716',
1796 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1797 }
1798 },
1799 {
1800 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1801 'url': 'cBvYw8_A0vQ',
1802 'info_dict': {
1803 'id': 'cBvYw8_A0vQ',
1804 'ext': 'mp4',
1805 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1806 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1807 'upload_date': '20201120',
1808 'uploader': 'Walk around Japan',
1809 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1810 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1811 },
1812 'params': {
1813 'skip_download': True,
1814 },
1815 }, {
1816 # Has multiple audio streams
1817 'url': 'WaOKSUlf4TM',
1818 'only_matching': True
1819 }, {
1820 # Requires Premium: has format 141 when requested using YTM url
1821 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1822 'only_matching': True
1823 }, {
1824 # multiple subtitles with same lang_code
1825 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1826 'only_matching': True,
1827 }, {
1828 # Force use android client fallback
1829 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1830 'info_dict': {
1831 'id': 'YOelRv7fMxY',
1832 'title': 'DIGGING A SECRET TUNNEL Part 1',
1833 'ext': '3gp',
1834 'upload_date': '20210624',
1835 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1836 'uploader': 'colinfurze',
1837 'uploader_id': 'colinfurze',
1838 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1839 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1840 },
1841 'params': {
1842 'format': '17', # 3gp format available on android
1843 'extractor_args': {'youtube': {'player_client': ['android']}},
1844 },
1845 },
1846 {
1847 # Skip download of additional client configs (remix client config in this case)
1848 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1849 'only_matching': True,
1850 'params': {
1851 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1852 },
1853 }, {
1854 # shorts
1855 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1856 'only_matching': True,
1857 },
1858 ]
1859
1860 @classmethod
1861 def suitable(cls, url):
1862 from ..utils import parse_qs
1863
1864 qs = parse_qs(url)
1865 if qs.get('list', [None])[0]:
1866 return False
1867 return super(YoutubeIE, cls).suitable(url)
1868
1869 def __init__(self, *args, **kwargs):
1870 super(YoutubeIE, self).__init__(*args, **kwargs)
1871 self._code_cache = {}
1872 self._player_cache = {}
1873
1874 def _extract_player_url(self, *ytcfgs, webpage=None):
1875 player_url = traverse_obj(
1876 ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1877 get_all=False, expected_type=compat_str)
1878 if not player_url:
1879 return
1880 if player_url.startswith('//'):
1881 player_url = 'https:' + player_url
1882 elif not re.match(r'https?://', player_url):
1883 player_url = compat_urlparse.urljoin(
1884 'https://www.youtube.com', player_url)
1885 return player_url
1886
1887 def _download_player_url(self, video_id, fatal=False):
1888 res = self._download_webpage(
1889 'https://www.youtube.com/iframe_api',
1890 note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1891 if res:
1892 player_version = self._search_regex(
1893 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1894 if player_version:
1895 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1896
1897 def _signature_cache_id(self, example_sig):
1898 """ Return a string representation of a signature """
1899 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1900
1901 @classmethod
1902 def _extract_player_info(cls, player_url):
1903 for player_re in cls._PLAYER_INFO_RE:
1904 id_m = re.search(player_re, player_url)
1905 if id_m:
1906 break
1907 else:
1908 raise ExtractorError('Cannot identify player %r' % player_url)
1909 return id_m.group('id')
1910
1911 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1912 player_id = self._extract_player_info(player_url)
1913 if player_id not in self._code_cache:
1914 code = self._download_webpage(
1915 player_url, video_id, fatal=fatal,
1916 note='Downloading player ' + player_id,
1917 errnote='Download of %s failed' % player_url)
1918 if code:
1919 self._code_cache[player_id] = code
1920 return player_id in self._code_cache
1921
1922 def _extract_signature_function(self, video_id, player_url, example_sig):
1923 player_id = self._extract_player_info(player_url)
1924
1925 # Read from filesystem cache
1926 func_id = 'js_%s_%s' % (
1927 player_id, self._signature_cache_id(example_sig))
1928 assert os.path.basename(func_id) == func_id
1929
1930 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1931 if cache_spec is not None:
1932 return lambda s: ''.join(s[i] for i in cache_spec)
1933
1934 if self._load_player(video_id, player_url):
1935 code = self._code_cache[player_id]
1936 res = self._parse_sig_js(code)
1937
1938 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1939 cache_res = res(test_string)
1940 cache_spec = [ord(c) for c in cache_res]
1941
1942 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1943 return res
1944
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Runs *func* on a probe string of distinct characters to recover the
        index permutation it applies, then compresses runs of consecutive
        indices into slice expressions for readability.
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render one slice expression, omitting defaults (0 start, step 1)
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # NOTE(review): assumes idxs has at least 2 entries; `i` below
            # would be unbound for shorter inputs -- confirm callers
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a +1/-1 run: extend it, or emit the finished slice
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Two consecutive indices start a new run
                    step = i - prev
                    start = prev
                    continue
                else:
                    # Isolated index: plain subscript
                    yield 's[%d]' % prev
            # Flush the final element (or close the trailing run)
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1983
    def _parse_sig_js(self, jscode):
        """Locate and compile the signature-scrambling function in player JS.

        Tries a sequence of regexes (current player layouts first) to find
        the function name, then evaluates it with JSInterpreter.  Returns a
        callable mapping a scrambled signature string to its decrypted form.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # extract_function expects the argument list as a single sequence
        return lambda s: initial_function([s])
2007
2008 def _decrypt_signature(self, s, video_id, player_url):
2009 """Turn the encrypted s field into a working signature"""
2010
2011 if player_url is None:
2012 raise ExtractorError('Cannot decrypt signature without player_url')
2013
2014 try:
2015 player_id = (player_url, self._signature_cache_id(s))
2016 if player_id not in self._player_cache:
2017 func = self._extract_signature_function(
2018 video_id, player_url, s
2019 )
2020 self._player_cache[player_id] = func
2021 func = self._player_cache[player_id]
2022 if self.get_param('youtube_print_sig_code'):
2023 self._print_sig_code(func, s)
2024 return func(s)
2025 except Exception as e:
2026 tb = traceback.format_exc()
2027 raise ExtractorError(
2028 'Signature extraction failed: ' + tb, cause=e)
2029
2030 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2031 """
2032 Extract signatureTimestamp (sts)
2033 Required to tell API what sig/player version is in use.
2034 """
2035 sts = None
2036 if isinstance(ytcfg, dict):
2037 sts = int_or_none(ytcfg.get('STS'))
2038
2039 if not sts:
2040 # Attempt to extract from player
2041 if player_url is None:
2042 error_msg = 'Cannot extract signature timestamp without player_url.'
2043 if fatal:
2044 raise ExtractorError(error_msg)
2045 self.report_warning(error_msg)
2046 return
2047 if self._load_player(video_id, player_url, fatal=fatal):
2048 player_id = self._extract_player_info(player_url)
2049 code = self._code_cache[player_id]
2050 sts = int_or_none(self._search_regex(
2051 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2052 'JS player signature timestamp', group='sts', fatal=fatal))
2053 return sts
2054
2055 def _mark_watched(self, video_id, player_responses):
2056 playback_url = traverse_obj(
2057 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2058 expected_type=url_or_none, get_all=False)
2059 if not playback_url:
2060 self.report_warning('Unable to mark watched')
2061 return
2062 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2063 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2064
2065 # cpn generation algorithm is reverse engineered from base.js.
2066 # In fact it works even with dummy cpn.
2067 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2068 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2069
2070 qs.update({
2071 'ver': ['2'],
2072 'cpn': [cpn],
2073 })
2074 playback_url = compat_urlparse.urlunparse(
2075 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2076
2077 self._download_webpage(
2078 playback_url, video_id, 'Marking watched',
2079 'Unable to mark watched', fatal=False)
2080
    @staticmethod
    def _extract_urls(webpage):
        """Return URLs/ids of YouTube players embedded in an arbitrary webpage."""
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        # findall returns tuples here; the last group is the video id
        entries.extend(m[-1] for m in matches)

        return entries
2112
2113 @staticmethod
2114 def _extract_url(webpage):
2115 urls = YoutubeIE._extract_urls(webpage)
2116 return urls[0] if urls else None
2117
2118 @classmethod
2119 def extract_id(cls, url):
2120 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2121 if mobj is None:
2122 raise ExtractorError('Invalid URL: %s' % url)
2123 return mobj.group('id')
2124
2125 def _extract_chapters_from_json(self, data, duration):
2126 chapter_list = traverse_obj(
2127 data, (
2128 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2129 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2130 ), expected_type=list)
2131
2132 return self._extract_chapters(
2133 chapter_list,
2134 chapter_time=lambda chapter: float_or_none(
2135 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2136 chapter_title=lambda chapter: traverse_obj(
2137 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2138 duration=duration)
2139
2140 def _extract_chapters_from_engagement_panel(self, data, duration):
2141 content_list = traverse_obj(
2142 data,
2143 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2144 expected_type=list, default=[])
2145 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2146 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2147
2148 return next((
2149 filter(None, (
2150 self._extract_chapters(
2151 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2152 chapter_time, chapter_title, duration)
2153 for contents in content_list
2154 ))), [])
2155
2156 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2157 chapters = []
2158 last_chapter = {'start_time': 0}
2159 for idx, chapter in enumerate(chapter_list or []):
2160 title = chapter_title(chapter)
2161 start_time = chapter_time(chapter)
2162 if start_time is None:
2163 continue
2164 last_chapter['end_time'] = start_time
2165 if start_time < last_chapter['start_time']:
2166 if idx == 1:
2167 chapters.pop()
2168 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2169 else:
2170 self.report_warning(f'Invalid start time for chapter "{title}"')
2171 continue
2172 last_chapter = {'start_time': start_time, 'title': title}
2173 chapters.append(last_chapter)
2174 last_chapter['end_time'] = duration
2175 return chapters
2176
2177 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2178 return self._parse_json(self._search_regex(
2179 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2180 regex), webpage, name, default='{}'), video_id, fatal=False)
2181
2182 @staticmethod
2183 def parse_time_text(time_text):
2184 """
2185 Parse the comment time text
2186 time_text is in the format 'X units ago (edited)'
2187 """
2188 time_text_split = time_text.split(' ')
2189 if len(time_text_split) >= 3:
2190 try:
2191 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2192 except ValueError:
2193 return None
2194
2195 def _extract_comment(self, comment_renderer, parent=None):
2196 comment_id = comment_renderer.get('commentId')
2197 if not comment_id:
2198 return
2199
2200 text = self._get_text(comment_renderer, 'contentText')
2201
2202 # note: timestamp is an estimate calculated from the current time and time_text
2203 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2204 time_text_dt = self.parse_time_text(time_text)
2205 if isinstance(time_text_dt, datetime.datetime):
2206 timestamp = calendar.timegm(time_text_dt.timetuple())
2207 author = self._get_text(comment_renderer, 'authorText')
2208 author_id = try_get(comment_renderer,
2209 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2210
2211 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2212 lambda x: x['likeCount']), compat_str)) or 0
2213 author_thumbnail = try_get(comment_renderer,
2214 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2215
2216 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2217 is_favorited = 'creatorHeart' in (try_get(
2218 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2219 return {
2220 'id': comment_id,
2221 'text': text,
2222 'timestamp': timestamp,
2223 'time_text': time_text,
2224 'like_count': votes,
2225 'is_favorited': is_favorited,
2226 'author': author,
2227 'author_id': author_id,
2228 'author_thumbnail': author_thumbnail,
2229 'author_is_uploader': author_is_uploader,
2230 'parent': parent or 'root'
2231 }
2232
    def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
        """Yield comments for a video, preceded by an int estimated total.

        Recurses one level for reply threads.  comment_counts is a shared
        mutable list: [comments downloaded, estimated total, reply-thread #].
        """

        def extract_header(contents):
            # Parse the comments-section header: report the estimated total,
            # pick the requested sort order and return its continuation
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in a thread, then recurse into its replies
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, ytcfg, video_id,
                        parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        # Short tokens belong to the old API and must be regenerated
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the comment continuations until exhausted
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry visitor data forward so subsequent pages share a session
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2402
2403 @staticmethod
2404 def _generate_comment_continuation(video_id):
2405 """
2406 Generates initial comment section continuation token from given video id
2407 """
2408 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2409 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2410 new_continuation_intlist = list(itertools.chain.from_iterable(
2411 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2412 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2413
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            # The first itemSectionRenderer holds the comments section
            yield from self._comment_entries(
                traverse_obj(contents, (..., 'itemSectionRenderer'), get_all=False), ytcfg, video_id)

        comments = []
        estimated_total = 0
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
        # Force English regardless of account setting to prevent parsing issues
        # See: https://github.com/yt-dlp/yt-dlp/issues/532
        ytcfg = copy.deepcopy(ytcfg)
        # NOTE(review): when INNERTUBE_CONTEXT/client is absent, 'hl' is set
        # on the throwaway default dict, silently making this a no-op --
        # presumably acceptable best-effort; confirm before relying on it
        traverse_obj(
            ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
        try:
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                # _comment_entries yields the estimated total as a bare int
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2443
2444 @staticmethod
2445 def _get_checkok_params():
2446 return {'contentCheckOk': True, 'racyCheckOk': True}
2447
2448 @classmethod
2449 def _generate_player_context(cls, sts=None):
2450 context = {
2451 'html5Preference': 'HTML5_PREF_WANTS',
2452 }
2453 if sts is not None:
2454 context['signatureTimestamp'] = sts
2455 return {
2456 'playbackContext': {
2457 'contentPlaybackContext': context
2458 },
2459 **cls._get_checkok_params()
2460 }
2461
2462 @staticmethod
2463 def _is_agegated(player_response):
2464 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2465 return True
2466
2467 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2468 AGE_GATE_REASONS = (
2469 'confirm your age', 'age-restricted', 'inappropriate', # reason
2470 'age_verification_required', 'age_check_required', # status
2471 )
2472 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2473
2474 @staticmethod
2475 def _is_unplayable(player_response):
2476 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2477
2478 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2479
2480 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2481 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2482 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2483 headers = self.generate_api_headers(
2484 ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2485
2486 yt_query = {'videoId': video_id}
2487 yt_query.update(self._generate_player_context(sts))
2488 return self._extract_response(
2489 item_id=video_id, ep='player', query=yt_query,
2490 ytcfg=player_ytcfg, headers=headers, fatal=True,
2491 default_client=client,
2492 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2493 ) or None
2494
2495 def _get_requested_clients(self, url, smuggled_data):
2496 requested_clients = []
2497 allowed_clients = sorted(
2498 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2499 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2500 for client in self._configuration_arg('player_client'):
2501 if client in allowed_clients:
2502 requested_clients.append(client)
2503 elif client == 'all':
2504 requested_clients.extend(allowed_clients)
2505 else:
2506 self.report_warning(f'Skipping unsupported client {client}')
2507 if not requested_clients:
2508 requested_clients = ['android', 'web']
2509
2510 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2511 requested_clients.extend(
2512 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2513
2514 return orderedSet(requested_clients)
2515
2516 def _extract_player_ytcfg(self, client, video_id):
2517 url = {
2518 'web_music': 'https://music.youtube.com',
2519 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2520 }.get(client)
2521 if not url:
2522 return {}
2523 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2524 return self.extract_ytcfg(video_id, webpage) or {}
2525
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
        """Fetch player responses for each requested client.

        Returns a tuple (prs, player_url): prs is the list of player-response
        dicts (possibly including the webpage's initial response with its
        formats stripped), player_url is the player JS URL if one was resolved.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        # clients is consumed as a stack (pop from the end), so reverse it here
        # to preserve the caller's priority order; original_clients is kept so
        # append_client never re-adds an explicitly requested client
        original_clients = clients
        clients = clients[::-1]
        prs = []

        def append_client(client_name):
            # Schedule an extra client only if it exists and was not already requested
            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
                clients.append(client_name)

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr:
            pr = dict(initial_pr)
            pr['streamingData'] = None
            prs.append(pr)

        last_error = None
        tried_iframe_fallback = False
        player_url = None
        while clients:
            client = clients.pop()
            # The master ytcfg only applies to the plain web client
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
            require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
            if 'js' in self._configuration_arg('player_skip'):
                require_js_player = False
                player_url = None

            # Fall back to the embed iframe to locate the player JS (at most once)
            if not player_url and not tried_iframe_fallback and require_js_player:
                player_url = self._download_player_url(video_id)
                tried_iframe_fallback = True

            try:
                # Reuse the webpage's initial response for the web client
                pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
            except ExtractorError as e:
                # Remember the error but keep trying the remaining clients;
                # only the most recent error is raised/reported at the end
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

            if pr:
                prs.append(pr)

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
                append_client(client.replace('_agegate', '_creator'))
            elif self._is_agegated(pr):
                append_client(f'{client}_agegate')

        if last_error:
            # Fatal only when no client produced a usable response
            if not len(prs):
                raise last_error
            self.report_warning(last_error)
        return prs, player_url
2592
    def _extract_formats(self, streaming_data, video_id, player_url, is_live):
        """Yield format dicts from the streamingData of all player responses.

        Handles progressive/adaptive formats (including signatureCipher
        decryption via the player JS), then HLS and DASH manifests, honoring
        the `skip` extractor-arg and the include_*_manifest params.
        """
        itags, stream_ids = [], []
        # Quality maps collected from progressive formats; used later to
        # guess the quality of manifest-only formats
        itag_qualities, res_qualities = {}, {}
        q = qualities([
            # Normally tiny is the smallest video-only formats. But
            # audio-only formats with unknown quality may get tagged as tiny
            'tiny',
            'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
            'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
        ])
        streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

        for fmt in streaming_formats:
            # Skip DRM-protected entries and ones with a target duration
            if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
                continue

            itag = str_or_none(fmt.get('itag'))
            audio_track = fmt.get('audioTrack') or {}
            # itag alone is not unique across audio tracks, so dedupe on both
            stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
            if stream_id in stream_ids:
                continue

            quality = fmt.get('quality')
            height = int_or_none(fmt.get('height'))
            if quality == 'tiny' or not quality:
                quality = fmt.get('audioQuality', '').lower() or quality
            # The 3gp format (17) in android client has a quality of "small",
            # but is actually worse than other formats
            if itag == '17':
                quality = 'tiny'
            if quality:
                if itag:
                    itag_qualities[itag] = quality
                if height:
                    res_qualities[height] = quality
            # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
            # (adding `&sq=0` to the URL) and parsing emsg box to determine the
            # number of fragment that would subsequently requested with (`&sq=N`)
            if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
                continue

            fmt_url = fmt.get('url')
            if not fmt_url:
                # No plain URL: the stream URL is signature-ciphered and must
                # be reassembled after decrypting 's' with the player JS
                sc = compat_parse_qs(fmt.get('signatureCipher'))
                fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
                encrypted_sig = try_get(sc, lambda x: x['s'][0])
                if not (sc and fmt_url and encrypted_sig):
                    continue
                # Cannot decrypt without the player JS
                if not player_url:
                    continue
                signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
                sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
                fmt_url += '&' + sp + '=' + signature

            if itag:
                itags.append(itag)
                stream_ids.append(stream_id)

            tbr = float_or_none(
                fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
            # NOTE(review): quality can still be falsy here when neither
            # 'quality' nor 'audioQuality' is set; the format_note fallback
            # below assumes it is a str — confirm this cannot occur in practice
            dct = {
                'asr': int_or_none(fmt.get('audioSampleRate')),
                'filesize': int_or_none(fmt.get('contentLength')),
                'format_id': itag,
                'format_note': ', '.join(filter(None, (
                    '%s%s' % (audio_track.get('displayName') or '',
                              ' (default)' if audio_track.get('audioIsDefault') else ''),
                    fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
                'fps': int_or_none(fmt.get('fps')),
                'height': height,
                'quality': q(quality),
                'tbr': tbr,
                'url': fmt_url,
                'width': int_or_none(fmt.get('width')),
                'language': audio_track.get('id', '').split('.')[0],
                'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
            }
            mime_mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
            if mime_mobj:
                dct['ext'] = mimetype2ext(mime_mobj.group(1))
                dct.update(parse_codecs(mime_mobj.group(2)))
            no_audio = dct.get('acodec') == 'none'
            no_video = dct.get('vcodec') == 'none'
            if no_audio:
                dct['vbr'] = tbr
            if no_video:
                dct['abr'] = tbr
            if no_audio or no_video:
                dct['downloader_options'] = {
                    # Youtube throttles chunks >~10M
                    'http_chunk_size': 10485760,
                }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
            yield dct

        skip_manifests = self._configuration_arg('skip')
        get_dash = (
            (not is_live or self._configuration_arg('include_live_dash'))
            and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
        get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

        def guess_quality(f):
            # Infer a manifest format's quality from the itag/height maps
            # collected from the progressive formats above
            for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
                if val in qdict:
                    return q(qdict[val])
            return -1

        for sd in streaming_data:
            hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
            if hls_manifest_url:
                for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                    itag = self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)
                    # Skip formats already yielded from streamingData
                    if itag in itags:
                        continue
                    if itag:
                        f['format_id'] = itag
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    yield f

            dash_manifest_url = get_dash and sd.get('dashManifestUrl')
            if dash_manifest_url:
                for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                    itag = f['format_id']
                    if itag in itags:
                        continue
                    if itag:
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    # The MPD itself lacks filesize; recover it from the clen
                    # component of the fragment/stream URL when present
                    filesize = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url')
                        or f['url'], 'file size', default=None))
                    if filesize:
                        f['filesize'] = filesize
                    yield f
2731
2732 def _real_extract(self, url):
2733 url, smuggled_data = unsmuggle_url(url, {})
2734 video_id = self._match_id(url)
2735
2736 base_url = self.http_scheme() + '//www.youtube.com/'
2737 webpage_url = base_url + 'watch?v=' + video_id
2738 webpage = None
2739 if 'webpage' not in self._configuration_arg('player_skip'):
2740 webpage = self._download_webpage(
2741 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2742
2743 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2744
2745 player_responses, player_url = self._extract_player_responses(
2746 self._get_requested_clients(url, smuggled_data),
2747 video_id, webpage, master_ytcfg)
2748
2749 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2750
2751 playability_statuses = traverse_obj(
2752 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2753
2754 trailer_video_id = get_first(
2755 playability_statuses,
2756 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2757 expected_type=str)
2758 if trailer_video_id:
2759 return self.url_result(
2760 trailer_video_id, self.ie_key(), trailer_video_id)
2761
2762 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2763 if webpage else (lambda x: None))
2764
2765 video_details = traverse_obj(
2766 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2767 microformats = traverse_obj(
2768 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2769 expected_type=dict, default=[])
2770 video_title = (
2771 get_first(video_details, 'title')
2772 or self._get_text(microformats, (..., 'title'))
2773 or search_meta(['og:title', 'twitter:title', 'title']))
2774 video_description = get_first(video_details, 'shortDescription')
2775
2776 if not smuggled_data.get('force_singlefeed', False):
2777 if not self.get_param('noplaylist'):
2778 multifeed_metadata_list = get_first(
2779 player_responses,
2780 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2781 expected_type=str)
2782 if multifeed_metadata_list:
2783 entries = []
2784 feed_ids = []
2785 for feed in multifeed_metadata_list.split(','):
2786 # Unquote should take place before split on comma (,) since textual
2787 # fields may contain comma as well (see
2788 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2789 feed_data = compat_parse_qs(
2790 compat_urllib_parse_unquote_plus(feed))
2791
2792 def feed_entry(name):
2793 return try_get(
2794 feed_data, lambda x: x[name][0], compat_str)
2795
2796 feed_id = feed_entry('id')
2797 if not feed_id:
2798 continue
2799 feed_title = feed_entry('title')
2800 title = video_title
2801 if feed_title:
2802 title += ' (%s)' % feed_title
2803 entries.append({
2804 '_type': 'url_transparent',
2805 'ie_key': 'Youtube',
2806 'url': smuggle_url(
2807 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2808 {'force_singlefeed': True}),
2809 'title': title,
2810 })
2811 feed_ids.append(feed_id)
2812 self.to_screen(
2813 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2814 % (', '.join(feed_ids), video_id))
2815 return self.playlist_result(
2816 entries, video_id, video_title, video_description)
2817 else:
2818 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2819
2820 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2821 is_live = get_first(video_details, 'isLive')
2822 if is_live is None:
2823 is_live = get_first(live_broadcast_details, 'isLiveNow')
2824
2825 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2826 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2827
2828 if not formats:
2829 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2830 self.report_drm(video_id)
2831 pemr = get_first(
2832 playability_statuses,
2833 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2834 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2835 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2836 if subreason:
2837 if subreason == 'The uploader has not made this video available in your country.':
2838 countries = get_first(microformats, 'availableCountries')
2839 if not countries:
2840 regions_allowed = search_meta('regionsAllowed')
2841 countries = regions_allowed.split(',') if regions_allowed else None
2842 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2843 reason += f'. {subreason}'
2844 if reason:
2845 self.raise_no_formats(reason, expected=True)
2846
2847 for f in formats:
2848 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
2849 f['source_preference'] = -10
2850 # TODO: this method is not reliable
2851 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
2852
2853 # Source is given priority since formats that throttle are given lower source_preference
2854 # When throttling issue is fully fixed, remove this
2855 self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang'))
2856
2857 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2858 if not keywords and webpage:
2859 keywords = [
2860 unescapeHTML(m.group('content'))
2861 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2862 for keyword in keywords:
2863 if keyword.startswith('yt:stretch='):
2864 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2865 if mobj:
2866 # NB: float is intentional for forcing float division
2867 w, h = (float(v) for v in mobj.groups())
2868 if w > 0 and h > 0:
2869 ratio = w / h
2870 for f in formats:
2871 if f.get('vcodec') != 'none':
2872 f['stretched_ratio'] = ratio
2873 break
2874
2875 thumbnails = []
2876 thumbnail_dicts = traverse_obj(
2877 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2878 expected_type=dict, default=[])
2879 for thumbnail in thumbnail_dicts:
2880 thumbnail_url = thumbnail.get('url')
2881 if not thumbnail_url:
2882 continue
2883 # Sometimes youtube gives a wrong thumbnail URL. See:
2884 # https://github.com/yt-dlp/yt-dlp/issues/233
2885 # https://github.com/ytdl-org/youtube-dl/issues/28023
2886 if 'maxresdefault' in thumbnail_url:
2887 thumbnail_url = thumbnail_url.split('?')[0]
2888 thumbnails.append({
2889 'url': thumbnail_url,
2890 'height': int_or_none(thumbnail.get('height')),
2891 'width': int_or_none(thumbnail.get('width')),
2892 })
2893 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2894 if thumbnail_url:
2895 thumbnails.append({
2896 'url': thumbnail_url,
2897 })
2898 # The best resolution thumbnails sometimes does not appear in the webpage
2899 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2900 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2901 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2902 # TODO: Test them also? - For some videos, even these don't exist
2903 guaranteed_thumbnail_names = [
2904 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2905 'mqdefault', 'mq1', 'mq2', 'mq3',
2906 'default', '1', '2', '3'
2907 ]
2908 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2909 n_thumbnail_names = len(thumbnail_names)
2910
2911 thumbnails.extend({
2912 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2913 video_id=video_id, name=name, ext=ext,
2914 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2915 '_test_url': name in hq_thumbnail_names,
2916 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2917 for thumb in thumbnails:
2918 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2919 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2920 self._remove_duplicate_formats(thumbnails)
2921
2922 category = get_first(microformats, 'category') or search_meta('genre')
2923 channel_id = str_or_none(
2924 get_first(video_details, 'channelId')
2925 or get_first(microformats, 'externalChannelId')
2926 or search_meta('channelId'))
2927 duration = int_or_none(
2928 get_first(video_details, 'lengthSeconds')
2929 or get_first(microformats, 'lengthSeconds')
2930 or parse_duration(search_meta('duration'))) or None
2931 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2932
2933 live_content = get_first(video_details, 'isLiveContent')
2934 is_upcoming = get_first(video_details, 'isUpcoming')
2935 if is_live is None:
2936 if is_upcoming or live_content is False:
2937 is_live = False
2938 if is_upcoming is None and (live_content or is_live):
2939 is_upcoming = False
2940 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2941 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2942 if not duration and live_endtime and live_starttime:
2943 duration = live_endtime - live_starttime
2944
2945 info = {
2946 'id': video_id,
2947 'title': self._live_title(video_title) if is_live else video_title,
2948 'formats': formats,
2949 'thumbnails': thumbnails,
2950 'description': video_description,
2951 'upload_date': unified_strdate(
2952 get_first(microformats, 'uploadDate')
2953 or search_meta('uploadDate')),
2954 'uploader': get_first(video_details, 'author'),
2955 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2956 'uploader_url': owner_profile_url,
2957 'channel_id': channel_id,
2958 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2959 'duration': duration,
2960 'view_count': int_or_none(
2961 get_first((video_details, microformats), (..., 'viewCount'))
2962 or search_meta('interactionCount')),
2963 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2964 'age_limit': 18 if (
2965 get_first(microformats, 'isFamilySafe') is False
2966 or search_meta('isFamilyFriendly') == 'false'
2967 or search_meta('og:restrictions:age') == '18+') else 0,
2968 'webpage_url': webpage_url,
2969 'categories': [category] if category else None,
2970 'tags': keywords,
2971 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2972 'is_live': is_live,
2973 'was_live': (False if is_live or is_upcoming or live_content is False
2974 else None if is_live is None or is_upcoming is None
2975 else live_content),
2976 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2977 'release_timestamp': live_starttime,
2978 }
2979
2980 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2981 # Converted into dicts to remove duplicates
2982 captions = {
2983 sub.get('baseUrl'): sub
2984 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2985 translation_languages = {
2986 lang.get('languageCode'): lang.get('languageName')
2987 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2988 subtitles = {}
2989 if pctr:
2990 def process_language(container, base_url, lang_code, sub_name, query):
2991 lang_subs = container.setdefault(lang_code, [])
2992 for fmt in self._SUBTITLE_FORMATS:
2993 query.update({
2994 'fmt': fmt,
2995 })
2996 lang_subs.append({
2997 'ext': fmt,
2998 'url': update_url_query(base_url, query),
2999 'name': sub_name,
3000 })
3001
3002 for base_url, caption_track in captions.items():
3003 if not base_url:
3004 continue
3005 if caption_track.get('kind') != 'asr':
3006 lang_code = (
3007 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
3008 or caption_track.get('languageCode'))
3009 if not lang_code:
3010 continue
3011 process_language(
3012 subtitles, base_url, lang_code,
3013 traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False),
3014 {})
3015 continue
3016 automatic_captions = {}
3017 for trans_code, trans_name in translation_languages.items():
3018 if not trans_code:
3019 continue
3020 process_language(
3021 automatic_captions, base_url, trans_code,
3022 self._get_text(trans_name, max_runs=1),
3023 {'tlang': trans_code})
3024 info['automatic_captions'] = automatic_captions
3025 info['subtitles'] = subtitles
3026
3027 parsed_url = compat_urllib_parse_urlparse(url)
3028 for component in [parsed_url.fragment, parsed_url.query]:
3029 query = compat_parse_qs(component)
3030 for k, v in query.items():
3031 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3032 d_k += '_time'
3033 if d_k not in info and k in s_ks:
3034 info[d_k] = parse_duration(query[k][0])
3035
3036 # Youtube Music Auto-generated description
3037 if video_description:
3038 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3039 if mobj:
3040 release_year = mobj.group('release_year')
3041 release_date = mobj.group('release_date')
3042 if release_date:
3043 release_date = release_date.replace('-', '')
3044 if not release_year:
3045 release_year = release_date[:4]
3046 info.update({
3047 'album': mobj.group('album'.strip()),
3048 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3049 'track': mobj.group('track').strip(),
3050 'release_date': release_date,
3051 'release_year': int_or_none(release_year),
3052 })
3053
3054 initial_data = None
3055 if webpage:
3056 initial_data = self._extract_yt_initial_variable(
3057 webpage, self._YT_INITIAL_DATA_RE, video_id,
3058 'yt initial data')
3059 if not initial_data:
3060 query = {'videoId': video_id}
3061 query.update(self._get_checkok_params())
3062 initial_data = self._extract_response(
3063 item_id=video_id, ep='next', fatal=False,
3064 ytcfg=master_ytcfg, query=query,
3065 headers=self.generate_api_headers(ytcfg=master_ytcfg),
3066 note='Downloading initial data API JSON')
3067
3068 try:
3069 # This will error if there is no livechat
3070 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3071 info['subtitles']['live_chat'] = [{
3072 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3073 'video_id': video_id,
3074 'ext': 'json',
3075 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3076 }]
3077 except (KeyError, IndexError, TypeError):
3078 pass
3079
3080 if initial_data:
3081 info['chapters'] = (
3082 self._extract_chapters_from_json(initial_data, duration)
3083 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3084 or None)
3085
3086 contents = try_get(
3087 initial_data,
3088 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3089 list) or []
3090 for content in contents:
3091 vpir = content.get('videoPrimaryInfoRenderer')
3092 if vpir:
3093 stl = vpir.get('superTitleLink')
3094 if stl:
3095 stl = self._get_text(stl)
3096 if try_get(
3097 vpir,
3098 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3099 info['location'] = stl
3100 else:
3101 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3102 if mobj:
3103 info.update({
3104 'series': mobj.group(1),
3105 'season_number': int(mobj.group(2)),
3106 'episode_number': int(mobj.group(3)),
3107 })
3108 for tlb in (try_get(
3109 vpir,
3110 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3111 list) or []):
3112 tbr = tlb.get('toggleButtonRenderer') or {}
3113 for getter, regex in [(
3114 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3115 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3116 lambda x: x['accessibility'],
3117 lambda x: x['accessibilityData']['accessibilityData'],
3118 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3119 label = (try_get(tbr, getter, dict) or {}).get('label')
3120 if label:
3121 mobj = re.match(regex, label)
3122 if mobj:
3123 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3124 break
3125 sbr_tooltip = try_get(
3126 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3127 if sbr_tooltip:
3128 like_count, dislike_count = sbr_tooltip.split(' / ')
3129 info.update({
3130 'like_count': str_to_int(like_count),
3131 'dislike_count': str_to_int(dislike_count),
3132 })
3133 vsir = content.get('videoSecondaryInfoRenderer')
3134 if vsir:
3135 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3136 rows = try_get(
3137 vsir,
3138 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3139 list) or []
3140 multiple_songs = False
3141 for row in rows:
3142 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3143 multiple_songs = True
3144 break
3145 for row in rows:
3146 mrr = row.get('metadataRowRenderer') or {}
3147 mrr_title = mrr.get('title')
3148 if not mrr_title:
3149 continue
3150 mrr_title = self._get_text(mrr, 'title')
3151 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3152 if mrr_title == 'License':
3153 info['license'] = mrr_contents_text
3154 elif not multiple_songs:
3155 if mrr_title == 'Album':
3156 info['album'] = mrr_contents_text
3157 elif mrr_title == 'Artist':
3158 info['artist'] = mrr_contents_text
3159 elif mrr_title == 'Song':
3160 info['track'] = mrr_contents_text
3161
3162 fallbacks = {
3163 'channel': 'uploader',
3164 'channel_id': 'uploader_id',
3165 'channel_url': 'uploader_url',
3166 }
3167 for to, frm in fallbacks.items():
3168 if not info.get(to):
3169 info[to] = info.get(frm)
3170
3171 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3172 v = info.get(s_k)
3173 if v:
3174 info[d_k] = v
3175
3176 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3177 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3178 is_membersonly = None
3179 is_premium = None
3180 if initial_data and is_private is not None:
3181 is_membersonly = False
3182 is_premium = False
3183 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3184 badge_labels = set()
3185 for content in contents:
3186 if not isinstance(content, dict):
3187 continue
3188 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3189 for badge_label in badge_labels:
3190 if badge_label.lower() == 'members only':
3191 is_membersonly = True
3192 elif badge_label.lower() == 'premium':
3193 is_premium = True
3194 elif badge_label.lower() == 'unlisted':
3195 is_unlisted = True
3196
3197 info['availability'] = self._availability(
3198 is_private=is_private,
3199 needs_premium=is_premium,
3200 needs_subscription=is_membersonly,
3201 needs_auth=info['age_limit'] >= 18,
3202 is_unlisted=None if is_private is None else is_unlisted)
3203
3204 if self.get_param('getcomments', False):
3205 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3206
3207 self.mark_watched(video_id, player_responses)
3208
3209 return info
3210
3211
3212 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3213 IE_DESC = 'YouTube.com tab'
3214 _VALID_URL = r'''(?x)
3215 https?://
3216 (?:\w+\.)?
3217 (?:
3218 youtube(?:kids)?\.com|
3219 invidio\.us
3220 )/
3221 (?:
3222 (?P<channel_type>channel|c|user|browse)/|
3223 (?P<not_channel>
3224 feed/|hashtag/|
3225 (?:playlist|watch)\?.*?\blist=
3226 )|
3227 (?!(?:%s)\b) # Direct URLs
3228 )
3229 (?P<id>[^/?\#&]+)
3230 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3231 IE_NAME = 'youtube:tab'
3232
3233 _TESTS = [{
3234 'note': 'playlists, multipage',
3235 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3236 'playlist_mincount': 94,
3237 'info_dict': {
3238 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3239 'title': 'Игорь Клейнер - Playlists',
3240 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3241 'uploader': 'Игорь Клейнер',
3242 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3243 },
3244 }, {
3245 'note': 'playlists, multipage, different order',
3246 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3247 'playlist_mincount': 94,
3248 'info_dict': {
3249 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3250 'title': 'Игорь Клейнер - Playlists',
3251 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3252 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3253 'uploader': 'Игорь Клейнер',
3254 },
3255 }, {
3256 'note': 'playlists, series',
3257 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3258 'playlist_mincount': 5,
3259 'info_dict': {
3260 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3261 'title': '3Blue1Brown - Playlists',
3262 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3263 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3264 'uploader': '3Blue1Brown',
3265 },
3266 }, {
3267 'note': 'playlists, singlepage',
3268 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3269 'playlist_mincount': 4,
3270 'info_dict': {
3271 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3272 'title': 'ThirstForScience - Playlists',
3273 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3274 'uploader': 'ThirstForScience',
3275 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3276 }
3277 }, {
3278 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3279 'only_matching': True,
3280 }, {
3281 'note': 'basic, single video playlist',
3282 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3283 'info_dict': {
3284 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3285 'uploader': 'Sergey M.',
3286 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3287 'title': 'youtube-dl public playlist',
3288 },
3289 'playlist_count': 1,
3290 }, {
3291 'note': 'empty playlist',
3292 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3293 'info_dict': {
3294 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3295 'uploader': 'Sergey M.',
3296 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3297 'title': 'youtube-dl empty playlist',
3298 },
3299 'playlist_count': 0,
3300 }, {
3301 'note': 'Home tab',
3302 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3303 'info_dict': {
3304 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3305 'title': 'lex will - Home',
3306 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3307 'uploader': 'lex will',
3308 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3309 },
3310 'playlist_mincount': 2,
3311 }, {
3312 'note': 'Videos tab',
3313 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3314 'info_dict': {
3315 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3316 'title': 'lex will - Videos',
3317 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3318 'uploader': 'lex will',
3319 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3320 },
3321 'playlist_mincount': 975,
3322 }, {
3323 'note': 'Videos tab, sorted by popular',
3324 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3325 'info_dict': {
3326 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3327 'title': 'lex will - Videos',
3328 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3329 'uploader': 'lex will',
3330 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3331 },
3332 'playlist_mincount': 199,
3333 }, {
3334 'note': 'Playlists tab',
3335 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3336 'info_dict': {
3337 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3338 'title': 'lex will - Playlists',
3339 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3340 'uploader': 'lex will',
3341 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3342 },
3343 'playlist_mincount': 17,
3344 }, {
3345 'note': 'Community tab',
3346 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3347 'info_dict': {
3348 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3349 'title': 'lex will - Community',
3350 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3351 'uploader': 'lex will',
3352 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3353 },
3354 'playlist_mincount': 18,
3355 }, {
3356 'note': 'Channels tab',
3357 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3358 'info_dict': {
3359 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3360 'title': 'lex will - Channels',
3361 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3362 'uploader': 'lex will',
3363 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3364 },
3365 'playlist_mincount': 12,
3366 }, {
3367 'note': 'Search tab',
3368 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3369 'playlist_mincount': 40,
3370 'info_dict': {
3371 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3372 'title': '3Blue1Brown - Search - linear algebra',
3373 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3374 'uploader': '3Blue1Brown',
3375 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3376 },
3377 }, {
3378 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3379 'only_matching': True,
3380 }, {
3381 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3382 'only_matching': True,
3383 }, {
3384 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3385 'only_matching': True,
3386 }, {
3387 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3388 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3389 'info_dict': {
3390 'title': '29C3: Not my department',
3391 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3392 'uploader': 'Christiaan008',
3393 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3394 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3395 },
3396 'playlist_count': 96,
3397 }, {
3398 'note': 'Large playlist',
3399 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3400 'info_dict': {
3401 'title': 'Uploads from Cauchemar',
3402 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3403 'uploader': 'Cauchemar',
3404 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3405 },
3406 'playlist_mincount': 1123,
3407 }, {
3408 'note': 'even larger playlist, 8832 videos',
3409 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3410 'only_matching': True,
3411 }, {
3412 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3413 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3414 'info_dict': {
3415 'title': 'Uploads from Interstellar Movie',
3416 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3417 'uploader': 'Interstellar Movie',
3418 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3419 },
3420 'playlist_mincount': 21,
3421 }, {
3422 'note': 'Playlist with "show unavailable videos" button',
3423 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3424 'info_dict': {
3425 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3426 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3427 'uploader': 'Phim Siêu Nhân Nhật Bản',
3428 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3429 },
3430 'playlist_mincount': 200,
3431 }, {
3432 'note': 'Playlist with unavailable videos in page 7',
3433 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3434 'info_dict': {
3435 'title': 'Uploads from BlankTV',
3436 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3437 'uploader': 'BlankTV',
3438 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3439 },
3440 'playlist_mincount': 1000,
3441 }, {
3442 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3443 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3444 'info_dict': {
3445 'title': 'Data Analysis with Dr Mike Pound',
3446 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3447 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3448 'uploader': 'Computerphile',
3449 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3450 },
3451 'playlist_mincount': 11,
3452 }, {
3453 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3454 'only_matching': True,
3455 }, {
3456 'note': 'Playlist URL that does not actually serve a playlist',
3457 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3458 'info_dict': {
3459 'id': 'FqZTN594JQw',
3460 'ext': 'webm',
3461 'title': "Smiley's People 01 detective, Adventure Series, Action",
3462 'uploader': 'STREEM',
3463 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3464 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3465 'upload_date': '20150526',
3466 'license': 'Standard YouTube License',
3467 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3468 'categories': ['People & Blogs'],
3469 'tags': list,
3470 'view_count': int,
3471 'like_count': int,
3472 'dislike_count': int,
3473 },
3474 'params': {
3475 'skip_download': True,
3476 },
3477 'skip': 'This video is not available.',
3478 'add_ie': [YoutubeIE.ie_key()],
3479 }, {
3480 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3481 'only_matching': True,
3482 }, {
3483 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3484 'only_matching': True,
3485 }, {
3486 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3487 'info_dict': {
3488 'id': '3yImotZU3tw', # This will keep changing
3489 'ext': 'mp4',
3490 'title': compat_str,
3491 'uploader': 'Sky News',
3492 'uploader_id': 'skynews',
3493 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3494 'upload_date': r're:\d{8}',
3495 'description': compat_str,
3496 'categories': ['News & Politics'],
3497 'tags': list,
3498 'like_count': int,
3499 'dislike_count': int,
3500 },
3501 'params': {
3502 'skip_download': True,
3503 },
3504 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3505 }, {
3506 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3507 'info_dict': {
3508 'id': 'a48o2S1cPoo',
3509 'ext': 'mp4',
3510 'title': 'The Young Turks - Live Main Show',
3511 'uploader': 'The Young Turks',
3512 'uploader_id': 'TheYoungTurks',
3513 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3514 'upload_date': '20150715',
3515 'license': 'Standard YouTube License',
3516 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3517 'categories': ['News & Politics'],
3518 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3519 'like_count': int,
3520 'dislike_count': int,
3521 },
3522 'params': {
3523 'skip_download': True,
3524 },
3525 'only_matching': True,
3526 }, {
3527 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3528 'only_matching': True,
3529 }, {
3530 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3531 'only_matching': True,
3532 }, {
3533 'note': 'A channel that is not live. Should raise error',
3534 'url': 'https://www.youtube.com/user/numberphile/live',
3535 'only_matching': True,
3536 }, {
3537 'url': 'https://www.youtube.com/feed/trending',
3538 'only_matching': True,
3539 }, {
3540 'url': 'https://www.youtube.com/feed/library',
3541 'only_matching': True,
3542 }, {
3543 'url': 'https://www.youtube.com/feed/history',
3544 'only_matching': True,
3545 }, {
3546 'url': 'https://www.youtube.com/feed/subscriptions',
3547 'only_matching': True,
3548 }, {
3549 'url': 'https://www.youtube.com/feed/watch_later',
3550 'only_matching': True,
3551 }, {
3552 'note': 'Recommended - redirects to home page',
3553 'url': 'https://www.youtube.com/feed/recommended',
3554 'only_matching': True,
3555 }, {
3556 'note': 'inline playlist with not always working continuations',
3557 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3558 'only_matching': True,
3559 }, {
3560 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3561 'only_matching': True,
3562 }, {
3563 'url': 'https://www.youtube.com/course',
3564 'only_matching': True,
3565 }, {
3566 'url': 'https://www.youtube.com/zsecurity',
3567 'only_matching': True,
3568 }, {
3569 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3570 'only_matching': True,
3571 }, {
3572 'url': 'https://www.youtube.com/TheYoungTurks/live',
3573 'only_matching': True,
3574 }, {
3575 'url': 'https://www.youtube.com/hashtag/cctv9',
3576 'info_dict': {
3577 'id': 'cctv9',
3578 'title': '#cctv9',
3579 },
3580 'playlist_mincount': 350,
3581 }, {
3582 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3583 'only_matching': True,
3584 }, {
3585 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3586 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3587 'only_matching': True
3588 }, {
3589 'note': '/browse/ should redirect to /channel/',
3590 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3591 'only_matching': True
3592 }, {
3593 'note': 'VLPL, should redirect to playlist?list=PL...',
3594 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3595 'info_dict': {
3596 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3597 'uploader': 'NoCopyrightSounds',
3598 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3599 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3600 'title': 'NCS Releases',
3601 },
3602 'playlist_mincount': 166,
3603 }, {
3604 'note': 'Topic, should redirect to playlist?list=UU...',
3605 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3606 'info_dict': {
3607 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3608 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3609 'title': 'Uploads from Royalty Free Music - Topic',
3610 'uploader': 'Royalty Free Music - Topic',
3611 },
3612 'expected_warnings': [
3613 'A channel/user page was given',
3614 'The URL does not have a videos tab',
3615 ],
3616 'playlist_mincount': 101,
3617 }, {
3618 'note': 'Topic without a UU playlist',
3619 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3620 'info_dict': {
3621 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3622 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3623 },
3624 'expected_warnings': [
3625 'A channel/user page was given',
3626 'The URL does not have a videos tab',
3627 'Falling back to channel URL',
3628 ],
3629 'playlist_mincount': 9,
3630 }, {
3631 'note': 'Youtube music Album',
3632 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3633 'info_dict': {
3634 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3635 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3636 },
3637 'playlist_count': 50,
3638 }, {
3639 'note': 'unlisted single video playlist',
3640 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3641 'info_dict': {
3642 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3643 'uploader': 'colethedj',
3644 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3645 'title': 'yt-dlp unlisted playlist test',
3646 'availability': 'unlisted'
3647 },
3648 'playlist_count': 1,
3649 }]
3650
3651 @classmethod
3652 def suitable(cls, url):
3653 return False if YoutubeIE.suitable(url) else super(
3654 YoutubeTabIE, cls).suitable(url)
3655
3656 def _extract_channel_id(self, webpage):
3657 channel_id = self._html_search_meta(
3658 'channelId', webpage, 'channel id', default=None)
3659 if channel_id:
3660 return channel_id
3661 channel_url = self._html_search_meta(
3662 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3663 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3664 'twitter:app:url:googleplay'), webpage, 'channel url')
3665 return self._search_regex(
3666 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3667 channel_url, 'channel id')
3668
3669 @staticmethod
3670 def _extract_basic_item_renderer(item):
3671 # Modified from _extract_grid_item_renderer
3672 known_basic_renderers = (
3673 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3674 )
3675 for key, renderer in item.items():
3676 if not isinstance(renderer, dict):
3677 continue
3678 elif key in known_basic_renderers:
3679 return renderer
3680 elif key.startswith('grid') and key.endswith('Renderer'):
3681 return renderer
3682
    def _grid_entries(self, grid_renderer):
        """Yield playlist/video/channel url_result entries for each item of a
        gridRenderer, falling back to a generic endpoint URL when the renderer
        carries none of the known id fields."""
        for item in grid_renderer['items']:
            if not isinstance(item, dict):
                continue
            renderer = self._extract_basic_item_renderer(item)
            if not isinstance(renderer, dict):
                continue
            title = self._get_text(renderer, 'title')

            # playlist
            playlist_id = renderer.get('playlistId')
            if playlist_id:
                yield self.url_result(
                    'https://www.youtube.com/playlist?list=%s' % playlist_id,
                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                    video_title=title)
                continue
            # video
            video_id = renderer.get('videoId')
            if video_id:
                yield self._extract_video(renderer)
                continue
            # channel
            channel_id = renderer.get('channelId')
            if channel_id:
                yield self.url_result(
                    'https://www.youtube.com/channel/%s' % channel_id,
                    ie=YoutubeTabIE.ie_key(), video_title=title)
                continue
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if ep_url:
                # Hand the URL to the first extractor that recognises it
                for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                    if ie.suitable(ep_url):
                        yield self.url_result(
                            ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                        break
3722
3723 def _shelf_entries_from_content(self, shelf_renderer):
3724 content = shelf_renderer.get('content')
3725 if not isinstance(content, dict):
3726 return
3727 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3728 if renderer:
3729 # TODO: add support for nested playlists so each shelf is processed
3730 # as separate playlist
3731 # TODO: this includes only first N items
3732 for entry in self._grid_entries(renderer):
3733 yield entry
3734 renderer = content.get('horizontalListRenderer')
3735 if renderer:
3736 # TODO
3737 pass
3738
3739 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3740 ep = try_get(
3741 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3742 compat_str)
3743 shelf_url = urljoin('https://www.youtube.com', ep)
3744 if shelf_url:
3745 # Skipping links to another channels, note that checking for
3746 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3747 # will not work
3748 if skip_channels and '/channels?' in shelf_url:
3749 return
3750 title = self._get_text(shelf_renderer, 'title')
3751 yield self.url_result(shelf_url, video_title=title)
3752 # Shelf may not contain shelf URL, fallback to extraction from content
3753 for entry in self._shelf_entries_from_content(shelf_renderer):
3754 yield entry
3755
3756 def _playlist_entries(self, video_list_renderer):
3757 for content in video_list_renderer['contents']:
3758 if not isinstance(content, dict):
3759 continue
3760 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3761 if not isinstance(renderer, dict):
3762 continue
3763 video_id = renderer.get('videoId')
3764 if not video_id:
3765 continue
3766 yield self._extract_video(renderer)
3767
3768 def _rich_entries(self, rich_grid_renderer):
3769 renderer = try_get(
3770 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3771 video_id = renderer.get('videoId')
3772 if not video_id:
3773 return
3774 yield self._extract_video(renderer)
3775
3776 def _video_entry(self, video_renderer):
3777 video_id = video_renderer.get('videoId')
3778 if video_id:
3779 return self._extract_video(video_renderer)
3780
    def _post_thread_entries(self, post_thread_renderer):
        """Yield entries for a community post: the attached video and/or
        playlist, plus any YouTube video links inside the post text."""
        post_renderer = try_get(
            post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
        if not post_renderer:
            return
        # video attachment
        video_renderer = try_get(
            post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
        video_id = video_renderer.get('videoId')
        if video_id:
            entry = self._extract_video(video_renderer)
            if entry:
                yield entry
        # playlist attachment
        playlist_id = try_get(
            post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
        # inline video links
        runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
        for run in runs:
            if not isinstance(run, dict):
                continue
            ep_url = try_get(
                run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
            if not ep_url:
                continue
            if not YoutubeIE.suitable(ep_url):
                continue
            ep_video_id = YoutubeIE._match_id(ep_url)
            if video_id == ep_video_id:
                # Skip links that merely duplicate the attached video
                continue
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3816
3817 def _post_thread_continuation_entries(self, post_thread_continuation):
3818 contents = post_thread_continuation.get('contents')
3819 if not isinstance(contents, list):
3820 return
3821 for content in contents:
3822 renderer = content.get('backstagePostThreadRenderer')
3823 if not isinstance(renderer, dict):
3824 continue
3825 for entry in self._post_thread_entries(renderer):
3826 yield entry
3827
3828 r''' # unused
3829 def _rich_grid_entries(self, contents):
3830 for content in contents:
3831 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3832 if video_renderer:
3833 entry = self._video_entry(video_renderer)
3834 if entry:
3835 yield entry
3836 '''
    def _entries(self, tab, item_id, account_syncid, ytcfg):
        """Yield all entries of a tab, following API continuations page by page
        until no further continuation token is produced."""

        def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
            # Also records the next continuation token into continuation_list[0]
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    # Dispatch on the first recognised renderer key
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # Single-element list used as a mutable cell (Python 2 does not support nonlocal)
        continuation_list = [None]
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(
                ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Carry visitorData across pages so YouTube keeps serving the same session
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Old-style continuation payloads
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # New-style continuation payloads: the item key tells us how to wrap
            # continuation_items so the existing entry extractors can consume it
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3953
3954 @staticmethod
3955 def _extract_selected_tab(tabs):
3956 for tab in tabs:
3957 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3958 if renderer.get('selected') is True:
3959 return renderer
3960 else:
3961 raise ExtractorError('Unable to find selected tab')
3962
3963 @classmethod
3964 def _extract_uploader(cls, data):
3965 uploader = {}
3966 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3967 owner = try_get(
3968 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3969 if owner:
3970 uploader['uploader'] = owner.get('text')
3971 uploader['uploader_id'] = try_get(
3972 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3973 uploader['uploader_url'] = urljoin(
3974 'https://www.youtube.com/',
3975 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3976 return {k: v for k, v in uploader.items() if v is not None}
3977
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build a playlist result for a tabbed page (channel/playlist/hashtag).

        Collects title/description/uploader/thumbnail metadata from the
        channel or playlist metadata renderers and delegates entry extraction
        for the selected tab to _entries().
        """
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            # Fall back to playlist metadata when this is not a channel page
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # Hashtag pages carry their title in a dedicated header renderer
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        # Append tab name, e.g. "lex will - Videos"
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # No channel metadata renderer: derive uploader info from the sidebar
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        ytcfg = self.extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
4051
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Yield the videos of an "infinite" mix playlist by repeatedly calling
        the 'next' API endpoint, stopping when the mix loops back to its first
        video or stops returning new ones."""
        first_id = last_id = None
        ytcfg = self.extract_ytcfg(playlist_id, webpage)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # Resume right after the last video yielded from the previous page
            # (pages may overlap); -1 + 1 == 0 when last_id is not present
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                # 'OAE%3D' appears to be the default mix continuation params
                # when the watch endpoint does not provide any — TODO confirm
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query, ep='next', headers=headers, ytcfg=ytcfg,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4086
4087 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
4088 title = playlist.get('title') or try_get(
4089 data, lambda x: x['titleText']['simpleText'], compat_str)
4090 playlist_id = playlist.get('playlistId') or item_id
4091
4092 # Delegating everything except mix playlists to regular tab-based playlist URL
4093 playlist_url = urljoin(url, try_get(
4094 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4095 compat_str))
4096 if playlist_url and playlist_url != url:
4097 return self.url_result(
4098 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4099 video_title=title)
4100
4101 return self.playlist_result(
4102 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4103 playlist_id=playlist_id, playlist_title=title)
4104
4105 def _extract_availability(self, data):
4106 """
4107 Gets the availability of a given playlist/tab.
4108 Note: Unless YouTube tells us explicitly, we do not assume it is public
4109 @param data: response
4110 """
4111 is_private = is_unlisted = None
4112 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4113 badge_labels = self._extract_badges(renderer)
4114
4115 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4116 privacy_dropdown_entries = try_get(
4117 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4118 for renderer_dict in privacy_dropdown_entries:
4119 is_selected = try_get(
4120 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4121 if not is_selected:
4122 continue
4123 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4124 if label:
4125 badge_labels.add(label.lower())
4126 break
4127
4128 for badge_label in badge_labels:
4129 if badge_label == 'unlisted':
4130 is_unlisted = True
4131 elif badge_label == 'private':
4132 is_private = True
4133 elif badge_label == 'public':
4134 is_unlisted = is_private = False
4135 return self._availability(is_private, False, False, False, is_unlisted)
4136
4137 @staticmethod
4138 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4139 sidebar_renderer = try_get(
4140 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4141 for item in sidebar_renderer:
4142 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4143 if renderer:
4144 return renderer
4145
    def _reload_with_unavailable_videos(self, item_id, data, webpage):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.

        @param item_id: playlist id, used for the fallback browseId and logging
        @param data: parsed ytInitialData of the playlist page
        @param webpage: the playlist webpage (for ytcfg extraction)
        @returns the reloaded API response (fatal=False, so possibly None), or
                 None when the sidebar renderer is missing entirely
        """
        browse_id = params = None
        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
        if not renderer:
            return
        # Look for the 'show unavailable videos' menu entry to get its endpoint
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

        ytcfg = self.extract_ytcfg(item_id, webpage)
        headers = self.generate_api_headers(
            ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        query = {
            # 'wgYCCAA=' / 'VL<id>' are the defaults used when no explicit
            # button endpoint was found — presumably they request the plain
            # playlist browse; TODO confirm
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False, ytcfg=ytcfg,
            note='Downloading API JSON with unavailable videos')
4182
4183 def _extract_webpage(self, url, item_id):
4184 retries = self.get_param('extractor_retries', 3)
4185 count = -1
4186 last_error = 'Incomplete yt initial data recieved'
4187 while count < retries:
4188 count += 1
4189 # Sometimes youtube returns a webpage with incomplete ytInitialData
4190 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4191 if count:
4192 self.report_warning('%s. Retrying ...' % last_error)
4193 webpage = self._download_webpage(
4194 url, item_id,
4195 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4196 data = self.extract_yt_initial_data(item_id, webpage)
4197 if data.get('contents') or data.get('currentVideoEndpoint'):
4198 break
4199 # Extract alerts here only when there is error
4200 self._extract_and_report_alerts(data)
4201 if count >= retries:
4202 raise ExtractorError(last_error)
4203 return webpage, data
4204
4205 @staticmethod
4206 def _smuggle_data(entries, data):
4207 for entry in entries:
4208 if data:
4209 entry['url'] = smuggle_url(entry['url'], data)
4210 yield entry
4211
4212 def _real_extract(self, url):
4213 url, smuggled_data = unsmuggle_url(url, {})
4214 if self.is_music_url(url):
4215 smuggled_data['is_music_url'] = True
4216 info_dict = self.__real_extract(url, smuggled_data)
4217 if info_dict.get('entries'):
4218 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4219 return info_dict
4220
    # Splits a matched URL into (pre)(tab)(post); the optional /<tab> group is
    # only matched when the 'channel_type' group of _VALID_URL participated
    # (conditional pattern (?(channel_type)...))
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4222
    def __real_extract(self, url, smuggled_data):
        """
        Main tab/playlist extraction: normalise the URL (host, tab casing,
        music redirects), download the page data, then dispatch to the
        tab / playlist / single-video handlers.
        """
        item_id = self._match_id(url)
        # Force the canonical host; other hosts may return different data
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Re-match the (possibly rewritten) URL; blank out missing groups
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data, only_once=True)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4337
4338
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to YoutubeTabIE for URLs it already claims
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        # watch?v=... URLs are videos, not playlists
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        # Rebuild a canonical /playlist URL and delegate to YoutubeTabIE
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            # Preserve the music origin across the url_result round trip
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4423
4424
class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that also carry a playlist id
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Rebuild a canonical watch URL carrying both the video and playlist ids
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4463
4464
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Translate the ytuser: shorthand into the canonical /user/ URL and
        # hand it to the tab extractor
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4478
4479
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos are exposed as the special 'LL' playlist
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
4497
4498
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional innertube search 'params' blob; subclasses may override
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` video results for `query`, paging through the search API."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page nests results under the search renderer; continuation
            # pages deliver them via onResponseReceivedCommands
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token found on this page: results are exhausted
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
4566
4567
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Search params blob requesting the newest-first ordering described in IE_DESC
    _SEARCH_PARAMS = 'CAI%3D'
4573
4574
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Match the plain _VALID_URL above instead of a '<key>N:query' pattern
        return cls._VALID_URL

    def _real_extract(self, url):
        qs = parse_qs(url)
        # _VALID_URL guarantees at least one of search_query/q is present
        query = (qs.get('search_query') or qs.get('q'))[0]
        # Pass the URL's 'sp' filter blob through to the search requests
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4601
4602
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derived name, e.g. 'youtube:history' when _FEED_NAME is 'history'
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Delegate to YoutubeTabIE via the canonical feed URL
        return self.url_result(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            ie=YoutubeTabIE.ie_key())
4619
4620
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch-later is exposed as the special 'WL' playlist
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4633
4634
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com homepage
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # overrides the base class requirement
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4650
4651
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Resolves to https://www.youtube.com/feed/subscriptions via the base class
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4663
4664
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Resolves to https://www.youtube.com/feed/history via the base class
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4673
4674
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches watch URLs that carry query parameters but no video id -- the
    # typical symptom of an unquoted '&' splitting the real URL in a shell
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
        attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: tell the user how to quote the URL properly
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
4722
4723
class YoutubeClipIE(InfoExtractor):
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        # Clips are unsupported; fall back to the generic extractor, which
        # results in the full source video being downloaded
        self.report_warning('YouTube clips are not currently supported. The entire video will be downloaded instead')
        return self.url_result(url, 'Generic')
4732
4733
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Matching this pattern means the id is 1-10 chars, shorter than a
        # full YouTube video id -- report the truncation to the user
        truncated_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url),
            expected=True)