]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[utils] Add `parse_qs`
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 is_html,
42 mimetype2ext,
43 network_exceptions,
44 orderedSet,
45 parse_codecs,
46 parse_count,
47 parse_duration,
48 parse_iso8601,
49 parse_qs,
50 qualities,
51 remove_start,
52 smuggle_url,
53 str_or_none,
54 str_to_int,
55 traverse_obj,
56 try_get,
57 unescapeHTML,
58 unified_strdate,
59 unsmuggle_url,
60 update_url_query,
61 url_or_none,
62 urlencode_postdata,
63 urljoin,
64 variadic,
65 )
66
67
 68 # any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    # Per-client configuration for YouTube's InnerTube API.
    # Entries missing INNERTUBE_API_KEY/INNERTUBE_HOST get defaults filled in
    # by build_innertube_clients() below.
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
    },
    # no INNERTUBE_API_KEY here - the default is applied by build_innertube_clients()
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26
    },
    # no INNERTUBE_API_KEY here - the default is applied by build_innertube_clients()
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
206
207
def build_innertube_clients():
    """Fill in per-client defaults and derive the *_agegate client variants."""
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    main_clients = ('android', 'web', 'ios', 'mweb')
    base_priority = qualities(main_clients[::-1])

    # Snapshot the items first: *_agegate entries are inserted while iterating
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        cfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        cfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        # Priority is keyed off the base client family (part before the first '_')
        cfg['priority'] = 10 * base_priority(name.split('_', 1)[0])

        if name in main_clients:
            # Clone the base client into an EMBED-screen variant for age-gated videos
            agegate = copy.deepcopy(cfg)
            agegate['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            agegate['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3


build_innertube_clients()
234
235
236 class YoutubeBaseInfoExtractor(InfoExtractor):
237 """Provide base functions for Youtube extractors"""
238
239 _RESERVED_NAMES = (
240 r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
241 r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
242 r'browse|oembed|get_video_info|iframe_api|s/player|'
243 r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
244
245 _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
246
247 _NETRC_MACHINE = 'youtube'
248
249 # If True it will raise an error if no login info is provided
250 _LOGIN_REQUIRED = False
251
252 r''' # Unused since login is broken
253 _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
254 _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
255
256 _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
257 _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
258 _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
259 '''
260
261 def _login(self):
262 """
263 Attempt to log in to YouTube.
264 True is returned if successful or skipped.
265 False is returned if login failed.
266
267 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
268 """
269
270 def warn(message):
271 self.report_warning(message)
272
273 # username+password login is broken
274 if (self._LOGIN_REQUIRED
275 and self.get_param('cookiefile') is None
276 and self.get_param('cookiesfrombrowser') is None):
277 self.raise_login_required(
278 'Login details are needed to download this content', method='cookies')
279 username, password = self._get_login_info()
280 if username:
281 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
282 return
283
284 # Everything below this is broken!
285 r'''
286 # No authentication to be performed
287 if username is None:
288 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
289 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
290 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
291 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
292 return True
293
294 login_page = self._download_webpage(
295 self._LOGIN_URL, None,
296 note='Downloading login page',
297 errnote='unable to fetch login page', fatal=False)
298 if login_page is False:
299 return
300
301 login_form = self._hidden_inputs(login_page)
302
303 def req(url, f_req, note, errnote):
304 data = login_form.copy()
305 data.update({
306 'pstMsg': 1,
307 'checkConnection': 'youtube',
308 'checkedDomains': 'youtube',
309 'hl': 'en',
310 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
311 'f.req': json.dumps(f_req),
312 'flowName': 'GlifWebSignIn',
313 'flowEntry': 'ServiceLogin',
314 # TODO: reverse actual botguard identifier generation algo
315 'bgRequest': '["identifier",""]',
316 })
317 return self._download_json(
318 url, None, note=note, errnote=errnote,
319 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
320 fatal=False,
321 data=urlencode_postdata(data), headers={
322 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
323 'Google-Accounts-XSRF': 1,
324 })
325
326 lookup_req = [
327 username,
328 None, [], None, 'US', None, None, 2, False, True,
329 [
330 None, None,
331 [2, 1, None, 1,
332 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
333 None, [], 4],
334 1, [None, None, []], None, None, None, True
335 ],
336 username,
337 ]
338
339 lookup_results = req(
340 self._LOOKUP_URL, lookup_req,
341 'Looking up account info', 'Unable to look up account info')
342
343 if lookup_results is False:
344 return False
345
346 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
347 if not user_hash:
348 warn('Unable to extract user hash')
349 return False
350
351 challenge_req = [
352 user_hash,
353 None, 1, None, [1, None, None, None, [password, None, True]],
354 [
355 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
356 1, [None, None, []], None, None, None, True
357 ]]
358
359 challenge_results = req(
360 self._CHALLENGE_URL, challenge_req,
361 'Logging in', 'Unable to log in')
362
363 if challenge_results is False:
364 return
365
366 login_res = try_get(challenge_results, lambda x: x[0][5], list)
367 if login_res:
368 login_msg = try_get(login_res, lambda x: x[5], compat_str)
369 warn(
370 'Unable to login: %s' % 'Invalid password'
371 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
372 return False
373
374 res = try_get(challenge_results, lambda x: x[0][-1], list)
375 if not res:
376 warn('Unable to extract result entry')
377 return False
378
379 login_challenge = try_get(res, lambda x: x[0][0], list)
380 if login_challenge:
381 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
382 if challenge_str == 'TWO_STEP_VERIFICATION':
383 # SEND_SUCCESS - TFA code has been successfully sent to phone
384 # QUOTA_EXCEEDED - reached the limit of TFA codes
385 status = try_get(login_challenge, lambda x: x[5], compat_str)
386 if status == 'QUOTA_EXCEEDED':
387 warn('Exceeded the limit of TFA codes, try later')
388 return False
389
390 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
391 if not tl:
392 warn('Unable to extract TL')
393 return False
394
395 tfa_code = self._get_tfa_info('2-step verification code')
396
397 if not tfa_code:
398 warn(
399 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
400 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
401 return False
402
403 tfa_code = remove_start(tfa_code, 'G-')
404
405 tfa_req = [
406 user_hash, None, 2, None,
407 [
408 9, None, None, None, None, None, None, None,
409 [None, tfa_code, True, 2]
410 ]]
411
412 tfa_results = req(
413 self._TFA_URL.format(tl), tfa_req,
414 'Submitting TFA code', 'Unable to submit TFA code')
415
416 if tfa_results is False:
417 return False
418
419 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
420 if tfa_res:
421 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
422 warn(
423 'Unable to finish TFA: %s' % 'Invalid TFA code'
424 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
425 return False
426
427 check_cookie_url = try_get(
428 tfa_results, lambda x: x[0][-1][2], compat_str)
429 else:
430 CHALLENGES = {
431 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
432 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
433 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
434 }
435 challenge = CHALLENGES.get(
436 challenge_str,
437 '%s returned error %s.' % (self.IE_NAME, challenge_str))
438 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
439 return False
440 else:
441 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
442
443 if not check_cookie_url:
444 warn('Unable to extract CheckCookie URL')
445 return False
446
447 check_cookie_results = self._download_webpage(
448 check_cookie_url, None, 'Checking cookie', fatal=False)
449
450 if check_cookie_results is False:
451 return False
452
453 if 'https://myaccount.google.com/' not in check_cookie_results:
454 warn('Unable to log in')
455 return False
456
457 return True
458 '''
459
460 def _initialize_consent(self):
461 cookies = self._get_cookies('https://www.youtube.com/')
462 if cookies.get('__Secure-3PSID'):
463 return
464 consent_id = None
465 consent = cookies.get('CONSENT')
466 if consent:
467 if 'YES' in consent.value:
468 return
469 consent_id = self._search_regex(
470 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
471 if not consent_id:
472 consent_id = random.randint(100, 999)
473 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
474
475 def _real_initialize(self):
476 self._initialize_consent()
477 if self._downloader is None:
478 return
479 if not self._login():
480 return
481
482 _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
483 _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
484 _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
485
    def _get_default_ytcfg(self, client='web'):
        # Deep copy so callers may mutate the returned config freely
        return copy.deepcopy(INNERTUBE_CLIENTS[client])

    def _get_innertube_host(self, client='web'):
        # Hostname used for innertube API requests (e.g. music.youtube.com)
        return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

    def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
        # try_get but with fallback to default ytcfg client values when present
        _func = lambda y: try_get(y, getter, expected_type)
        return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

    def _extract_client_name(self, ytcfg, default_client='web'):
        # Client name string (e.g. 'WEB'), from either ytcfg key layout
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)

    @staticmethod
    def _extract_session_index(*data):
        # First SESSION_INDEX found among the given ytcfgs, or None
        for ytcfg in data:
            session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
            if session_index is not None:
                return session_index

    def _extract_client_version(self, ytcfg, default_client='web'):
        # Client version string, from either ytcfg key layout
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)

    def _extract_api_key(self, ytcfg=None, default_client='web'):
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
516
517 def _extract_context(self, ytcfg=None, default_client='web'):
518 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
519 context = _get_context(ytcfg)
520 if context:
521 return context
522
523 context = _get_context(self._get_default_ytcfg(default_client))
524 if not ytcfg:
525 return context
526
527 # Recreate the client context (required)
528 context['client'].update({
529 'clientVersion': self._extract_client_version(ytcfg, default_client),
530 'clientName': self._extract_client_name(ytcfg, default_client),
531 })
532 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
533 if visitor_data:
534 context['client']['visitorData'] = visitor_data
535 return context
536
    # Cached SAPISID cookie value: None = not looked up yet, False = known absent
    _SAPISID = None

    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Build the SAPISIDHASH Authorization header value, or None if no SAPISID cookie."""
        time_now = round(time.time())
        if self._SAPISID is None:
            yt_cookies = self._get_cookies('https://www.youtube.com')
            # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
            # See: https://github.com/yt-dlp/yt-dlp/issues/393
            sapisid_cookie = dict_get(
                yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
            if sapisid_cookie and sapisid_cookie.value:
                self._SAPISID = sapisid_cookie.value
                self.write_debug('Extracted SAPISID cookie')
                # SAPISID cookie is required if not already present
                if not yt_cookies.get('SAPISID'):
                    self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                    self._set_cookie(
                        '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
            else:
                self._SAPISID = False
        if not self._SAPISID:
            return None
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'
563
564 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
565 note='Downloading API JSON', errnote='Unable to download API page',
566 context=None, api_key=None, api_hostname=None, default_client='web'):
567
568 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
569 data.update(query)
570 real_headers = self.generate_api_headers(default_client=default_client)
571 real_headers.update({'content-type': 'application/json'})
572 if headers:
573 real_headers.update(headers)
574 return self._download_json(
575 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
576 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
577 data=json.dumps(data).encode('utf8'), headers=real_headers,
578 query={'key': api_key or self._extract_api_key()})
579
    def extract_yt_initial_data(self, video_id, webpage):
        # Parse the ytInitialData JSON embedded in the watch page; first try
        # with the boundary regex to anchor the end, then without
        return self._parse_json(
            self._search_regex(
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)

    def _extract_identity_token(self, webpage, item_id):
        # ID_TOKEN from the page's ytcfg, falling back to a raw regex search;
        # None when the page is empty or the token cannot be found
        if not webpage:
            return None
        ytcfg = self.extract_ytcfg(item_id, webpage)
        if ytcfg:
            token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
            if token:
                return token
        return self._search_regex(
            r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
            'identity token', default=None)
598
599 @staticmethod
600 def _extract_account_syncid(*args):
601 """
602 Extract syncId required to download private playlists of secondary channels
603 @params response and/or ytcfg
604 """
605 for data in args:
606 # ytcfg includes channel_syncid if on secondary channel
607 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
608 if delegated_sid:
609 return delegated_sid
610 sync_ids = (try_get(
611 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
612 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
613 if len(sync_ids) >= 2 and sync_ids[1]:
614 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
615 # and just "user_syncid||" for primary channel. We only want the channel_syncid
616 return sync_ids[0]
617
    def extract_ytcfg(self, video_id, webpage):
        # Parse the ytcfg.set({...}) blob embedded in the page; {} on failure
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}
625
    def generate_api_headers(
            self, ytcfg=None, identity_token=None, account_syncid=None,
            visitor_data=None, api_hostname=None, default_client='web', session_index=None):
        """Build the HTTP headers for an innertube API request."""
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin
        }
        # Fall back to the visitorData carried inside the ytcfg context
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
        if session_index is None and ytcfg:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        # Authorization (SAPISIDHASH) is only added when the cookies allow it
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers
654
655 @staticmethod
656 def _build_api_continuation_query(continuation, ctp=None):
657 query = {
658 'continuation': continuation
659 }
660 # TODO: Inconsistency with clickTrackingParams.
661 # Currently we have a fixed ctp contained within context (from ytcfg)
662 # and a ctp in root query for continuation.
663 if ctp:
664 query['clickTracking'] = {'clickTrackingParams': ctp}
665 return query
666
    @classmethod
    def _extract_next_continuation_data(cls, renderer):
        # Old-style continuation: nextContinuationData / reloadContinuationData
        next_continuation = try_get(
            renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                       lambda x: x['continuation']['reloadContinuationData']), dict)
        if not next_continuation:
            return
        continuation = next_continuation.get('continuation')
        if not continuation:
            return
        ctp = next_continuation.get('clickTrackingParams')
        return cls._build_api_continuation_query(continuation, ctp)

    @classmethod
    def _extract_continuation_ep_data(cls, continuation_ep: dict):
        # New-style continuation endpoint carrying a continuationCommand token
        if isinstance(continuation_ep, dict):
            continuation = try_get(
                continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
            if not continuation:
                return
            ctp = continuation_ep.get('clickTrackingParams')
            return cls._build_api_continuation_query(continuation, ctp)
689
    @classmethod
    def _extract_continuation(cls, renderer):
        """Find a continuation query in a renderer, trying old then new style."""
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation

        # New style: scan the renderer's contents/items for a
        # continuationItemRenderer and extract its endpoint
        contents = []
        for key in ('contents', 'items'):
            contents.extend(try_get(renderer, lambda x: x[key], list) or [])

        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                          lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
                dict)
            continuation = cls._extract_continuation_ep_data(continuation_ep)
            if continuation:
                return continuation
710
711 @classmethod
712 def _extract_alerts(cls, data):
713 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
714 if not isinstance(alert_dict, dict):
715 continue
716 for alert in alert_dict.values():
717 alert_type = alert.get('type')
718 if not alert_type:
719 continue
720 message = cls._get_text(alert, 'text')
721 if message:
722 yield alert_type, message
723
724 def _report_alerts(self, alerts, expected=True, fatal=True):
725 errors = []
726 warnings = []
727 for alert_type, alert_message in alerts:
728 if alert_type.lower() == 'error' and fatal:
729 errors.append([alert_type, alert_message])
730 else:
731 warnings.append([alert_type, alert_message])
732
733 for alert_type, alert_message in (warnings + errors[:-1]):
734 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
735 if errors:
736 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
737
738 def _extract_and_report_alerts(self, data, *args, **kwargs):
739 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
740
741 def _extract_badges(self, renderer: dict):
742 badges = set()
743 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
744 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
745 if label:
746 badges.add(label.lower())
747 return badges
748
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """Extract display text from a renderer: either 'simpleText' or the
        concatenation of up to max_runs 'runs' texts, tried for each path."""
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # A non-branching path yields a single node, not a list of nodes
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    # The item itself may already be a list of runs
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
770
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the innertube API with retries; returns the parsed response or,
        when fatal is False, None after reporting the failure as a warning."""
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Surface YouTube's own error message from a JSON error body
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
836
837 @staticmethod
838 def is_music_url(url):
839 return re.match(r'https?://music\.youtube\.com/', url) is not None
840
841 def _extract_video(self, renderer):
842 video_id = renderer.get('videoId')
843 title = self._get_text(renderer, 'title')
844 description = self._get_text(renderer, 'descriptionSnippet')
845 duration = parse_duration(self._get_text(
846 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
847 view_count_text = self._get_text(renderer, 'viewCountText') or ''
848 view_count = str_to_int(self._search_regex(
849 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
850 'view count', default=None))
851
852 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
853
854 return {
855 '_type': 'url',
856 'ie_key': YoutubeIE.ie_key(),
857 'id': video_id,
858 'url': video_id,
859 'title': title,
860 'description': description,
861 'duration': duration,
862 'view_count': view_count,
863 'uploader': uploader,
864 }
865
866
867 class YoutubeIE(YoutubeBaseInfoExtractor):
868 IE_DESC = 'YouTube.com'
869 _INVIDIOUS_SITES = (
870 # invidious-redirect websites
871 r'(?:www\.)?redirect\.invidious\.io',
872 r'(?:(?:www|dev)\.)?invidio\.us',
873 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
874 r'(?:www\.)?invidious\.pussthecat\.org',
875 r'(?:www\.)?invidious\.zee\.li',
876 r'(?:www\.)?invidious\.ethibox\.fr',
877 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
878 # youtube-dl invidious instances list
879 r'(?:(?:www|no)\.)?invidiou\.sh',
880 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
881 r'(?:www\.)?invidious\.kabi\.tk',
882 r'(?:www\.)?invidious\.mastodon\.host',
883 r'(?:www\.)?invidious\.zapashcanon\.fr',
884 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
885 r'(?:www\.)?invidious\.tinfoil-hat\.net',
886 r'(?:www\.)?invidious\.himiko\.cloud',
887 r'(?:www\.)?invidious\.reallyancient\.tech',
888 r'(?:www\.)?invidious\.tube',
889 r'(?:www\.)?invidiou\.site',
890 r'(?:www\.)?invidious\.site',
891 r'(?:www\.)?invidious\.xyz',
892 r'(?:www\.)?invidious\.nixnet\.xyz',
893 r'(?:www\.)?invidious\.048596\.xyz',
894 r'(?:www\.)?invidious\.drycat\.fr',
895 r'(?:www\.)?inv\.skyn3t\.in',
896 r'(?:www\.)?tube\.poal\.co',
897 r'(?:www\.)?tube\.connect\.cafe',
898 r'(?:www\.)?vid\.wxzm\.sx',
899 r'(?:www\.)?vid\.mint\.lgbt',
900 r'(?:www\.)?vid\.puffyan\.us',
901 r'(?:www\.)?yewtu\.be',
902 r'(?:www\.)?yt\.elukerio\.org',
903 r'(?:www\.)?yt\.lelux\.fi',
904 r'(?:www\.)?invidious\.ggc-project\.de',
905 r'(?:www\.)?yt\.maisputain\.ovh',
906 r'(?:www\.)?ytprivate\.com',
907 r'(?:www\.)?invidious\.13ad\.de',
908 r'(?:www\.)?invidious\.toot\.koeln',
909 r'(?:www\.)?invidious\.fdn\.fr',
910 r'(?:www\.)?watch\.nettohikari\.com',
911 r'(?:www\.)?invidious\.namazso\.eu',
912 r'(?:www\.)?invidious\.silkky\.cloud',
913 r'(?:www\.)?invidious\.exonip\.de',
914 r'(?:www\.)?invidious\.riverside\.rocks',
915 r'(?:www\.)?invidious\.blamefran\.net',
916 r'(?:www\.)?invidious\.moomoo\.de',
917 r'(?:www\.)?ytb\.trom\.tf',
918 r'(?:www\.)?yt\.cyberhost\.uk',
919 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
920 r'(?:www\.)?qklhadlycap4cnod\.onion',
921 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
922 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
923 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
924 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
925 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
926 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
927 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
928 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
929 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
930 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
931 )
932 _VALID_URL = r"""(?x)^
933 (
934 (?:https?://|//) # http(s):// or protocol-independent URL
935 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
936 (?:www\.)?deturl\.com/www\.youtube\.com|
937 (?:www\.)?pwnyoutube\.com|
938 (?:www\.)?hooktube\.com|
939 (?:www\.)?yourepeat\.com|
940 tube\.majestyc\.net|
941 %(invidious)s|
942 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
943 (?:.*?\#/)? # handle anchor (#/) redirect urls
944 (?: # the various things that can precede the ID:
945 (?:(?:v|embed|e|shorts)/(?!videoseries)) # v/ or embed/ or e/ or shorts/
946 |(?: # or the v= param in all its forms
947 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
948 (?:\?|\#!?) # the params delimiter ? or # or #!
949 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
950 v=
951 )
952 ))
953 |(?:
954 youtu\.be| # just youtu.be/xxxx
955 vid\.plus| # or vid.plus/xxxx
956 zwearz\.com/watch| # or zwearz.com/watch/xxxx
957 %(invidious)s
958 )/
959 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
960 )
961 )? # all until now is optional -> you can pass the naked ID
962 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
963 (?(1).+)? # if we found the ID, everything can follow
964 (?:\#|$)""" % {
965 'invidious': '|'.join(_INVIDIOUS_SITES),
966 }
967 _PLAYER_INFO_RE = (
968 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
969 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
970 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
971 )
972 _formats = {
973 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
974 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
975 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
976 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
977 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
978 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
979 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
980 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
981 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
982 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
983 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
984 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
985 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
986 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
987 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
988 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
989 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
990 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
991
992
993 # 3D videos
994 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
995 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
996 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
997 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
998 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
999 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1000 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1001
1002 # Apple HTTP Live Streaming
1003 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1004 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1005 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1006 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1007 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1008 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1009 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1010 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1011
1012 # DASH mp4 video
1013 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1014 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1015 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1016 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1017 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1018 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1019 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1020 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1021 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1022 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1023 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1024 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1025
1026 # Dash mp4 audio
1027 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1028 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1029 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1030 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1031 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1032 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1033 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1034
1035 # Dash webm
1036 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1037 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1038 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1039 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1040 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1041 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1042 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1043 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1044 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1045 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1046 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1047 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1048 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1049 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1050 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1051 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1052 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1053 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1054 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1055 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1056 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1057 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1058
1059 # Dash webm audio
1060 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1061 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1062
1063 # Dash webm audio with opus inside
1064 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1065 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1066 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1067
1068 # RTMP (unnamed)
1069 '_rtmp': {'protocol': 'rtmp'},
1070
1071 # av01 video only formats sometimes served with "unknown" codecs
1072 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1073 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1074 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1075 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1076 }
1077 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1078
1079 _GEO_BYPASS = False
1080
1081 IE_NAME = 'youtube'
1082 _TESTS = [
1083 {
1084 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1085 'info_dict': {
1086 'id': 'BaW_jenozKc',
1087 'ext': 'mp4',
1088 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1089 'uploader': 'Philipp Hagemeister',
1090 'uploader_id': 'phihag',
1091 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1092 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1093 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1094 'upload_date': '20121002',
1095 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1096 'categories': ['Science & Technology'],
1097 'tags': ['youtube-dl'],
1098 'duration': 10,
1099 'view_count': int,
1100 'like_count': int,
1101 'dislike_count': int,
1102 'start_time': 1,
1103 'end_time': 9,
1104 }
1105 },
1106 {
1107 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1108 'note': 'Embed-only video (#1746)',
1109 'info_dict': {
1110 'id': 'yZIXLfi8CZQ',
1111 'ext': 'mp4',
1112 'upload_date': '20120608',
1113 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1114 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1115 'uploader': 'SET India',
1116 'uploader_id': 'setindia',
1117 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1118 'age_limit': 18,
1119 },
1120 'skip': 'Private video',
1121 },
1122 {
1123 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1124 'note': 'Use the first video ID in the URL',
1125 'info_dict': {
1126 'id': 'BaW_jenozKc',
1127 'ext': 'mp4',
1128 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1129 'uploader': 'Philipp Hagemeister',
1130 'uploader_id': 'phihag',
1131 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1132 'upload_date': '20121002',
1133 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1134 'categories': ['Science & Technology'],
1135 'tags': ['youtube-dl'],
1136 'duration': 10,
1137 'view_count': int,
1138 'like_count': int,
1139 'dislike_count': int,
1140 },
1141 'params': {
1142 'skip_download': True,
1143 },
1144 },
1145 {
1146 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1147 'note': '256k DASH audio (format 141) via DASH manifest',
1148 'info_dict': {
1149 'id': 'a9LDPn-MO4I',
1150 'ext': 'm4a',
1151 'upload_date': '20121002',
1152 'uploader_id': '8KVIDEO',
1153 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1154 'description': '',
1155 'uploader': '8KVIDEO',
1156 'title': 'UHDTV TEST 8K VIDEO.mp4'
1157 },
1158 'params': {
1159 'youtube_include_dash_manifest': True,
1160 'format': '141',
1161 },
1162 'skip': 'format 141 not served anymore',
1163 },
1164 # DASH manifest with encrypted signature
1165 {
1166 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1167 'info_dict': {
1168 'id': 'IB3lcPjvWLA',
1169 'ext': 'm4a',
1170 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1171 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1172 'duration': 244,
1173 'uploader': 'AfrojackVEVO',
1174 'uploader_id': 'AfrojackVEVO',
1175 'upload_date': '20131011',
1176 'abr': 129.495,
1177 },
1178 'params': {
1179 'youtube_include_dash_manifest': True,
1180 'format': '141/bestaudio[ext=m4a]',
1181 },
1182 },
1183 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1184 {
1185 'note': 'Embed allowed age-gate video',
1186 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1187 'info_dict': {
1188 'id': 'HtVdAasjOgU',
1189 'ext': 'mp4',
1190 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1191 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1192 'duration': 142,
1193 'uploader': 'The Witcher',
1194 'uploader_id': 'WitcherGame',
1195 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1196 'upload_date': '20140605',
1197 'age_limit': 18,
1198 },
1199 },
1200 {
1201 'note': 'Age-gate video with embed allowed in public site',
1202 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1203 'info_dict': {
1204 'id': 'HsUATh_Nc2U',
1205 'ext': 'mp4',
1206 'title': 'Godzilla 2 (Official Video)',
1207 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1208 'upload_date': '20200408',
1209 'uploader_id': 'FlyingKitty900',
1210 'uploader': 'FlyingKitty',
1211 'age_limit': 18,
1212 },
1213 },
1214 {
1215 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1216 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1217 'info_dict': {
1218 'id': 'Tq92D6wQ1mg',
1219 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1220 'ext': 'mp4',
1221 'upload_date': '20191227',
1222 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1223 'uploader': 'Projekt Melody',
1224 'description': 'md5:17eccca93a786d51bc67646756894066',
1225 'age_limit': 18,
1226 },
1227 },
1228 {
1229 'note': 'Non-Agegated non-embeddable video',
1230 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1231 'info_dict': {
1232 'id': 'MeJVWBSsPAY',
1233 'ext': 'mp4',
1234 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1235 'uploader': 'Herr Lurik',
1236 'uploader_id': 'st3in234',
1237 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1238 'upload_date': '20130730',
1239 },
1240 },
1241 {
1242 'note': 'Non-bypassable age-gated video',
1243 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1244 'only_matching': True,
1245 },
1246 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1247 # YouTube Red ad is not captured for creator
1248 {
1249 'url': '__2ABJjxzNo',
1250 'info_dict': {
1251 'id': '__2ABJjxzNo',
1252 'ext': 'mp4',
1253 'duration': 266,
1254 'upload_date': '20100430',
1255 'uploader_id': 'deadmau5',
1256 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1257 'creator': 'deadmau5',
1258 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1259 'uploader': 'deadmau5',
1260 'title': 'Deadmau5 - Some Chords (HD)',
1261 'alt_title': 'Some Chords',
1262 },
1263 'expected_warnings': [
1264 'DASH manifest missing',
1265 ]
1266 },
1267 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1268 {
1269 'url': 'lqQg6PlCWgI',
1270 'info_dict': {
1271 'id': 'lqQg6PlCWgI',
1272 'ext': 'mp4',
1273 'duration': 6085,
1274 'upload_date': '20150827',
1275 'uploader_id': 'olympic',
1276 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1277 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1278 'uploader': 'Olympics',
1279 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1280 },
1281 'params': {
1282 'skip_download': 'requires avconv',
1283 }
1284 },
1285 # Non-square pixels
1286 {
1287 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1288 'info_dict': {
1289 'id': '_b-2C3KPAM0',
1290 'ext': 'mp4',
1291 'stretched_ratio': 16 / 9.,
1292 'duration': 85,
1293 'upload_date': '20110310',
1294 'uploader_id': 'AllenMeow',
1295 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1296 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1297 'uploader': '孫ᄋᄅ',
1298 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1299 },
1300 },
1301 # url_encoded_fmt_stream_map is empty string
1302 {
1303 'url': 'qEJwOuvDf7I',
1304 'info_dict': {
1305 'id': 'qEJwOuvDf7I',
1306 'ext': 'webm',
1307 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1308 'description': '',
1309 'upload_date': '20150404',
1310 'uploader_id': 'spbelect',
1311 'uploader': 'Наблюдатели Петербурга',
1312 },
1313 'params': {
1314 'skip_download': 'requires avconv',
1315 },
1316 'skip': 'This live event has ended.',
1317 },
1318 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1319 {
1320 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1321 'info_dict': {
1322 'id': 'FIl7x6_3R5Y',
1323 'ext': 'webm',
1324 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1325 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1326 'duration': 220,
1327 'upload_date': '20150625',
1328 'uploader_id': 'dorappi2000',
1329 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1330 'uploader': 'dorappi2000',
1331 'formats': 'mincount:31',
1332 },
1333 'skip': 'not actual anymore',
1334 },
1335 # DASH manifest with segment_list
1336 {
1337 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1338 'md5': '8ce563a1d667b599d21064e982ab9e31',
1339 'info_dict': {
1340 'id': 'CsmdDsKjzN8',
1341 'ext': 'mp4',
1342 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1343 'uploader': 'Airtek',
1344 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1345 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1346 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1347 },
1348 'params': {
1349 'youtube_include_dash_manifest': True,
1350 'format': '135', # bestvideo
1351 },
1352 'skip': 'This live event has ended.',
1353 },
1354 {
1355 # Multifeed videos (multiple cameras), URL is for Main Camera
1356 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1357 'info_dict': {
1358 'id': 'jvGDaLqkpTg',
1359 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1360 'description': 'md5:e03b909557865076822aa169218d6a5d',
1361 },
1362 'playlist': [{
1363 'info_dict': {
1364 'id': 'jvGDaLqkpTg',
1365 'ext': 'mp4',
1366 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1367 'description': 'md5:e03b909557865076822aa169218d6a5d',
1368 'duration': 10643,
1369 'upload_date': '20161111',
1370 'uploader': 'Team PGP',
1371 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1372 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1373 },
1374 }, {
1375 'info_dict': {
1376 'id': '3AKt1R1aDnw',
1377 'ext': 'mp4',
1378 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1379 'description': 'md5:e03b909557865076822aa169218d6a5d',
1380 'duration': 10991,
1381 'upload_date': '20161111',
1382 'uploader': 'Team PGP',
1383 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1384 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1385 },
1386 }, {
1387 'info_dict': {
1388 'id': 'RtAMM00gpVc',
1389 'ext': 'mp4',
1390 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1391 'description': 'md5:e03b909557865076822aa169218d6a5d',
1392 'duration': 10995,
1393 'upload_date': '20161111',
1394 'uploader': 'Team PGP',
1395 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1396 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1397 },
1398 }, {
1399 'info_dict': {
1400 'id': '6N2fdlP3C5U',
1401 'ext': 'mp4',
1402 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1403 'description': 'md5:e03b909557865076822aa169218d6a5d',
1404 'duration': 10990,
1405 'upload_date': '20161111',
1406 'uploader': 'Team PGP',
1407 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1408 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1409 },
1410 }],
1411 'params': {
1412 'skip_download': True,
1413 },
1414 'skip': 'Not multifeed anymore',
1415 },
1416 {
1417 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1418 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1419 'info_dict': {
1420 'id': 'gVfLd0zydlo',
1421 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1422 },
1423 'playlist_count': 2,
1424 'skip': 'Not multifeed anymore',
1425 },
1426 {
1427 'url': 'https://vid.plus/FlRa-iH7PGw',
1428 'only_matching': True,
1429 },
1430 {
1431 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1432 'only_matching': True,
1433 },
1434 {
1435 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1436 # Also tests cut-off URL expansion in video description (see
1437 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1438 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1439 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1440 'info_dict': {
1441 'id': 'lsguqyKfVQg',
1442 'ext': 'mp4',
1443 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1444 'alt_title': 'Dark Walk',
1445 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1446 'duration': 133,
1447 'upload_date': '20151119',
1448 'uploader_id': 'IronSoulElf',
1449 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1450 'uploader': 'IronSoulElf',
1451 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1452 'track': 'Dark Walk',
1453 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1454 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1455 },
1456 'params': {
1457 'skip_download': True,
1458 },
1459 },
1460 {
1461 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1462 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1463 'only_matching': True,
1464 },
1465 {
1466 # Video with yt:stretch=17:0
1467 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1468 'info_dict': {
1469 'id': 'Q39EVAstoRM',
1470 'ext': 'mp4',
1471 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1472 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1473 'upload_date': '20151107',
1474 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1475 'uploader': 'CH GAMER DROID',
1476 },
1477 'params': {
1478 'skip_download': True,
1479 },
1480 'skip': 'This video does not exist.',
1481 },
1482 {
1483 # Video with incomplete 'yt:stretch=16:'
1484 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1485 'only_matching': True,
1486 },
1487 {
1488 # Video licensed under Creative Commons
1489 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1490 'info_dict': {
1491 'id': 'M4gD1WSo5mA',
1492 'ext': 'mp4',
1493 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1494 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1495 'duration': 721,
1496 'upload_date': '20150127',
1497 'uploader_id': 'BerkmanCenter',
1498 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1499 'uploader': 'The Berkman Klein Center for Internet & Society',
1500 'license': 'Creative Commons Attribution license (reuse allowed)',
1501 },
1502 'params': {
1503 'skip_download': True,
1504 },
1505 },
1506 {
1507 # Channel-like uploader_url
1508 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1509 'info_dict': {
1510 'id': 'eQcmzGIKrzg',
1511 'ext': 'mp4',
1512 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1513 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1514 'duration': 4060,
1515 'upload_date': '20151119',
1516 'uploader': 'Bernie Sanders',
1517 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1518 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1519 'license': 'Creative Commons Attribution license (reuse allowed)',
1520 },
1521 'params': {
1522 'skip_download': True,
1523 },
1524 },
1525 {
1526 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1527 'only_matching': True,
1528 },
1529 {
1530 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1531 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1532 'only_matching': True,
1533 },
1534 {
1535 # Rental video preview
1536 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1537 'info_dict': {
1538 'id': 'uGpuVWrhIzE',
1539 'ext': 'mp4',
1540 'title': 'Piku - Trailer',
1541 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1542 'upload_date': '20150811',
1543 'uploader': 'FlixMatrix',
1544 'uploader_id': 'FlixMatrixKaravan',
1545 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1546 'license': 'Standard YouTube License',
1547 },
1548 'params': {
1549 'skip_download': True,
1550 },
1551 'skip': 'This video is not available.',
1552 },
1553 {
1554 # YouTube Red video with episode data
1555 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1556 'info_dict': {
1557 'id': 'iqKdEhx-dD4',
1558 'ext': 'mp4',
1559 'title': 'Isolation - Mind Field (Ep 1)',
1560 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1561 'duration': 2085,
1562 'upload_date': '20170118',
1563 'uploader': 'Vsauce',
1564 'uploader_id': 'Vsauce',
1565 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1566 'series': 'Mind Field',
1567 'season_number': 1,
1568 'episode_number': 1,
1569 },
1570 'params': {
1571 'skip_download': True,
1572 },
1573 'expected_warnings': [
1574 'Skipping DASH manifest',
1575 ],
1576 },
1577 {
1578 # The following content has been identified by the YouTube community
1579 # as inappropriate or offensive to some audiences.
1580 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1581 'info_dict': {
1582 'id': '6SJNVb0GnPI',
1583 'ext': 'mp4',
1584 'title': 'Race Differences in Intelligence',
1585 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1586 'duration': 965,
1587 'upload_date': '20140124',
1588 'uploader': 'New Century Foundation',
1589 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1590 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1591 },
1592 'params': {
1593 'skip_download': True,
1594 },
1595 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1596 },
1597 {
1598 # itag 212
1599 'url': '1t24XAntNCY',
1600 'only_matching': True,
1601 },
1602 {
1603 # geo restricted to JP
1604 'url': 'sJL6WA-aGkQ',
1605 'only_matching': True,
1606 },
1607 {
1608 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1609 'only_matching': True,
1610 },
1611 {
1612 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1613 'only_matching': True,
1614 },
1615 {
1616 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1617 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1618 'only_matching': True,
1619 },
1620 {
1621 # DRM protected
1622 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1623 'only_matching': True,
1624 },
1625 {
1626 # Video with unsupported adaptive stream type formats
1627 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1628 'info_dict': {
1629 'id': 'Z4Vy8R84T1U',
1630 'ext': 'mp4',
1631 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1632 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1633 'duration': 433,
1634 'upload_date': '20130923',
1635 'uploader': 'Amelia Putri Harwita',
1636 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1637 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1638 'formats': 'maxcount:10',
1639 },
1640 'params': {
1641 'skip_download': True,
1642 'youtube_include_dash_manifest': False,
1643 },
1644 'skip': 'not actual anymore',
1645 },
1646 {
1647 # Youtube Music Auto-generated description
1648 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1649 'info_dict': {
1650 'id': 'MgNrAu2pzNs',
1651 'ext': 'mp4',
1652 'title': 'Voyeur Girl',
1653 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1654 'upload_date': '20190312',
1655 'uploader': 'Stephen - Topic',
1656 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1657 'artist': 'Stephen',
1658 'track': 'Voyeur Girl',
1659 'album': 'it\'s too much love to know my dear',
1660 'release_date': '20190313',
1661 'release_year': 2019,
1662 },
1663 'params': {
1664 'skip_download': True,
1665 },
1666 },
1667 {
1668 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1669 'only_matching': True,
1670 },
1671 {
1672 # invalid -> valid video id redirection
1673 'url': 'DJztXj2GPfl',
1674 'info_dict': {
1675 'id': 'DJztXj2GPfk',
1676 'ext': 'mp4',
1677 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1678 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1679 'upload_date': '20090125',
1680 'uploader': 'Prochorowka',
1681 'uploader_id': 'Prochorowka',
1682 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1683 'artist': 'Panjabi MC',
1684 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1685 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1686 },
1687 'params': {
1688 'skip_download': True,
1689 },
1690 'skip': 'Video unavailable',
1691 },
1692 {
1693 # empty description results in an empty string
1694 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1695 'info_dict': {
1696 'id': 'x41yOUIvK2k',
1697 'ext': 'mp4',
1698 'title': 'IMG 3456',
1699 'description': '',
1700 'upload_date': '20170613',
1701 'uploader_id': 'ElevageOrVert',
1702 'uploader': 'ElevageOrVert',
1703 },
1704 'params': {
1705 'skip_download': True,
1706 },
1707 },
1708 {
1709 # with '};' inside yt initial data (see [1])
1710 # see [2] for an example with '};' inside ytInitialPlayerResponse
1711 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1712 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1713 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1714 'info_dict': {
1715 'id': 'CHqg6qOn4no',
1716 'ext': 'mp4',
1717 'title': 'Part 77 Sort a list of simple types in c#',
1718 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1719 'upload_date': '20130831',
1720 'uploader_id': 'kudvenkat',
1721 'uploader': 'kudvenkat',
1722 },
1723 'params': {
1724 'skip_download': True,
1725 },
1726 },
1727 {
1728 # another example of '};' in ytInitialData
1729 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1730 'only_matching': True,
1731 },
1732 {
1733 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1734 'only_matching': True,
1735 },
1736 {
1737 # https://github.com/ytdl-org/youtube-dl/pull/28094
1738 'url': 'OtqTfy26tG0',
1739 'info_dict': {
1740 'id': 'OtqTfy26tG0',
1741 'ext': 'mp4',
1742 'title': 'Burn Out',
1743 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1744 'upload_date': '20141120',
1745 'uploader': 'The Cinematic Orchestra - Topic',
1746 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1747 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1748 'artist': 'The Cinematic Orchestra',
1749 'track': 'Burn Out',
1750 'album': 'Every Day',
1751 'release_data': None,
1752 'release_year': None,
1753 },
1754 'params': {
1755 'skip_download': True,
1756 },
1757 },
1758 {
1759 # controversial video, only works with bpctr when authenticated with cookies
1760 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1761 'only_matching': True,
1762 },
1763 {
1764 # controversial video, requires bpctr/contentCheckOk
1765 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1766 'info_dict': {
1767 'id': 'SZJvDhaSDnc',
1768 'ext': 'mp4',
1769 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1770 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1771 'uploader': 'CBS This Morning',
1772 'uploader_id': 'CBSThisMorning',
1773 'upload_date': '20140716',
1774 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1775 }
1776 },
1777 {
1778 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1779 'url': 'cBvYw8_A0vQ',
1780 'info_dict': {
1781 'id': 'cBvYw8_A0vQ',
1782 'ext': 'mp4',
1783 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1784 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1785 'upload_date': '20201120',
1786 'uploader': 'Walk around Japan',
1787 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1788 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1789 },
1790 'params': {
1791 'skip_download': True,
1792 },
1793 }, {
1794 # Has multiple audio streams
1795 'url': 'WaOKSUlf4TM',
1796 'only_matching': True
1797 }, {
1798 # Requires Premium: has format 141 when requested using YTM url
1799 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1800 'only_matching': True
1801 }, {
1802 # multiple subtitles with same lang_code
1803 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1804 'only_matching': True,
1805 }, {
1806 # Force use android client fallback
1807 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1808 'info_dict': {
1809 'id': 'YOelRv7fMxY',
1810 'title': 'DIGGING A SECRET TUNNEL Part 1',
1811 'ext': '3gp',
1812 'upload_date': '20210624',
1813 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1814 'uploader': 'colinfurze',
1815 'uploader_id': 'colinfurze',
1816 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1817 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1818 },
1819 'params': {
1820 'format': '17', # 3gp format available on android
1821 'extractor_args': {'youtube': {'player_client': ['android']}},
1822 },
1823 },
1824 {
1825 # Skip download of additional client configs (remix client config in this case)
1826 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1827 'only_matching': True,
1828 'params': {
1829 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1830 },
1831 }, {
1832 # shorts
1833 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1834 'only_matching': True,
1835 },
1836 ]
1837
1838 @classmethod
1839 def suitable(cls, url):
1840 # Hack for lazy extractors until more generic solution is implemented
1841 # (see #28780)
1842 from ..utils import parse_qs
1843
1844 qs = parse_qs(url)
1845 if qs.get('list', [None])[0]:
1846 return False
1847 return super(YoutubeIE, cls).suitable(url)
1848
1849 def __init__(self, *args, **kwargs):
1850 super(YoutubeIE, self).__init__(*args, **kwargs)
1851 self._code_cache = {}
1852 self._player_cache = {}
1853
1854 def _extract_player_url(self, ytcfg=None, webpage=None):
1855 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1856 if not player_url and webpage:
1857 player_url = self._search_regex(
1858 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1859 webpage, 'player URL', fatal=False)
1860 if not player_url:
1861 return None
1862 if player_url.startswith('//'):
1863 player_url = 'https:' + player_url
1864 elif not re.match(r'https?://', player_url):
1865 player_url = compat_urlparse.urljoin(
1866 'https://www.youtube.com', player_url)
1867 return player_url
1868
1869 def _signature_cache_id(self, example_sig):
1870 """ Return a string representation of a signature """
1871 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1872
1873 @classmethod
1874 def _extract_player_info(cls, player_url):
1875 for player_re in cls._PLAYER_INFO_RE:
1876 id_m = re.search(player_re, player_url)
1877 if id_m:
1878 break
1879 else:
1880 raise ExtractorError('Cannot identify player %r' % player_url)
1881 return id_m.group('id')
1882
1883 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1884 player_id = self._extract_player_info(player_url)
1885 if player_id not in self._code_cache:
1886 self._code_cache[player_id] = self._download_webpage(
1887 player_url, video_id, fatal=fatal,
1888 note='Downloading player ' + player_id,
1889 errnote='Download of %s failed' % player_url)
1890 return player_id in self._code_cache
1891
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Return a callable that descrambles an encrypted signature string.

        The character permutation is derived from the player JS and cached on
        disk, keyed by player id plus the signature's segment-length pattern.
        """
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        # func_id doubles as a cache file name component, hence the basename check
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # The cached spec is a list of source indices: applying it
            # reorders the characters of s into the decrypted signature
            return lambda s: ''.join(s[i] for i in cache_spec)

        if self._load_player(video_id, player_url):
            code = self._code_cache[player_id]
            res = self._parse_sig_js(code)

            # Run the JS function on a probe string of unique characters to
            # recover the index permutation it performs, then persist it
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res
1914
    def _print_sig_code(self, func, example_sig):
        """Print equivalent Python source for the extracted signature function.

        Recovers the index permutation by running func on a probe string,
        then compresses runs of consecutive indices into slice expressions.
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a slice expression, omitting defaults (start 0, step 1)
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: extend it if the stride continues,
                    # otherwise flush the accumulated slice
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a new ascending/descending run
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1953
    def _parse_sig_js(self, jscode):
        """Locate and compile the signature-descrambling function in player JS."""
        # Each pattern matches a different historical call-site shape of the
        # signature function in base.js; the first matching pattern wins
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # JSInterpreter functions receive their arguments as a list
        return lambda s: initial_function([s])
1977
1978 def _decrypt_signature(self, s, video_id, player_url):
1979 """Turn the encrypted s field into a working signature"""
1980
1981 if player_url is None:
1982 raise ExtractorError('Cannot decrypt signature without player_url')
1983
1984 try:
1985 player_id = (player_url, self._signature_cache_id(s))
1986 if player_id not in self._player_cache:
1987 func = self._extract_signature_function(
1988 video_id, player_url, s
1989 )
1990 self._player_cache[player_id] = func
1991 func = self._player_cache[player_id]
1992 if self.get_param('youtube_print_sig_code'):
1993 self._print_sig_code(func, s)
1994 return func(s)
1995 except Exception as e:
1996 tb = traceback.format_exc()
1997 raise ExtractorError(
1998 'Signature extraction failed: ' + tb, cause=e)
1999
    def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
        """
        Extract signatureTimestamp (sts)
        Required to tell API what sig/player version is in use.
        """
        # Prefer the cheap path: the ytcfg already carries STS on watch pages
        sts = None
        if isinstance(ytcfg, dict):
            sts = int_or_none(ytcfg.get('STS'))

        if not sts:
            # Attempt to extract from player
            if player_url is None:
                error_msg = 'Cannot extract signature timestamp without player_url.'
                if fatal:
                    raise ExtractorError(error_msg)
                self.report_warning(error_msg)
                return
            if self._load_player(video_id, player_url, fatal=fatal):
                player_id = self._extract_player_info(player_url)
                code = self._code_cache[player_id]
                # sts appears as a 5-digit integer constant in the player JS
                sts = int_or_none(self._search_regex(
                    r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
                    'JS player signature timestamp', group='sts', fatal=fatal))
        return sts
2024
    def _mark_watched(self, video_id, player_responses):
        """Record a playback for video_id by pinging the videostats URL."""
        # Take the first playback-tracking URL found in any player response
        playback_url = traverse_obj(
            player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
            expected_type=url_or_none, get_all=False)
        if not playback_url:
            self.report_warning('Unable to mark watched')
            return
        parsed_playback_url = compat_urlparse.urlparse(playback_url)
        qs = compat_urlparse.parse_qs(parsed_playback_url.query)

        # cpn generation algorithm is reverse engineered from base.js.
        # In fact it works even with dummy cpn.
        CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
        cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))

        # Add the playback nonce and version to the tracking query string
        qs.update({
            'ver': ['2'],
            'cpn': [cpn],
        })
        playback_url = compat_urlparse.urlunparse(
            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))

        self._download_webpage(
            playback_url, video_id, 'Marking watched',
            'Unable to mark watched', fatal=False)
2050
    @staticmethod
    def _extract_urls(webpage):
        """Return all YouTube embed URLs/ids found in an arbitrary webpage."""
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        # (findall returns tuples here; the video id is the last group)
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        entries.extend(m[-1] for m in matches)

        return entries
2082
2083 @staticmethod
2084 def _extract_url(webpage):
2085 urls = YoutubeIE._extract_urls(webpage)
2086 return urls[0] if urls else None
2087
2088 @classmethod
2089 def extract_id(cls, url):
2090 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2091 if mobj is None:
2092 raise ExtractorError('Invalid URL: %s' % url)
2093 return mobj.group('id')
2094
2095 def _extract_chapters_from_json(self, data, duration):
2096 chapter_list = traverse_obj(
2097 data, (
2098 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2099 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2100 ), expected_type=list)
2101
2102 return self._extract_chapters(
2103 chapter_list,
2104 chapter_time=lambda chapter: float_or_none(
2105 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2106 chapter_title=lambda chapter: traverse_obj(
2107 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2108 duration=duration)
2109
    def _extract_chapters_from_engagement_panel(self, data, duration):
        """Extract chapters from engagement-panel macro markers (fallback source)."""
        content_list = traverse_obj(
            data,
            ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
            expected_type=list, default=[])
        chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
        chapter_title = lambda chapter: self._get_text(chapter, 'title')

        # Return the first panel that yields a non-empty chapter list,
        # or [] when none does (filter(None, ...) drops empty results)
        return next((
            filter(None, (
                self._extract_chapters(
                    traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
                    chapter_time, chapter_title, duration)
                for contents in content_list
            ))), [])
2125
2126 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2127 chapters = []
2128 last_chapter = {'start_time': 0}
2129 for idx, chapter in enumerate(chapter_list or []):
2130 title = chapter_title(chapter)
2131 start_time = chapter_time(chapter)
2132 if start_time is None:
2133 continue
2134 last_chapter['end_time'] = start_time
2135 if start_time < last_chapter['start_time']:
2136 if idx == 1:
2137 chapters.pop()
2138 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2139 else:
2140 self.report_warning(f'Invalid start time for chapter "{title}"')
2141 continue
2142 last_chapter = {'start_time': start_time, 'title': title}
2143 chapters.append(last_chapter)
2144 last_chapter['end_time'] = duration
2145 return chapters
2146
2147 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2148 return self._parse_json(self._search_regex(
2149 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2150 regex), webpage, name, default='{}'), video_id, fatal=False)
2151
2152 @staticmethod
2153 def parse_time_text(time_text):
2154 """
2155 Parse the comment time text
2156 time_text is in the format 'X units ago (edited)'
2157 """
2158 time_text_split = time_text.split(' ')
2159 if len(time_text_split) >= 3:
2160 try:
2161 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2162 except ValueError:
2163 return None
2164
2165 def _extract_comment(self, comment_renderer, parent=None):
2166 comment_id = comment_renderer.get('commentId')
2167 if not comment_id:
2168 return
2169
2170 text = self._get_text(comment_renderer, 'contentText')
2171
2172 # note: timestamp is an estimate calculated from the current time and time_text
2173 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2174 time_text_dt = self.parse_time_text(time_text)
2175 if isinstance(time_text_dt, datetime.datetime):
2176 timestamp = calendar.timegm(time_text_dt.timetuple())
2177 author = self._get_text(comment_renderer, 'authorText')
2178 author_id = try_get(comment_renderer,
2179 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2180
2181 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2182 lambda x: x['likeCount']), compat_str)) or 0
2183 author_thumbnail = try_get(comment_renderer,
2184 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2185
2186 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2187 is_favorited = 'creatorHeart' in (try_get(
2188 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2189 return {
2190 'id': comment_id,
2191 'text': text,
2192 'timestamp': timestamp,
2193 'time_text': time_text,
2194 'like_count': votes,
2195 'is_favorited': is_favorited,
2196 'author': author,
2197 'author_id': author_id,
2198 'author_thumbnail': author_thumbnail,
2199 'author_is_uploader': author_is_uploader,
2200 'parent': parent or 'root'
2201 }
2202
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """Generator of comment info dicts for a video (or a reply thread).

        Yields an int (estimated total comments) before the first comment of
        the top-level thread, then comment dicts. Recurses once for replies
        (parent set). comment_counts is shared mutable state:
        [comments downloaded so far, estimated total, current reply thread #].
        """

        def extract_header(contents):
            # Parse the comments header: report/record the estimated total and
            # pick the continuation matching the requested sort order.
            # Returns (estimated total, continuation query or None).
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment on one page, recursing into its replies
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the comment continuations until exhausted
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    ' ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry visitorData across pages so paging stays consistent
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2373
2374 @staticmethod
2375 def _generate_comment_continuation(video_id):
2376 """
2377 Generates initial comment section continuation token from given video id
2378 """
2379 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2380 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2381 new_continuation_intlist = list(itertools.chain.from_iterable(
2382 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2383 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2384
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            # Find the first known comment-section renderer per entry and
            # delegate the actual paging to _comment_entries
            if isinstance(contents, list):
                for entry in contents:
                    for key, renderer in entry.items():
                        if key not in known_entry_comment_renderers:
                            continue
                        yield from self._comment_entries(
                            renderer, video_id=video_id, ytcfg=ytcfg,
                            identity_token=self._extract_identity_token(webpage, item_id=video_id),
                            account_syncid=self._extract_account_syncid(ytcfg))
                        break
        comments = []
        known_entry_comment_renderers = ('itemSectionRenderer',)
        estimated_total = 0
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
        # Force English regardless of account setting to prevent parsing issues
        # See: https://github.com/yt-dlp/yt-dlp/issues/532
        ytcfg = copy.deepcopy(ytcfg)
        traverse_obj(
            ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
        try:
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                # _comment_entries yields an int estimated total before comments
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            # Allow the user to abort and keep the comments fetched so far
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2422
2423 @staticmethod
2424 def _generate_player_context(sts=None):
2425 context = {
2426 'html5Preference': 'HTML5_PREF_WANTS',
2427 }
2428 if sts is not None:
2429 context['signatureTimestamp'] = sts
2430 return {
2431 'playbackContext': {
2432 'contentPlaybackContext': context
2433 },
2434 'contentCheckOk': True,
2435 'racyCheckOk': True
2436 }
2437
2438 @staticmethod
2439 def _is_agegated(player_response):
2440 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2441 return True
2442
2443 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2444 AGE_GATE_REASONS = (
2445 'confirm your age', 'age-restricted', 'inappropriate', # reason
2446 'age_verification_required', 'age_check_required', # status
2447 )
2448 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2449
2450 @staticmethod
2451 def _is_unplayable(player_response):
2452 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2453
    def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
        """Call the innertube 'player' endpoint as the given client; return JSON or None."""

        session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
        syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
        # sts tells the API which player (and hence signature scheme) is in use
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
        headers = self.generate_api_headers(
            player_ytcfg, identity_token, syncid,
            default_client=client, session_index=session_index)

        yt_query = {'videoId': video_id}
        yt_query.update(self._generate_player_context(sts))
        return self._extract_response(
            item_id=video_id, ep='player', query=yt_query,
            ytcfg=player_ytcfg, headers=headers, fatal=True,
            default_client=client,
            note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
        ) or None
2471
    def _get_requested_clients(self, url, smuggled_data):
        """Resolve the 'player_client' extractor-arg into a list of client names."""
        requested_clients = []
        # Clients prefixed with '_' are internal-only and cannot be requested
        allowed_clients = sorted(
            [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
            # NOTE(review): assumes every INNERTUBE_CLIENTS entry carries a
            # 'priority' key (not present in the visible literal) - presumably
            # injected during module setup; verify
            key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
        for client in self._configuration_arg('player_client'):
            if client in allowed_clients:
                requested_clients.append(client)
            elif client == 'all':
                requested_clients.extend(allowed_clients)
            else:
                self.report_warning(f'Skipping unsupported client {client}')
        if not requested_clients:
            requested_clients = ['android', 'web']

        # For music URLs, additionally try the *_music variant of each client
        if smuggled_data.get('is_music_url') or self.is_music_url(url):
            requested_clients.extend(
                f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)

        return orderedSet(requested_clients)
2492
2493 def _extract_player_ytcfg(self, client, video_id):
2494 url = {
2495 'web_music': 'https://music.youtube.com',
2496 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2497 }.get(client)
2498 if not url:
2499 return {}
2500 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2501 return self.extract_ytcfg(video_id, webpage) or {}
2502
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
        """Lazily yield player API responses for each requested client.

        Fallback clients (agegate/creator variants) may be queued while
        iterating. Raises the last API error only if nothing was yielded.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        original_clients = clients
        # Reversed copy: clients are consumed by popping off the end
        clients = clients[::-1]

        def append_client(client_name):
            # Queue a fallback client unless the user already requested it
            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
                clients.append(client_name)

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        yielded_pr = False
        if initial_pr:
            pr = dict(initial_pr)
            pr['streamingData'] = None
            yielded_pr = True
            yield pr

        last_error = None
        while clients:
            client = clients.pop()
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            try:
                pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
            except ExtractorError as e:
                # Defer the error: a later client may still succeed
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

            if pr:
                yielded_pr = True
                yield pr

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header():
                append_client(client.replace('_agegate', '_creator'))
            elif self._is_agegated(pr):
                append_client(f'{client}_agegate')

        if last_error:
            if not yielded_pr:
                raise last_error
            self.report_warning(last_error)
2558
2559 def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2560 itags, stream_ids = [], []
2561 itag_qualities, res_qualities = {}, {}
2562 q = qualities([
2563 # Normally tiny is the smallest video-only formats. But
2564 # audio-only formats with unknown quality may get tagged as tiny
2565 'tiny',
2566 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2567 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2568 ])
2569 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2570
2571 for fmt in streaming_formats:
2572 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2573 continue
2574
2575 itag = str_or_none(fmt.get('itag'))
2576 audio_track = fmt.get('audioTrack') or {}
2577 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2578 if stream_id in stream_ids:
2579 continue
2580
2581 quality = fmt.get('quality')
2582 height = int_or_none(fmt.get('height'))
2583 if quality == 'tiny' or not quality:
2584 quality = fmt.get('audioQuality', '').lower() or quality
2585 # The 3gp format (17) in android client has a quality of "small",
2586 # but is actually worse than other formats
2587 if itag == '17':
2588 quality = 'tiny'
2589 if quality:
2590 if itag:
2591 itag_qualities[itag] = quality
2592 if height:
2593 res_qualities[height] = quality
2594 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2595 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2596 # number of fragment that would subsequently requested with (`&sq=N`)
2597 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2598 continue
2599
2600 fmt_url = fmt.get('url')
2601 if not fmt_url:
2602 sc = compat_parse_qs(fmt.get('signatureCipher'))
2603 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2604 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2605 if not (sc and fmt_url and encrypted_sig):
2606 continue
2607 if not player_url:
2608 continue
2609 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2610 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2611 fmt_url += '&' + sp + '=' + signature
2612
2613 if itag:
2614 itags.append(itag)
2615 stream_ids.append(stream_id)
2616
2617 tbr = float_or_none(
2618 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2619 dct = {
2620 'asr': int_or_none(fmt.get('audioSampleRate')),
2621 'filesize': int_or_none(fmt.get('contentLength')),
2622 'format_id': itag,
2623 'format_note': ', '.join(filter(None, (
2624 audio_track.get('displayName'),
2625 fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
2626 'fps': int_or_none(fmt.get('fps')),
2627 'height': height,
2628 'quality': q(quality),
2629 'tbr': tbr,
2630 'url': fmt_url,
2631 'width': int_or_none(fmt.get('width')),
2632 'language': audio_track.get('id', '').split('.')[0],
2633 }
2634 mime_mobj = re.match(
2635 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2636 if mime_mobj:
2637 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2638 dct.update(parse_codecs(mime_mobj.group(2)))
2639 no_audio = dct.get('acodec') == 'none'
2640 no_video = dct.get('vcodec') == 'none'
2641 if no_audio:
2642 dct['vbr'] = tbr
2643 if no_video:
2644 dct['abr'] = tbr
2645 if no_audio or no_video:
2646 dct['downloader_options'] = {
2647 # Youtube throttles chunks >~10M
2648 'http_chunk_size': 10485760,
2649 }
2650 if dct.get('ext'):
2651 dct['container'] = dct['ext'] + '_dash'
2652 yield dct
2653
2654 skip_manifests = self._configuration_arg('skip')
2655 get_dash = (
2656 (not is_live or self._configuration_arg('include_live_dash'))
2657 and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
2658 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2659
2660 def guess_quality(f):
2661 for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
2662 if val in qdict:
2663 return q(qdict[val])
2664 return -1
2665
2666 for sd in streaming_data:
2667 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2668 if hls_manifest_url:
2669 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2670 itag = self._search_regex(
2671 r'/itag/(\d+)', f['url'], 'itag', default=None)
2672 if itag in itags:
2673 continue
2674 if itag:
2675 f['format_id'] = itag
2676 itags.append(itag)
2677 f['quality'] = guess_quality(f)
2678 yield f
2679
2680 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2681 if dash_manifest_url:
2682 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2683 itag = f['format_id']
2684 if itag in itags:
2685 continue
2686 if itag:
2687 itags.append(itag)
2688 f['quality'] = guess_quality(f)
2689 filesize = int_or_none(self._search_regex(
2690 r'/clen/(\d+)', f.get('fragment_base_url')
2691 or f['url'], 'file size', default=None))
2692 if filesize:
2693 f['filesize'] = filesize
2694 yield f
2695
2696 def _real_extract(self, url):
2697 url, smuggled_data = unsmuggle_url(url, {})
2698 video_id = self._match_id(url)
2699
2700 base_url = self.http_scheme() + '//www.youtube.com/'
2701 webpage_url = base_url + 'watch?v=' + video_id
2702 webpage = self._download_webpage(
2703 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2704
2705 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2706 player_url = self._extract_player_url(master_ytcfg, webpage)
2707 identity_token = self._extract_identity_token(webpage, video_id)
2708
2709 player_responses = list(self._extract_player_responses(
2710 self._get_requested_clients(url, smuggled_data),
2711 video_id, webpage, master_ytcfg, player_url, identity_token))
2712
2713 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2714
2715 playability_statuses = traverse_obj(
2716 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2717
2718 trailer_video_id = get_first(
2719 playability_statuses,
2720 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2721 expected_type=str)
2722 if trailer_video_id:
2723 return self.url_result(
2724 trailer_video_id, self.ie_key(), trailer_video_id)
2725
2726 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2727 if webpage else (lambda x: None))
2728
2729 video_details = traverse_obj(
2730 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2731 microformats = traverse_obj(
2732 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2733 expected_type=dict, default=[])
2734 video_title = (
2735 get_first(video_details, 'title')
2736 or self._get_text(microformats, (..., 'title'))
2737 or search_meta(['og:title', 'twitter:title', 'title']))
2738 video_description = get_first(video_details, 'shortDescription')
2739
2740 if not smuggled_data.get('force_singlefeed', False):
2741 if not self.get_param('noplaylist'):
2742 multifeed_metadata_list = get_first(
2743 player_responses,
2744 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2745 expected_type=str)
2746 if multifeed_metadata_list:
2747 entries = []
2748 feed_ids = []
2749 for feed in multifeed_metadata_list.split(','):
2750 # Unquote should take place before split on comma (,) since textual
2751 # fields may contain comma as well (see
2752 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2753 feed_data = compat_parse_qs(
2754 compat_urllib_parse_unquote_plus(feed))
2755
2756 def feed_entry(name):
2757 return try_get(
2758 feed_data, lambda x: x[name][0], compat_str)
2759
2760 feed_id = feed_entry('id')
2761 if not feed_id:
2762 continue
2763 feed_title = feed_entry('title')
2764 title = video_title
2765 if feed_title:
2766 title += ' (%s)' % feed_title
2767 entries.append({
2768 '_type': 'url_transparent',
2769 'ie_key': 'Youtube',
2770 'url': smuggle_url(
2771 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2772 {'force_singlefeed': True}),
2773 'title': title,
2774 })
2775 feed_ids.append(feed_id)
2776 self.to_screen(
2777 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2778 % (', '.join(feed_ids), video_id))
2779 return self.playlist_result(
2780 entries, video_id, video_title, video_description)
2781 else:
2782 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2783
2784 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2785 is_live = get_first(video_details, 'isLive')
2786 if is_live is None:
2787 is_live = get_first(live_broadcast_details, 'isLiveNow')
2788
2789 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2790 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2791
2792 if not formats:
2793 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2794 self.raise_no_formats(
2795 'This video is DRM protected.', expected=True)
2796 pemr = get_first(
2797 playability_statuses,
2798 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2799 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2800 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2801 if subreason:
2802 if subreason == 'The uploader has not made this video available in your country.':
2803 countries = get_first(microformats, 'availableCountries')
2804 if not countries:
2805 regions_allowed = search_meta('regionsAllowed')
2806 countries = regions_allowed.split(',') if regions_allowed else None
2807 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2808 reason += f'. {subreason}'
2809 if reason:
2810 self.raise_no_formats(reason, expected=True)
2811
2812 for f in formats:
2813 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
2814 f['source_preference'] = -10
2815 # TODO: this method is not reliable
2816 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
2817
2818 # Source is given priority since formats that throttle are given lower source_preference
2819 # When throttling issue is fully fixed, remove this
2820 self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
2821
2822 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2823 if not keywords and webpage:
2824 keywords = [
2825 unescapeHTML(m.group('content'))
2826 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2827 for keyword in keywords:
2828 if keyword.startswith('yt:stretch='):
2829 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2830 if mobj:
2831 # NB: float is intentional for forcing float division
2832 w, h = (float(v) for v in mobj.groups())
2833 if w > 0 and h > 0:
2834 ratio = w / h
2835 for f in formats:
2836 if f.get('vcodec') != 'none':
2837 f['stretched_ratio'] = ratio
2838 break
2839
2840 thumbnails = []
2841 thumbnail_dicts = traverse_obj(
2842 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2843 expected_type=dict, default=[])
2844 for thumbnail in thumbnail_dicts:
2845 thumbnail_url = thumbnail.get('url')
2846 if not thumbnail_url:
2847 continue
2848 # Sometimes youtube gives a wrong thumbnail URL. See:
2849 # https://github.com/yt-dlp/yt-dlp/issues/233
2850 # https://github.com/ytdl-org/youtube-dl/issues/28023
2851 if 'maxresdefault' in thumbnail_url:
2852 thumbnail_url = thumbnail_url.split('?')[0]
2853 thumbnails.append({
2854 'url': thumbnail_url,
2855 'height': int_or_none(thumbnail.get('height')),
2856 'width': int_or_none(thumbnail.get('width')),
2857 })
2858 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2859 if thumbnail_url:
2860 thumbnails.append({
2861 'url': thumbnail_url,
2862 })
2863 # The best resolution thumbnails sometimes does not appear in the webpage
2864 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2865 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2866 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2867 # TODO: Test them also? - For some videos, even these don't exist
2868 guaranteed_thumbnail_names = [
2869 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2870 'mqdefault', 'mq1', 'mq2', 'mq3',
2871 'default', '1', '2', '3'
2872 ]
2873 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2874 n_thumbnail_names = len(thumbnail_names)
2875
2876 thumbnails.extend({
2877 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2878 video_id=video_id, name=name, ext=ext,
2879 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2880 '_test_url': name in hq_thumbnail_names,
2881 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2882 for thumb in thumbnails:
2883 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2884 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2885 self._remove_duplicate_formats(thumbnails)
2886
2887 category = get_first(microformats, 'category') or search_meta('genre')
2888 channel_id = str_or_none(
2889 get_first(video_details, 'channelId')
2890 or get_first(microformats, 'externalChannelId')
2891 or search_meta('channelId'))
2892 duration = int_or_none(
2893 get_first(video_details, 'lengthSeconds')
2894 or get_first(microformats, 'lengthSeconds')
2895 or parse_duration(search_meta('duration'))) or None
2896 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2897
2898 live_content = get_first(video_details, 'isLiveContent')
2899 is_upcoming = get_first(video_details, 'isUpcoming')
2900 if is_live is None:
2901 if is_upcoming or live_content is False:
2902 is_live = False
2903 if is_upcoming is None and (live_content or is_live):
2904 is_upcoming = False
2905 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2906 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2907 if not duration and live_endtime and live_starttime:
2908 duration = live_endtime - live_starttime
2909
2910 info = {
2911 'id': video_id,
2912 'title': self._live_title(video_title) if is_live else video_title,
2913 'formats': formats,
2914 'thumbnails': thumbnails,
2915 'description': video_description,
2916 'upload_date': unified_strdate(
2917 get_first(microformats, 'uploadDate')
2918 or search_meta('uploadDate')),
2919 'uploader': get_first(video_details, 'author'),
2920 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2921 'uploader_url': owner_profile_url,
2922 'channel_id': channel_id,
2923 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2924 'duration': duration,
2925 'view_count': int_or_none(
2926 get_first((video_details, microformats), (..., 'viewCount'))
2927 or search_meta('interactionCount')),
2928 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2929 'age_limit': 18 if (
2930 get_first(microformats, 'isFamilySafe') is False
2931 or search_meta('isFamilyFriendly') == 'false'
2932 or search_meta('og:restrictions:age') == '18+') else 0,
2933 'webpage_url': webpage_url,
2934 'categories': [category] if category else None,
2935 'tags': keywords,
2936 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2937 'is_live': is_live,
2938 'was_live': (False if is_live or is_upcoming or live_content is False
2939 else None if is_live is None or is_upcoming is None
2940 else live_content),
2941 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2942 'release_timestamp': live_starttime,
2943 }
2944
2945 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2946 # Converted into dicts to remove duplicates
2947 captions = {
2948 sub.get('baseUrl'): sub
2949 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2950 translation_languages = {
2951 lang.get('languageCode'): lang.get('languageName')
2952 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2953 subtitles = {}
2954 if pctr:
2955 def process_language(container, base_url, lang_code, sub_name, query):
2956 lang_subs = container.setdefault(lang_code, [])
2957 for fmt in self._SUBTITLE_FORMATS:
2958 query.update({
2959 'fmt': fmt,
2960 })
2961 lang_subs.append({
2962 'ext': fmt,
2963 'url': update_url_query(base_url, query),
2964 'name': sub_name,
2965 })
2966
2967 for base_url, caption_track in captions.items():
2968 if not base_url:
2969 continue
2970 if caption_track.get('kind') != 'asr':
2971 lang_code = (
2972 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2973 or caption_track.get('languageCode'))
2974 if not lang_code:
2975 continue
2976 process_language(
2977 subtitles, base_url, lang_code,
2978 traverse_obj(caption_track, ('name', 'simpleText')),
2979 {})
2980 continue
2981 automatic_captions = {}
2982 for trans_code, trans_name in translation_languages.items():
2983 if not trans_code:
2984 continue
2985 process_language(
2986 automatic_captions, base_url, trans_code,
2987 self._get_text(trans_name, max_runs=1),
2988 {'tlang': trans_code})
2989 info['automatic_captions'] = automatic_captions
2990 info['subtitles'] = subtitles
2991
2992 parsed_url = compat_urllib_parse_urlparse(url)
2993 for component in [parsed_url.fragment, parsed_url.query]:
2994 query = compat_parse_qs(component)
2995 for k, v in query.items():
2996 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2997 d_k += '_time'
2998 if d_k not in info and k in s_ks:
2999 info[d_k] = parse_duration(query[k][0])
3000
3001 # Youtube Music Auto-generated description
3002 if video_description:
3003 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3004 if mobj:
3005 release_year = mobj.group('release_year')
3006 release_date = mobj.group('release_date')
3007 if release_date:
3008 release_date = release_date.replace('-', '')
3009 if not release_year:
3010 release_year = release_date[:4]
3011 info.update({
3012 'album': mobj.group('album'.strip()),
3013 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3014 'track': mobj.group('track').strip(),
3015 'release_date': release_date,
3016 'release_year': int_or_none(release_year),
3017 })
3018
3019 initial_data = None
3020 if webpage:
3021 initial_data = self._extract_yt_initial_variable(
3022 webpage, self._YT_INITIAL_DATA_RE, video_id,
3023 'yt initial data')
3024 if not initial_data:
3025 headers = self.generate_api_headers(
3026 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
3027 session_index=self._extract_session_index(master_ytcfg))
3028
3029 initial_data = self._extract_response(
3030 item_id=video_id, ep='next', fatal=False,
3031 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
3032 note='Downloading initial data API JSON')
3033
3034 try:
3035 # This will error if there is no livechat
3036 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3037 info['subtitles']['live_chat'] = [{
3038 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3039 'video_id': video_id,
3040 'ext': 'json',
3041 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3042 }]
3043 except (KeyError, IndexError, TypeError):
3044 pass
3045
3046 if initial_data:
3047 info['chapters'] = (
3048 self._extract_chapters_from_json(initial_data, duration)
3049 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3050 or None)
3051
3052 contents = try_get(
3053 initial_data,
3054 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3055 list) or []
3056 for content in contents:
3057 vpir = content.get('videoPrimaryInfoRenderer')
3058 if vpir:
3059 stl = vpir.get('superTitleLink')
3060 if stl:
3061 stl = self._get_text(stl)
3062 if try_get(
3063 vpir,
3064 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3065 info['location'] = stl
3066 else:
3067 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3068 if mobj:
3069 info.update({
3070 'series': mobj.group(1),
3071 'season_number': int(mobj.group(2)),
3072 'episode_number': int(mobj.group(3)),
3073 })
3074 for tlb in (try_get(
3075 vpir,
3076 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3077 list) or []):
3078 tbr = tlb.get('toggleButtonRenderer') or {}
3079 for getter, regex in [(
3080 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3081 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3082 lambda x: x['accessibility'],
3083 lambda x: x['accessibilityData']['accessibilityData'],
3084 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3085 label = (try_get(tbr, getter, dict) or {}).get('label')
3086 if label:
3087 mobj = re.match(regex, label)
3088 if mobj:
3089 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3090 break
3091 sbr_tooltip = try_get(
3092 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3093 if sbr_tooltip:
3094 like_count, dislike_count = sbr_tooltip.split(' / ')
3095 info.update({
3096 'like_count': str_to_int(like_count),
3097 'dislike_count': str_to_int(dislike_count),
3098 })
3099 vsir = content.get('videoSecondaryInfoRenderer')
3100 if vsir:
3101 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3102 rows = try_get(
3103 vsir,
3104 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3105 list) or []
3106 multiple_songs = False
3107 for row in rows:
3108 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3109 multiple_songs = True
3110 break
3111 for row in rows:
3112 mrr = row.get('metadataRowRenderer') or {}
3113 mrr_title = mrr.get('title')
3114 if not mrr_title:
3115 continue
3116 mrr_title = self._get_text(mrr, 'title')
3117 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3118 if mrr_title == 'License':
3119 info['license'] = mrr_contents_text
3120 elif not multiple_songs:
3121 if mrr_title == 'Album':
3122 info['album'] = mrr_contents_text
3123 elif mrr_title == 'Artist':
3124 info['artist'] = mrr_contents_text
3125 elif mrr_title == 'Song':
3126 info['track'] = mrr_contents_text
3127
3128 fallbacks = {
3129 'channel': 'uploader',
3130 'channel_id': 'uploader_id',
3131 'channel_url': 'uploader_url',
3132 }
3133 for to, frm in fallbacks.items():
3134 if not info.get(to):
3135 info[to] = info.get(frm)
3136
3137 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3138 v = info.get(s_k)
3139 if v:
3140 info[d_k] = v
3141
3142 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3143 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3144 is_membersonly = None
3145 is_premium = None
3146 if initial_data and is_private is not None:
3147 is_membersonly = False
3148 is_premium = False
3149 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3150 badge_labels = set()
3151 for content in contents:
3152 if not isinstance(content, dict):
3153 continue
3154 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3155 for badge_label in badge_labels:
3156 if badge_label.lower() == 'members only':
3157 is_membersonly = True
3158 elif badge_label.lower() == 'premium':
3159 is_premium = True
3160 elif badge_label.lower() == 'unlisted':
3161 is_unlisted = True
3162
3163 info['availability'] = self._availability(
3164 is_private=is_private,
3165 needs_premium=is_premium,
3166 needs_subscription=is_membersonly,
3167 needs_auth=info['age_limit'] >= 18,
3168 is_unlisted=None if is_private is None else is_unlisted)
3169
3170 # get xsrf for annotations or comments
3171 get_annotations = self.get_param('writeannotations', False)
3172 get_comments = self.get_param('getcomments', False)
3173 if get_annotations or get_comments:
3174 xsrf_token = None
3175 if master_ytcfg:
3176 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
3177 if not xsrf_token:
3178 xsrf_token = self._search_regex(
3179 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
3180 webpage, 'xsrf token', group='xsrf_token', fatal=False)
3181
3182 # annotations
3183 if get_annotations:
3184 invideo_url = get_first(
3185 player_responses,
3186 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3187 expected_type=str)
3188 if xsrf_token and invideo_url:
3189 xsrf_field_name = None
3190 if master_ytcfg:
3191 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3192 if not xsrf_field_name:
3193 xsrf_field_name = self._search_regex(
3194 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3195 webpage, 'xsrf field name',
3196 group='xsrf_field_name', default='session_token')
3197 info['annotations'] = self._download_webpage(
3198 self._proto_relative_url(invideo_url),
3199 video_id, note='Downloading annotations',
3200 errnote='Unable to download video annotations', fatal=False,
3201 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3202
3203 if get_comments:
3204 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3205
3206 self.mark_watched(video_id, player_responses)
3207
3208 return info
3209
3210
3211 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3212 IE_DESC = 'YouTube.com tab'
3213 _VALID_URL = r'''(?x)
3214 https?://
3215 (?:\w+\.)?
3216 (?:
3217 youtube(?:kids)?\.com|
3218 invidio\.us
3219 )/
3220 (?:
3221 (?P<channel_type>channel|c|user|browse)/|
3222 (?P<not_channel>
3223 feed/|hashtag/|
3224 (?:playlist|watch)\?.*?\blist=
3225 )|
3226 (?!(?:%s)\b) # Direct URLs
3227 )
3228 (?P<id>[^/?\#&]+)
3229 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3230 IE_NAME = 'youtube:tab'
3231
3232 _TESTS = [{
3233 'note': 'playlists, multipage',
3234 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3235 'playlist_mincount': 94,
3236 'info_dict': {
3237 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3238 'title': 'Игорь Клейнер - Playlists',
3239 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3240 'uploader': 'Игорь Клейнер',
3241 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3242 },
3243 }, {
3244 'note': 'playlists, multipage, different order',
3245 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3246 'playlist_mincount': 94,
3247 'info_dict': {
3248 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3249 'title': 'Игорь Клейнер - Playlists',
3250 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3251 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3252 'uploader': 'Игорь Клейнер',
3253 },
3254 }, {
3255 'note': 'playlists, series',
3256 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3257 'playlist_mincount': 5,
3258 'info_dict': {
3259 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3260 'title': '3Blue1Brown - Playlists',
3261 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3262 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3263 'uploader': '3Blue1Brown',
3264 },
3265 }, {
3266 'note': 'playlists, singlepage',
3267 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3268 'playlist_mincount': 4,
3269 'info_dict': {
3270 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3271 'title': 'ThirstForScience - Playlists',
3272 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3273 'uploader': 'ThirstForScience',
3274 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3275 }
3276 }, {
3277 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3278 'only_matching': True,
3279 }, {
3280 'note': 'basic, single video playlist',
3281 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3282 'info_dict': {
3283 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3284 'uploader': 'Sergey M.',
3285 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3286 'title': 'youtube-dl public playlist',
3287 },
3288 'playlist_count': 1,
3289 }, {
3290 'note': 'empty playlist',
3291 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3292 'info_dict': {
3293 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3294 'uploader': 'Sergey M.',
3295 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3296 'title': 'youtube-dl empty playlist',
3297 },
3298 'playlist_count': 0,
3299 }, {
3300 'note': 'Home tab',
3301 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3302 'info_dict': {
3303 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3304 'title': 'lex will - Home',
3305 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3306 'uploader': 'lex will',
3307 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3308 },
3309 'playlist_mincount': 2,
3310 }, {
3311 'note': 'Videos tab',
3312 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3313 'info_dict': {
3314 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3315 'title': 'lex will - Videos',
3316 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3317 'uploader': 'lex will',
3318 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3319 },
3320 'playlist_mincount': 975,
3321 }, {
3322 'note': 'Videos tab, sorted by popular',
3323 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3324 'info_dict': {
3325 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3326 'title': 'lex will - Videos',
3327 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3328 'uploader': 'lex will',
3329 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3330 },
3331 'playlist_mincount': 199,
3332 }, {
3333 'note': 'Playlists tab',
3334 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3335 'info_dict': {
3336 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3337 'title': 'lex will - Playlists',
3338 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3339 'uploader': 'lex will',
3340 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3341 },
3342 'playlist_mincount': 17,
3343 }, {
3344 'note': 'Community tab',
3345 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3346 'info_dict': {
3347 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3348 'title': 'lex will - Community',
3349 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3350 'uploader': 'lex will',
3351 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3352 },
3353 'playlist_mincount': 18,
3354 }, {
3355 'note': 'Channels tab',
3356 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3357 'info_dict': {
3358 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3359 'title': 'lex will - Channels',
3360 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3361 'uploader': 'lex will',
3362 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3363 },
3364 'playlist_mincount': 12,
3365 }, {
3366 'note': 'Search tab',
3367 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3368 'playlist_mincount': 40,
3369 'info_dict': {
3370 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3371 'title': '3Blue1Brown - Search - linear algebra',
3372 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3373 'uploader': '3Blue1Brown',
3374 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3375 },
3376 }, {
3377 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3378 'only_matching': True,
3379 }, {
3380 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3381 'only_matching': True,
3382 }, {
3383 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3384 'only_matching': True,
3385 }, {
3386 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3387 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3388 'info_dict': {
3389 'title': '29C3: Not my department',
3390 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3391 'uploader': 'Christiaan008',
3392 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3393 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3394 },
3395 'playlist_count': 96,
3396 }, {
3397 'note': 'Large playlist',
3398 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3399 'info_dict': {
3400 'title': 'Uploads from Cauchemar',
3401 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3402 'uploader': 'Cauchemar',
3403 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3404 },
3405 'playlist_mincount': 1123,
3406 }, {
3407 'note': 'even larger playlist, 8832 videos',
3408 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3409 'only_matching': True,
3410 }, {
3411 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3412 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3413 'info_dict': {
3414 'title': 'Uploads from Interstellar Movie',
3415 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3416 'uploader': 'Interstellar Movie',
3417 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3418 },
3419 'playlist_mincount': 21,
3420 }, {
3421 'note': 'Playlist with "show unavailable videos" button',
3422 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3423 'info_dict': {
3424 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3425 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3426 'uploader': 'Phim Siêu Nhân Nhật Bản',
3427 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3428 },
3429 'playlist_mincount': 200,
3430 }, {
3431 'note': 'Playlist with unavailable videos in page 7',
3432 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3433 'info_dict': {
3434 'title': 'Uploads from BlankTV',
3435 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3436 'uploader': 'BlankTV',
3437 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3438 },
3439 'playlist_mincount': 1000,
3440 }, {
3441 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3442 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3443 'info_dict': {
3444 'title': 'Data Analysis with Dr Mike Pound',
3445 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3446 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3447 'uploader': 'Computerphile',
3448 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3449 },
3450 'playlist_mincount': 11,
3451 }, {
3452 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3453 'only_matching': True,
3454 }, {
3455 'note': 'Playlist URL that does not actually serve a playlist',
3456 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3457 'info_dict': {
3458 'id': 'FqZTN594JQw',
3459 'ext': 'webm',
3460 'title': "Smiley's People 01 detective, Adventure Series, Action",
3461 'uploader': 'STREEM',
3462 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3463 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3464 'upload_date': '20150526',
3465 'license': 'Standard YouTube License',
3466 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3467 'categories': ['People & Blogs'],
3468 'tags': list,
3469 'view_count': int,
3470 'like_count': int,
3471 'dislike_count': int,
3472 },
3473 'params': {
3474 'skip_download': True,
3475 },
3476 'skip': 'This video is not available.',
3477 'add_ie': [YoutubeIE.ie_key()],
3478 }, {
3479 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3480 'only_matching': True,
3481 }, {
3482 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3483 'only_matching': True,
3484 }, {
3485 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3486 'info_dict': {
3487 'id': '3yImotZU3tw', # This will keep changing
3488 'ext': 'mp4',
3489 'title': compat_str,
3490 'uploader': 'Sky News',
3491 'uploader_id': 'skynews',
3492 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3493 'upload_date': r're:\d{8}',
3494 'description': compat_str,
3495 'categories': ['News & Politics'],
3496 'tags': list,
3497 'like_count': int,
3498 'dislike_count': int,
3499 },
3500 'params': {
3501 'skip_download': True,
3502 },
3503 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3504 }, {
3505 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3506 'info_dict': {
3507 'id': 'a48o2S1cPoo',
3508 'ext': 'mp4',
3509 'title': 'The Young Turks - Live Main Show',
3510 'uploader': 'The Young Turks',
3511 'uploader_id': 'TheYoungTurks',
3512 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3513 'upload_date': '20150715',
3514 'license': 'Standard YouTube License',
3515 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3516 'categories': ['News & Politics'],
3517 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3518 'like_count': int,
3519 'dislike_count': int,
3520 },
3521 'params': {
3522 'skip_download': True,
3523 },
3524 'only_matching': True,
3525 }, {
3526 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3527 'only_matching': True,
3528 }, {
3529 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3530 'only_matching': True,
3531 }, {
3532 'note': 'A channel that is not live. Should raise error',
3533 'url': 'https://www.youtube.com/user/numberphile/live',
3534 'only_matching': True,
3535 }, {
3536 'url': 'https://www.youtube.com/feed/trending',
3537 'only_matching': True,
3538 }, {
3539 'url': 'https://www.youtube.com/feed/library',
3540 'only_matching': True,
3541 }, {
3542 'url': 'https://www.youtube.com/feed/history',
3543 'only_matching': True,
3544 }, {
3545 'url': 'https://www.youtube.com/feed/subscriptions',
3546 'only_matching': True,
3547 }, {
3548 'url': 'https://www.youtube.com/feed/watch_later',
3549 'only_matching': True,
3550 }, {
3551 'note': 'Recommended - redirects to home page',
3552 'url': 'https://www.youtube.com/feed/recommended',
3553 'only_matching': True,
3554 }, {
3555 'note': 'inline playlist with not always working continuations',
3556 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3557 'only_matching': True,
3558 }, {
3559 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3560 'only_matching': True,
3561 }, {
3562 'url': 'https://www.youtube.com/course',
3563 'only_matching': True,
3564 }, {
3565 'url': 'https://www.youtube.com/zsecurity',
3566 'only_matching': True,
3567 }, {
3568 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3569 'only_matching': True,
3570 }, {
3571 'url': 'https://www.youtube.com/TheYoungTurks/live',
3572 'only_matching': True,
3573 }, {
3574 'url': 'https://www.youtube.com/hashtag/cctv9',
3575 'info_dict': {
3576 'id': 'cctv9',
3577 'title': '#cctv9',
3578 },
3579 'playlist_mincount': 350,
3580 }, {
3581 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3582 'only_matching': True,
3583 }, {
3584 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3585 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3586 'only_matching': True
3587 }, {
3588 'note': '/browse/ should redirect to /channel/',
3589 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3590 'only_matching': True
3591 }, {
3592 'note': 'VLPL, should redirect to playlist?list=PL...',
3593 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3594 'info_dict': {
3595 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3596 'uploader': 'NoCopyrightSounds',
3597 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3598 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3599 'title': 'NCS Releases',
3600 },
3601 'playlist_mincount': 166,
3602 }, {
3603 'note': 'Topic, should redirect to playlist?list=UU...',
3604 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3605 'info_dict': {
3606 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3607 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3608 'title': 'Uploads from Royalty Free Music - Topic',
3609 'uploader': 'Royalty Free Music - Topic',
3610 },
3611 'expected_warnings': [
3612 'A channel/user page was given',
3613 'The URL does not have a videos tab',
3614 ],
3615 'playlist_mincount': 101,
3616 }, {
3617 'note': 'Topic without a UU playlist',
3618 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3619 'info_dict': {
3620 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3621 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3622 },
3623 'expected_warnings': [
3624 'A channel/user page was given',
3625 'The URL does not have a videos tab',
3626 'Falling back to channel URL',
3627 ],
3628 'playlist_mincount': 9,
3629 }, {
3630 'note': 'Youtube music Album',
3631 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3632 'info_dict': {
3633 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3634 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3635 },
3636 'playlist_count': 50,
3637 }, {
3638 'note': 'unlisted single video playlist',
3639 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3640 'info_dict': {
3641 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3642 'uploader': 'colethedj',
3643 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3644 'title': 'yt-dlp unlisted playlist test',
3645 'availability': 'unlisted'
3646 },
3647 'playlist_count': 1,
3648 }]
3649
3650 @classmethod
3651 def suitable(cls, url):
3652 return False if YoutubeIE.suitable(url) else super(
3653 YoutubeTabIE, cls).suitable(url)
3654
3655 def _extract_channel_id(self, webpage):
3656 channel_id = self._html_search_meta(
3657 'channelId', webpage, 'channel id', default=None)
3658 if channel_id:
3659 return channel_id
3660 channel_url = self._html_search_meta(
3661 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3662 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3663 'twitter:app:url:googleplay'), webpage, 'channel url')
3664 return self._search_regex(
3665 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3666 channel_url, 'channel id')
3667
3668 @staticmethod
3669 def _extract_basic_item_renderer(item):
3670 # Modified from _extract_grid_item_renderer
3671 known_basic_renderers = (
3672 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3673 )
3674 for key, renderer in item.items():
3675 if not isinstance(renderer, dict):
3676 continue
3677 elif key in known_basic_renderers:
3678 return renderer
3679 elif key.startswith('grid') and key.endswith('Renderer'):
3680 return renderer
3681
    def _grid_entries(self, grid_renderer):
        """Yield entries for every item of a gridRenderer.

        Each item may be a playlist, a video, a channel, or carry a generic
        navigation endpoint URL; the first matching kind wins per item.
        """
        for item in grid_renderer['items']:
            if not isinstance(item, dict):
                continue
            renderer = self._extract_basic_item_renderer(item)
            if not isinstance(renderer, dict):
                continue
            title = self._get_text(renderer, 'title')

            # playlist
            playlist_id = renderer.get('playlistId')
            if playlist_id:
                yield self.url_result(
                    'https://www.youtube.com/playlist?list=%s' % playlist_id,
                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                    video_title=title)
                continue
            # video
            video_id = renderer.get('videoId')
            if video_id:
                yield self._extract_video(renderer)
                continue
            # channel
            channel_id = renderer.get('channelId')
            if channel_id:
                yield self.url_result(
                    'https://www.youtube.com/channel/%s' % channel_id,
                    ie=YoutubeTabIE.ie_key(), video_title=title)
                continue
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if ep_url:
                # Delegate to the first extractor that recognizes the URL
                for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                    if ie.suitable(ep_url):
                        yield self.url_result(
                            ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                        break
3721
3722 def _shelf_entries_from_content(self, shelf_renderer):
3723 content = shelf_renderer.get('content')
3724 if not isinstance(content, dict):
3725 return
3726 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3727 if renderer:
3728 # TODO: add support for nested playlists so each shelf is processed
3729 # as separate playlist
3730 # TODO: this includes only first N items
3731 for entry in self._grid_entries(renderer):
3732 yield entry
3733 renderer = content.get('horizontalListRenderer')
3734 if renderer:
3735 # TODO
3736 pass
3737
3738 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3739 ep = try_get(
3740 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3741 compat_str)
3742 shelf_url = urljoin('https://www.youtube.com', ep)
3743 if shelf_url:
3744 # Skipping links to another channels, note that checking for
3745 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3746 # will not work
3747 if skip_channels and '/channels?' in shelf_url:
3748 return
3749 title = self._get_text(shelf_renderer, 'title')
3750 yield self.url_result(shelf_url, video_title=title)
3751 # Shelf may not contain shelf URL, fallback to extraction from content
3752 for entry in self._shelf_entries_from_content(shelf_renderer):
3753 yield entry
3754
3755 def _playlist_entries(self, video_list_renderer):
3756 for content in video_list_renderer['contents']:
3757 if not isinstance(content, dict):
3758 continue
3759 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3760 if not isinstance(renderer, dict):
3761 continue
3762 video_id = renderer.get('videoId')
3763 if not video_id:
3764 continue
3765 yield self._extract_video(renderer)
3766
3767 def _rich_entries(self, rich_grid_renderer):
3768 renderer = try_get(
3769 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3770 video_id = renderer.get('videoId')
3771 if not video_id:
3772 return
3773 yield self._extract_video(renderer)
3774
3775 def _video_entry(self, video_renderer):
3776 video_id = video_renderer.get('videoId')
3777 if video_id:
3778 return self._extract_video(video_renderer)
3779
    def _post_thread_entries(self, post_thread_renderer):
        """Yield entries for a community post: its video/playlist attachment
        and any YouTube video links embedded in the post text."""
        post_renderer = try_get(
            post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
        if not post_renderer:
            return
        # video attachment
        video_renderer = try_get(
            post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
        video_id = video_renderer.get('videoId')
        if video_id:
            entry = self._extract_video(video_renderer)
            if entry:
                yield entry
        # playlist attachment
        playlist_id = try_get(
            post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
        # inline video links
        runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
        for run in runs:
            if not isinstance(run, dict):
                continue
            ep_url = try_get(
                run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
            if not ep_url:
                continue
            if not YoutubeIE.suitable(ep_url):
                continue
            ep_video_id = YoutubeIE._match_id(ep_url)
            # Skip links that merely point at the attached video again
            if video_id == ep_video_id:
                continue
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3815
3816 def _post_thread_continuation_entries(self, post_thread_continuation):
3817 contents = post_thread_continuation.get('contents')
3818 if not isinstance(contents, list):
3819 return
3820 for content in contents:
3821 renderer = content.get('backstagePostThreadRenderer')
3822 if not isinstance(renderer, dict):
3823 continue
3824 for entry in self._post_thread_entries(renderer):
3825 yield entry
3826
3827 r''' # unused
3828 def _rich_grid_entries(self, contents):
3829 for content in contents:
3830 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3831 if video_renderer:
3832 entry = self._video_entry(video_renderer)
3833 if entry:
3834 yield entry
3835 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of the selected tab, following continuations.

        The first page comes from the already-downloaded renderer tree;
        subsequent pages are fetched from the InnerTube API until no further
        continuation token is found.
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # Map renderer key -> generator of entries for that renderer
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # single-item list used instead of `nonlocal` (Python 2 legacy)
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Carry visitorData across requests so pagination stays consistent
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Old-style continuation responses ('continuationContents')
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # New-style continuation responses (onResponseReceived*):
            # (handler, key under which the items must be re-wrapped)
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3951
3952 @staticmethod
3953 def _extract_selected_tab(tabs):
3954 for tab in tabs:
3955 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3956 if renderer.get('selected') is True:
3957 return renderer
3958 else:
3959 raise ExtractorError('Unable to find selected tab')
3960
3961 @classmethod
3962 def _extract_uploader(cls, data):
3963 uploader = {}
3964 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3965 owner = try_get(
3966 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3967 if owner:
3968 uploader['uploader'] = owner.get('text')
3969 uploader['uploader_id'] = try_get(
3970 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3971 uploader['uploader_url'] = urljoin(
3972 'https://www.youtube.com/',
3973 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3974 return {k: v for k, v in uploader.items() if v is not None}
3975
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build the playlist result for a tabbed page (channel/playlist/hashtag).

        Collects metadata from channelMetadataRenderer or
        playlistMetadataRenderer, then delegates entry extraction to
        _entries() for the currently selected tab.
        """
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            # Playlist pages carry their metadata under a different renderer
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            # Prefer the avatar; fall back to the sidebar playlist thumbnail
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # Hashtag pages have no metadata renderer; use the header or the id
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # No channel metadata: take uploader info from the playlist sidebar
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        ytcfg = self.extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
4050
4051 def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
4052 first_id = last_id = None
4053 ytcfg = self.extract_ytcfg(playlist_id, webpage)
4054 headers = self.generate_api_headers(
4055 ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4056 identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
4057 for page_num in itertools.count(1):
4058 videos = list(self._playlist_entries(playlist))
4059 if not videos:
4060 return
4061 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
4062 if start >= len(videos):
4063 return
4064 for video in videos[start:]:
4065 if video['id'] == first_id:
4066 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
4067 return
4068 yield video
4069 first_id = first_id or videos[0]['id']
4070 last_id = videos[-1]['id']
4071 watch_endpoint = try_get(
4072 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
4073 query = {
4074 'playlistId': playlist_id,
4075 'videoId': watch_endpoint.get('videoId') or last_id,
4076 'index': watch_endpoint.get('index') or len(videos),
4077 'params': watch_endpoint.get('params') or 'OAE%3D'
4078 }
4079 response = self._extract_response(
4080 item_id='%s page %d' % (playlist_id, page_num),
4081 query=query, ep='next', headers=headers, ytcfg=ytcfg,
4082 check_get_keys='contents'
4083 )
4084 playlist = try_get(
4085 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4086
4087 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
4088 title = playlist.get('title') or try_get(
4089 data, lambda x: x['titleText']['simpleText'], compat_str)
4090 playlist_id = playlist.get('playlistId') or item_id
4091
4092 # Delegating everything except mix playlists to regular tab-based playlist URL
4093 playlist_url = urljoin(url, try_get(
4094 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4095 compat_str))
4096 if playlist_url and playlist_url != url:
4097 return self.url_result(
4098 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4099 video_title=title)
4100
4101 return self.playlist_result(
4102 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4103 playlist_id=playlist_id, playlist_title=title)
4104
4105 def _extract_availability(self, data):
4106 """
4107 Gets the availability of a given playlist/tab.
4108 Note: Unless YouTube tells us explicitly, we do not assume it is public
4109 @param data: response
4110 """
4111 is_private = is_unlisted = None
4112 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4113 badge_labels = self._extract_badges(renderer)
4114
4115 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4116 privacy_dropdown_entries = try_get(
4117 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4118 for renderer_dict in privacy_dropdown_entries:
4119 is_selected = try_get(
4120 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4121 if not is_selected:
4122 continue
4123 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4124 if label:
4125 badge_labels.add(label.lower())
4126 break
4127
4128 for badge_label in badge_labels:
4129 if badge_label == 'unlisted':
4130 is_unlisted = True
4131 elif badge_label == 'private':
4132 is_private = True
4133 elif badge_label == 'public':
4134 is_unlisted = is_private = False
4135 return self._availability(is_private, False, False, False, is_unlisted)
4136
4137 @staticmethod
4138 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4139 sidebar_renderer = try_get(
4140 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4141 for item in sidebar_renderer:
4142 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4143 if renderer:
4144 return renderer
4145
    def _reload_with_unavailable_videos(self, item_id, data, webpage):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.

        Returns the API response, or None when the sidebar renderer is missing.
        """
        browse_id = params = None
        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
        if not renderer:
            return
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            # The button is identified by its (English) label text
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

        ytcfg = self.extract_ytcfg(item_id, webpage)
        headers = self.generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=item_id),
            visitor_data=try_get(
                self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        # Defaults used when the menu button did not supply explicit values
        query = {
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False, ytcfg=ytcfg,
            note='Downloading API JSON with unavailable videos')
4184
4185 def _extract_webpage(self, url, item_id):
4186 retries = self.get_param('extractor_retries', 3)
4187 count = -1
4188 last_error = 'Incomplete yt initial data recieved'
4189 while count < retries:
4190 count += 1
4191 # Sometimes youtube returns a webpage with incomplete ytInitialData
4192 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4193 if count:
4194 self.report_warning('%s. Retrying ...' % last_error)
4195 webpage = self._download_webpage(
4196 url, item_id,
4197 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4198 data = self.extract_yt_initial_data(item_id, webpage)
4199 if data.get('contents') or data.get('currentVideoEndpoint'):
4200 break
4201 # Extract alerts here only when there is error
4202 self._extract_and_report_alerts(data)
4203 if count >= retries:
4204 raise ExtractorError(last_error)
4205 return webpage, data
4206
4207 @staticmethod
4208 def _smuggle_data(entries, data):
4209 for entry in entries:
4210 if data:
4211 entry['url'] = smuggle_url(entry['url'], data)
4212 yield entry
4213
4214 def _real_extract(self, url):
4215 url, smuggled_data = unsmuggle_url(url, {})
4216 if self.is_music_url(url):
4217 smuggled_data['is_music_url'] = True
4218 info_dict = self.__real_extract(url, smuggled_data)
4219 if info_dict.get('entries'):
4220 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4221 return info_dict
4222
    # Splits a matched URL into the part before the tab, the tab itself and the
    # rest, reusing _VALID_URL's named groups; the conditional (?(channel_type)...)
    # only allows a /tab segment on channel-type URLs. Used to rebuild the URL
    # with a normalized (lower-cased) tab name.
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4224
    def __real_extract(self, url, smuggled_data):
        """Extract a tab/playlist/watch page, normalizing the URL first.

        Rewrites the URL (www.youtube.com host, lower-cased tab, music-URL
        redirections), then dispatches on the downloaded ytInitialData:
        tabs -> _extract_from_tabs, watch-page playlist ->
        _extract_from_playlist, otherwise falls back to a single video.
        Raises ExtractorError when the page cannot be recognized.
        """
        item_id = self._match_id(url)
        # Force the canonical host; other hosts can return different data
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Group dict of _url_re with None groups normalized to ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Re-match against the rewritten URL
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        # Last resort: a lone video endpoint (or the video id parsed earlier)
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4339
4340
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    # Matches bare playlist ids as well as youtube/invidious URLs carrying list=
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to YoutubeTabIE whenever it matches the URL
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        # URLs that carry a video id are left to other extractors
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Normalize to a canonical /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep any original query parameters; bare ids only get list=<id>
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4425
4426
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    # Only matches youtu.be short links that also carry a list= parameter;
    # presumably plain youtu.be video links are handled by another extractor
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rebuild a full watch URL with both ids and delegate to YoutubeTabIE."""
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4465
4466
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Turn the ytuser: shorthand into a canonical /user/ URL and hand
        # it off to the tab extractor
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4480
4481
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos are exposed as the special "LL" playlist
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4499
4500
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra search params sent in the API request body (set by subclasses)
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` video results for `query`, following continuations."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            # `continuation` carries the token that requests the next page
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page nests results under sectionListRenderer; continuation
            # responses deliver them via appendContinuationItemsAction instead
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    # Skip anything that is not a plain video result
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token found on this page -> no more results
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
4568
4569
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Opaque search param requesting newest-first ordering (per IE_DESC)
    _SEARCH_PARAMS = 'CAI%3D'
4575
4576
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Search URLs are matched directly by _VALID_URL, not built from a key
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the URL's query string."""
        params = parse_qs(url)
        # _VALID_URL guarantees either search_query= or q= is present
        search_terms = (params.get('search_query') or params.get('q'))[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(search_terms, self._MAX_RESULTS)
4603
4604
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derived from the subclass feed name, e.g. 'youtube:history'
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # All feeds live under /feed/<name>; delegate to the tab extractor
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4621
4622
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The watch-later list is exposed as the special "WL" playlist
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4635
4636
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com homepage (query/fragment or nothing after /)
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # overrides the base class: the feed also works logged out
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4652
4653
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4665
4666
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    # Accepts both :ythis and :ythistory
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4675
4676
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches watch/attribution URLs that carry only auxiliary parameters and
    # no video id -- typically the result of an unquoted '&' in the shell
    # swallowing the v= part (see the error message below)
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always an error: these URLs cannot identify a video
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
4724
4725
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # A valid video id is 11 characters; anything shorter is a cut-off URL
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)