# yt-dlp -- yt_dlp/extractor/youtube.py
# (non-Python gitweb blob-view header lines removed)
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 is_html,
42 mimetype2ext,
43 network_exceptions,
44 orderedSet,
45 parse_codecs,
46 parse_count,
47 parse_duration,
48 parse_iso8601,
49 qualities,
50 remove_start,
51 smuggle_url,
52 str_or_none,
53 str_to_int,
54 traverse_obj,
55 try_get,
56 unescapeHTML,
57 unified_strdate,
58 unsmuggle_url,
59 update_url_query,
60 url_or_none,
61 urlencode_postdata,
62 urljoin,
63 variadic,
64 )
65
66
def parse_qs(url):
    """Return the query parameters of *url* as a dict of value lists."""
    parsed = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed.query)
69
70
# Any clients starting with _ cannot be explicitly requested by the user.
# Missing INNERTUBE_API_KEY / INNERTUBE_HOST entries are filled in with
# defaults by build_innertube_clients() below.
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        # Numeric client id, sent as the X-YouTube-Client-Name header
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
    },
    'android_creator': {
        # No API key: build_innertube_clients() supplies the default one
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
209
210
def build_innertube_clients():
    """Post-process INNERTUBE_CLIENTS in place: fill in default API key/host/locale,
    assign a format priority per client, and derive the hidden *_agegate variants."""
    embed_context = {
        'embedUrl': 'https://google.com',  # any valid URL works here
    }
    base_clients = ('android', 'web', 'ios', 'mweb')
    priority = qualities(base_clients[::-1])

    # Iterate over a snapshot since *_agegate entries are added while looping
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        cfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        cfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        cfg['priority'] = 10 * priority(name.split('_', 1)[0])

        if name in base_clients:
            agegate = copy.deepcopy(cfg)
            agegate['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            agegate['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3


build_innertube_clients()
237
238
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # URL path segments that can never be a channel/user name
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Matches prefixed playlist ids (PL/UU/OLAK5uy_ etc.) and the special
    # lists RDMM, WL (watch later), LL (liked) and LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    r''' # Unused since login is broken
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
    '''
263
264 def _login(self):
265 """
266 Attempt to log in to YouTube.
267 True is returned if successful or skipped.
268 False is returned if login failed.
269
270 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
271 """
272
273 def warn(message):
274 self.report_warning(message)
275
276 # username+password login is broken
277 if (self._LOGIN_REQUIRED
278 and self.get_param('cookiefile') is None
279 and self.get_param('cookiesfrombrowser') is None):
280 self.raise_login_required(
281 'Login details are needed to download this content', method='cookies')
282 username, password = self._get_login_info()
283 if username:
284 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
285 return
286
287 # Everything below this is broken!
288 r'''
289 # No authentication to be performed
290 if username is None:
291 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
292 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
293 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
294 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
295 return True
296
297 login_page = self._download_webpage(
298 self._LOGIN_URL, None,
299 note='Downloading login page',
300 errnote='unable to fetch login page', fatal=False)
301 if login_page is False:
302 return
303
304 login_form = self._hidden_inputs(login_page)
305
306 def req(url, f_req, note, errnote):
307 data = login_form.copy()
308 data.update({
309 'pstMsg': 1,
310 'checkConnection': 'youtube',
311 'checkedDomains': 'youtube',
312 'hl': 'en',
313 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
314 'f.req': json.dumps(f_req),
315 'flowName': 'GlifWebSignIn',
316 'flowEntry': 'ServiceLogin',
317 # TODO: reverse actual botguard identifier generation algo
318 'bgRequest': '["identifier",""]',
319 })
320 return self._download_json(
321 url, None, note=note, errnote=errnote,
322 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
323 fatal=False,
324 data=urlencode_postdata(data), headers={
325 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
326 'Google-Accounts-XSRF': 1,
327 })
328
329 lookup_req = [
330 username,
331 None, [], None, 'US', None, None, 2, False, True,
332 [
333 None, None,
334 [2, 1, None, 1,
335 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
336 None, [], 4],
337 1, [None, None, []], None, None, None, True
338 ],
339 username,
340 ]
341
342 lookup_results = req(
343 self._LOOKUP_URL, lookup_req,
344 'Looking up account info', 'Unable to look up account info')
345
346 if lookup_results is False:
347 return False
348
349 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
350 if not user_hash:
351 warn('Unable to extract user hash')
352 return False
353
354 challenge_req = [
355 user_hash,
356 None, 1, None, [1, None, None, None, [password, None, True]],
357 [
358 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
359 1, [None, None, []], None, None, None, True
360 ]]
361
362 challenge_results = req(
363 self._CHALLENGE_URL, challenge_req,
364 'Logging in', 'Unable to log in')
365
366 if challenge_results is False:
367 return
368
369 login_res = try_get(challenge_results, lambda x: x[0][5], list)
370 if login_res:
371 login_msg = try_get(login_res, lambda x: x[5], compat_str)
372 warn(
373 'Unable to login: %s' % 'Invalid password'
374 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
375 return False
376
377 res = try_get(challenge_results, lambda x: x[0][-1], list)
378 if not res:
379 warn('Unable to extract result entry')
380 return False
381
382 login_challenge = try_get(res, lambda x: x[0][0], list)
383 if login_challenge:
384 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
385 if challenge_str == 'TWO_STEP_VERIFICATION':
386 # SEND_SUCCESS - TFA code has been successfully sent to phone
387 # QUOTA_EXCEEDED - reached the limit of TFA codes
388 status = try_get(login_challenge, lambda x: x[5], compat_str)
389 if status == 'QUOTA_EXCEEDED':
390 warn('Exceeded the limit of TFA codes, try later')
391 return False
392
393 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
394 if not tl:
395 warn('Unable to extract TL')
396 return False
397
398 tfa_code = self._get_tfa_info('2-step verification code')
399
400 if not tfa_code:
401 warn(
402 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
403 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
404 return False
405
406 tfa_code = remove_start(tfa_code, 'G-')
407
408 tfa_req = [
409 user_hash, None, 2, None,
410 [
411 9, None, None, None, None, None, None, None,
412 [None, tfa_code, True, 2]
413 ]]
414
415 tfa_results = req(
416 self._TFA_URL.format(tl), tfa_req,
417 'Submitting TFA code', 'Unable to submit TFA code')
418
419 if tfa_results is False:
420 return False
421
422 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
423 if tfa_res:
424 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
425 warn(
426 'Unable to finish TFA: %s' % 'Invalid TFA code'
427 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
428 return False
429
430 check_cookie_url = try_get(
431 tfa_results, lambda x: x[0][-1][2], compat_str)
432 else:
433 CHALLENGES = {
434 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
435 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
436 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
437 }
438 challenge = CHALLENGES.get(
439 challenge_str,
440 '%s returned error %s.' % (self.IE_NAME, challenge_str))
441 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
442 return False
443 else:
444 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
445
446 if not check_cookie_url:
447 warn('Unable to extract CheckCookie URL')
448 return False
449
450 check_cookie_results = self._download_webpage(
451 check_cookie_url, None, 'Checking cookie', fatal=False)
452
453 if check_cookie_results is False:
454 return False
455
456 if 'https://myaccount.google.com/' not in check_cookie_results:
457 warn('Unable to log in')
458 return False
459
460 return True
461 '''
462
463 def _initialize_consent(self):
464 cookies = self._get_cookies('https://www.youtube.com/')
465 if cookies.get('__Secure-3PSID'):
466 return
467 consent_id = None
468 consent = cookies.get('CONSENT')
469 if consent:
470 if 'YES' in consent.value:
471 return
472 consent_id = self._search_regex(
473 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
474 if not consent_id:
475 consent_id = random.randint(100, 999)
476 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
477
478 def _real_initialize(self):
479 self._initialize_consent()
480 if self._downloader is None:
481 return
482 if not self._login():
483 return
484
    # Matches the ytInitialData JSON blob assigned in page scripts
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Matches the ytInitialPlayerResponse JSON blob
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Tokens that delimit the end of the above JSON blobs in the page source
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
488
489 def _get_default_ytcfg(self, client='web'):
490 return copy.deepcopy(INNERTUBE_CLIENTS[client])
491
492 def _get_innertube_host(self, client='web'):
493 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
494
495 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
496 # try_get but with fallback to default ytcfg client values when present
497 _func = lambda y: try_get(y, getter, expected_type)
498 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
499
500 def _extract_client_name(self, ytcfg, default_client='web'):
501 return self._ytcfg_get_safe(
502 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
503 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
504
505 @staticmethod
506 def _extract_session_index(*data):
507 for ytcfg in data:
508 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
509 if session_index is not None:
510 return session_index
511
512 def _extract_client_version(self, ytcfg, default_client='web'):
513 return self._ytcfg_get_safe(
514 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
515 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
516
517 def _extract_api_key(self, ytcfg=None, default_client='web'):
518 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
519
520 def _extract_context(self, ytcfg=None, default_client='web'):
521 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
522 context = _get_context(ytcfg)
523 if context:
524 return context
525
526 context = _get_context(self._get_default_ytcfg(default_client))
527 if not ytcfg:
528 return context
529
530 # Recreate the client context (required)
531 context['client'].update({
532 'clientVersion': self._extract_client_version(ytcfg, default_client),
533 'clientName': self._extract_client_name(ytcfg, default_client),
534 })
535 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
536 if visitor_data:
537 context['client']['visitorData'] = visitor_data
538 return context
539
    # Cached SAPISID cookie value; None before the first lookup,
    # False once a lookup has failed (so it is not retried)
    _SAPISID = None
541
542 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
543 time_now = round(time.time())
544 if self._SAPISID is None:
545 yt_cookies = self._get_cookies('https://www.youtube.com')
546 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
547 # See: https://github.com/yt-dlp/yt-dlp/issues/393
548 sapisid_cookie = dict_get(
549 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
550 if sapisid_cookie and sapisid_cookie.value:
551 self._SAPISID = sapisid_cookie.value
552 self.write_debug('Extracted SAPISID cookie')
553 # SAPISID cookie is required if not already present
554 if not yt_cookies.get('SAPISID'):
555 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
556 self._set_cookie(
557 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
558 else:
559 self._SAPISID = False
560 if not self._SAPISID:
561 return None
562 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
563 sapisidhash = hashlib.sha1(
564 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
565 return f'SAPISIDHASH {time_now}_{sapisidhash}'
566
567 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
568 note='Downloading API JSON', errnote='Unable to download API page',
569 context=None, api_key=None, api_hostname=None, default_client='web'):
570
571 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
572 data.update(query)
573 real_headers = self.generate_api_headers(default_client=default_client)
574 real_headers.update({'content-type': 'application/json'})
575 if headers:
576 real_headers.update(headers)
577 return self._download_json(
578 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
579 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
580 data=json.dumps(data).encode('utf8'), headers=real_headers,
581 query={'key': api_key or self._extract_api_key()})
582
583 def extract_yt_initial_data(self, video_id, webpage):
584 return self._parse_json(
585 self._search_regex(
586 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
587 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
588 video_id)
589
590 def _extract_identity_token(self, webpage, item_id):
591 if not webpage:
592 return None
593 ytcfg = self.extract_ytcfg(item_id, webpage)
594 if ytcfg:
595 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
596 if token:
597 return token
598 return self._search_regex(
599 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
600 'identity token', default=None)
601
602 @staticmethod
603 def _extract_account_syncid(*args):
604 """
605 Extract syncId required to download private playlists of secondary channels
606 @params response and/or ytcfg
607 """
608 for data in args:
609 # ytcfg includes channel_syncid if on secondary channel
610 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
611 if delegated_sid:
612 return delegated_sid
613 sync_ids = (try_get(
614 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
615 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
616 if len(sync_ids) >= 2 and sync_ids[1]:
617 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
618 # and just "user_syncid||" for primary channel. We only want the channel_syncid
619 return sync_ids[0]
620
621 def extract_ytcfg(self, video_id, webpage):
622 if not webpage:
623 return {}
624 return self._parse_json(
625 self._search_regex(
626 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
627 default='{}'), video_id, fatal=False) or {}
628
629 def generate_api_headers(
630 self, ytcfg=None, identity_token=None, account_syncid=None,
631 visitor_data=None, api_hostname=None, default_client='web', session_index=None):
632 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
633 headers = {
634 'X-YouTube-Client-Name': compat_str(
635 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
636 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
637 'Origin': origin
638 }
639 if not visitor_data and ytcfg:
640 visitor_data = try_get(
641 self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
642 if identity_token:
643 headers['X-Youtube-Identity-Token'] = identity_token
644 if account_syncid:
645 headers['X-Goog-PageId'] = account_syncid
646 if session_index is None and ytcfg:
647 session_index = self._extract_session_index(ytcfg)
648 if account_syncid or session_index is not None:
649 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
650 if visitor_data:
651 headers['X-Goog-Visitor-Id'] = visitor_data
652 auth = self._generate_sapisidhash_header(origin)
653 if auth is not None:
654 headers['Authorization'] = auth
655 headers['X-Origin'] = origin
656 return headers
657
658 @staticmethod
659 def _build_api_continuation_query(continuation, ctp=None):
660 query = {
661 'continuation': continuation
662 }
663 # TODO: Inconsistency with clickTrackingParams.
664 # Currently we have a fixed ctp contained within context (from ytcfg)
665 # and a ctp in root query for continuation.
666 if ctp:
667 query['clickTracking'] = {'clickTrackingParams': ctp}
668 return query
669
670 @classmethod
671 def _extract_next_continuation_data(cls, renderer):
672 next_continuation = try_get(
673 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
674 lambda x: x['continuation']['reloadContinuationData']), dict)
675 if not next_continuation:
676 return
677 continuation = next_continuation.get('continuation')
678 if not continuation:
679 return
680 ctp = next_continuation.get('clickTrackingParams')
681 return cls._build_api_continuation_query(continuation, ctp)
682
683 @classmethod
684 def _extract_continuation_ep_data(cls, continuation_ep: dict):
685 if isinstance(continuation_ep, dict):
686 continuation = try_get(
687 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
688 if not continuation:
689 return
690 ctp = continuation_ep.get('clickTrackingParams')
691 return cls._build_api_continuation_query(continuation, ctp)
692
693 @classmethod
694 def _extract_continuation(cls, renderer):
695 next_continuation = cls._extract_next_continuation_data(renderer)
696 if next_continuation:
697 return next_continuation
698
699 contents = []
700 for key in ('contents', 'items'):
701 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
702
703 for content in contents:
704 if not isinstance(content, dict):
705 continue
706 continuation_ep = try_get(
707 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
708 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
709 dict)
710 continuation = cls._extract_continuation_ep_data(continuation_ep)
711 if continuation:
712 return continuation
713
714 @classmethod
715 def _extract_alerts(cls, data):
716 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
717 if not isinstance(alert_dict, dict):
718 continue
719 for alert in alert_dict.values():
720 alert_type = alert.get('type')
721 if not alert_type:
722 continue
723 message = cls._get_text(alert, 'text')
724 if message:
725 yield alert_type, message
726
727 def _report_alerts(self, alerts, expected=True, fatal=True):
728 errors = []
729 warnings = []
730 for alert_type, alert_message in alerts:
731 if alert_type.lower() == 'error' and fatal:
732 errors.append([alert_type, alert_message])
733 else:
734 warnings.append([alert_type, alert_message])
735
736 for alert_type, alert_message in (warnings + errors[:-1]):
737 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
738 if errors:
739 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
740
741 def _extract_and_report_alerts(self, data, *args, **kwargs):
742 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
743
744 def _extract_badges(self, renderer: dict):
745 badges = set()
746 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
747 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
748 if label:
749 badges.add(label.lower())
750 return badges
751
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """
        Extract a text string from YouTube's text renderers ('simpleText' or 'runs').

        @param data      renderer dict (or container thereof) to search
        @param path_list traverse_obj paths pointing at text objects within *data*;
                         when empty, *data* itself is treated as the text object
        @param max_runs  join at most this many 'runs' fragments (all when None)
        Returns the first non-empty text found, else None.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # A path without ... or branch alternatives yields a single object;
                # wrap it so the loop below handles both cases uniformly
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                # The item may itself already be a list of runs
                if not runs and isinstance(item, list):
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
773
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """
        Call the innertube endpoint *ep* with retries on transient failures.

        @param item_id        id used for logging and error messages
        @param query          request payload passed to _call_api
        @param check_get_keys keys of which at least one must be present in the
                              response for it to count as complete (retry otherwise)
        Returns the parsed JSON response, or None when non-fatal and failed.
        """
        response = None
        last_error = None
        count = -1  # first iteration is the initial attempt, not a retry
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Non-HTML error bodies may carry a structured innertube error message
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
839
840 @staticmethod
841 def is_music_url(url):
842 return re.match(r'https?://music\.youtube\.com/', url) is not None
843
844 def _extract_video(self, renderer):
845 video_id = renderer.get('videoId')
846 title = self._get_text(renderer, 'title')
847 description = self._get_text(renderer, 'descriptionSnippet')
848 duration = parse_duration(self._get_text(
849 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
850 view_count_text = self._get_text(renderer, 'viewCountText') or ''
851 view_count = str_to_int(self._search_regex(
852 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
853 'view count', default=None))
854
855 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
856
857 return {
858 '_type': 'url',
859 'ie_key': YoutubeIE.ie_key(),
860 'id': video_id,
861 'url': video_id,
862 'title': title,
863 'description': description,
864 'duration': duration,
865 'view_count': view_count,
866 'uploader': uploader,
867 }
868
869
870 class YoutubeIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com'
    # Hostname regexes for Invidious (alternative YouTube front-end) instances.
    # These are interpolated into _VALID_URL below (via %(invidious)s) so that
    # URLs pointing at these mirrors are handled by this extractor as well.
    # Entries are raw regex fragments, not plain hostnames — dots are escaped.
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        # Tor (.onion) and I2P (.b32.i2p) hidden-service instances
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    )
    # Verbose (?x) pattern matching every URL form this extractor accepts:
    # full watch/embed/e/v URLs on youtube.com (any capitalization/subdomain,
    # plus -nocookie and kids variants), known proxy/mirror hosts, all
    # Invidious instances (interpolated from _INVIDIOUS_SITES), short
    # youtu.be-style URLs, and a completely bare 11-character video ID.
    # Group 1 captures the URL prefix (used by the (?(1)...) conditional);
    # the named group <id> is the video ID. The # comments inside the string
    # are regex comments, ignored by the engine under the (?x) flag.
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                            (?:www\.)?deturl\.com/www\.youtube\.com|
                            (?:www\.)?pwnyoutube\.com|
                            (?:www\.)?hooktube\.com|
                            (?:www\.)?yourepeat\.com|
                            tube\.majestyc\.net|
                            %(invidious)s|
                            youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                                 v=
                             )
                         ))
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                            %(invidious)s
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     (?:\#|$)""" % {
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    # Regexes (tried in order) for extracting the player version <id> from a
    # player JS URL; the id identifies which base.js build to download for
    # signature decryption.
    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
    )
    # Hardcoded metadata for known YouTube itags (format codes), keyed by the
    # itag as a string. Used to fill in ext/resolution/codec info that the API
    # response may omit. Values mirror what YouTube serves for each itag, so
    # entries must not be "corrected" without verifying against live streams.
    # Negative 'preference' values deprioritize a format during selection.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        # NOTE: height 72 (not a typo for 720) — itag 151 is an ultra-low-res HLS variant
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itags 244/245/246 are distinct 480p vp9 variants (different bitrates)
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }
    # Subtitle formats requested from YouTube's timedtext endpoint,
    # in order of preference
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Disable the base extractor's generic geo-bypass (X-Forwarded-For)
    # mechanism for this extractor
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
1085 _TESTS = [
1086 {
1087 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1088 'info_dict': {
1089 'id': 'BaW_jenozKc',
1090 'ext': 'mp4',
1091 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1092 'uploader': 'Philipp Hagemeister',
1093 'uploader_id': 'phihag',
1094 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1095 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1096 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1097 'upload_date': '20121002',
1098 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1099 'categories': ['Science & Technology'],
1100 'tags': ['youtube-dl'],
1101 'duration': 10,
1102 'view_count': int,
1103 'like_count': int,
1104 'dislike_count': int,
1105 'start_time': 1,
1106 'end_time': 9,
1107 }
1108 },
1109 {
1110 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1111 'note': 'Embed-only video (#1746)',
1112 'info_dict': {
1113 'id': 'yZIXLfi8CZQ',
1114 'ext': 'mp4',
1115 'upload_date': '20120608',
1116 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1117 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1118 'uploader': 'SET India',
1119 'uploader_id': 'setindia',
1120 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1121 'age_limit': 18,
1122 },
1123 'skip': 'Private video',
1124 },
1125 {
1126 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1127 'note': 'Use the first video ID in the URL',
1128 'info_dict': {
1129 'id': 'BaW_jenozKc',
1130 'ext': 'mp4',
1131 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1132 'uploader': 'Philipp Hagemeister',
1133 'uploader_id': 'phihag',
1134 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1135 'upload_date': '20121002',
1136 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1137 'categories': ['Science & Technology'],
1138 'tags': ['youtube-dl'],
1139 'duration': 10,
1140 'view_count': int,
1141 'like_count': int,
1142 'dislike_count': int,
1143 },
1144 'params': {
1145 'skip_download': True,
1146 },
1147 },
1148 {
1149 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1150 'note': '256k DASH audio (format 141) via DASH manifest',
1151 'info_dict': {
1152 'id': 'a9LDPn-MO4I',
1153 'ext': 'm4a',
1154 'upload_date': '20121002',
1155 'uploader_id': '8KVIDEO',
1156 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1157 'description': '',
1158 'uploader': '8KVIDEO',
1159 'title': 'UHDTV TEST 8K VIDEO.mp4'
1160 },
1161 'params': {
1162 'youtube_include_dash_manifest': True,
1163 'format': '141',
1164 },
1165 'skip': 'format 141 not served anymore',
1166 },
1167 # DASH manifest with encrypted signature
1168 {
1169 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1170 'info_dict': {
1171 'id': 'IB3lcPjvWLA',
1172 'ext': 'm4a',
1173 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1174 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1175 'duration': 244,
1176 'uploader': 'AfrojackVEVO',
1177 'uploader_id': 'AfrojackVEVO',
1178 'upload_date': '20131011',
1179 'abr': 129.495,
1180 },
1181 'params': {
1182 'youtube_include_dash_manifest': True,
1183 'format': '141/bestaudio[ext=m4a]',
1184 },
1185 },
1186 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1187 {
1188 'note': 'Embed allowed age-gate video',
1189 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1190 'info_dict': {
1191 'id': 'HtVdAasjOgU',
1192 'ext': 'mp4',
1193 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1194 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1195 'duration': 142,
1196 'uploader': 'The Witcher',
1197 'uploader_id': 'WitcherGame',
1198 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1199 'upload_date': '20140605',
1200 'age_limit': 18,
1201 },
1202 },
1203 {
1204 'note': 'Age-gate video with embed allowed in public site',
1205 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1206 'info_dict': {
1207 'id': 'HsUATh_Nc2U',
1208 'ext': 'mp4',
1209 'title': 'Godzilla 2 (Official Video)',
1210 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1211 'upload_date': '20200408',
1212 'uploader_id': 'FlyingKitty900',
1213 'uploader': 'FlyingKitty',
1214 'age_limit': 18,
1215 },
1216 },
1217 {
1218 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1219 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1220 'info_dict': {
1221 'id': 'Tq92D6wQ1mg',
1222 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1223 'ext': 'mp4',
1224 'upload_date': '20191227',
1225 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1226 'uploader': 'Projekt Melody',
1227 'description': 'md5:17eccca93a786d51bc67646756894066',
1228 'age_limit': 18,
1229 },
1230 },
1231 {
1232 'note': 'Non-Agegated non-embeddable video',
1233 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1234 'info_dict': {
1235 'id': 'MeJVWBSsPAY',
1236 'ext': 'mp4',
1237 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1238 'uploader': 'Herr Lurik',
1239 'uploader_id': 'st3in234',
1240 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1241 'upload_date': '20130730',
1242 },
1243 },
1244 {
1245 'note': 'Non-bypassable age-gated video',
1246 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1247 'only_matching': True,
1248 },
1249 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1250 # YouTube Red ad is not captured for creator
1251 {
1252 'url': '__2ABJjxzNo',
1253 'info_dict': {
1254 'id': '__2ABJjxzNo',
1255 'ext': 'mp4',
1256 'duration': 266,
1257 'upload_date': '20100430',
1258 'uploader_id': 'deadmau5',
1259 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1260 'creator': 'deadmau5',
1261 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1262 'uploader': 'deadmau5',
1263 'title': 'Deadmau5 - Some Chords (HD)',
1264 'alt_title': 'Some Chords',
1265 },
1266 'expected_warnings': [
1267 'DASH manifest missing',
1268 ]
1269 },
1270 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1271 {
1272 'url': 'lqQg6PlCWgI',
1273 'info_dict': {
1274 'id': 'lqQg6PlCWgI',
1275 'ext': 'mp4',
1276 'duration': 6085,
1277 'upload_date': '20150827',
1278 'uploader_id': 'olympic',
1279 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1280 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1281 'uploader': 'Olympics',
1282 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1283 },
1284 'params': {
1285 'skip_download': 'requires avconv',
1286 }
1287 },
1288 # Non-square pixels
1289 {
1290 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1291 'info_dict': {
1292 'id': '_b-2C3KPAM0',
1293 'ext': 'mp4',
1294 'stretched_ratio': 16 / 9.,
1295 'duration': 85,
1296 'upload_date': '20110310',
1297 'uploader_id': 'AllenMeow',
1298 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1299 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1300 'uploader': '孫ᄋᄅ',
1301 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1302 },
1303 },
1304 # url_encoded_fmt_stream_map is empty string
1305 {
1306 'url': 'qEJwOuvDf7I',
1307 'info_dict': {
1308 'id': 'qEJwOuvDf7I',
1309 'ext': 'webm',
1310 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1311 'description': '',
1312 'upload_date': '20150404',
1313 'uploader_id': 'spbelect',
1314 'uploader': 'Наблюдатели Петербурга',
1315 },
1316 'params': {
1317 'skip_download': 'requires avconv',
1318 },
1319 'skip': 'This live event has ended.',
1320 },
1321 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1322 {
1323 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1324 'info_dict': {
1325 'id': 'FIl7x6_3R5Y',
1326 'ext': 'webm',
1327 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1328 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1329 'duration': 220,
1330 'upload_date': '20150625',
1331 'uploader_id': 'dorappi2000',
1332 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1333 'uploader': 'dorappi2000',
1334 'formats': 'mincount:31',
1335 },
1336 'skip': 'not actual anymore',
1337 },
1338 # DASH manifest with segment_list
1339 {
1340 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1341 'md5': '8ce563a1d667b599d21064e982ab9e31',
1342 'info_dict': {
1343 'id': 'CsmdDsKjzN8',
1344 'ext': 'mp4',
1345 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1346 'uploader': 'Airtek',
1347 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1348 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1349 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1350 },
1351 'params': {
1352 'youtube_include_dash_manifest': True,
1353 'format': '135', # bestvideo
1354 },
1355 'skip': 'This live event has ended.',
1356 },
1357 {
1358 # Multifeed videos (multiple cameras), URL is for Main Camera
1359 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1360 'info_dict': {
1361 'id': 'jvGDaLqkpTg',
1362 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1363 'description': 'md5:e03b909557865076822aa169218d6a5d',
1364 },
1365 'playlist': [{
1366 'info_dict': {
1367 'id': 'jvGDaLqkpTg',
1368 'ext': 'mp4',
1369 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1370 'description': 'md5:e03b909557865076822aa169218d6a5d',
1371 'duration': 10643,
1372 'upload_date': '20161111',
1373 'uploader': 'Team PGP',
1374 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1375 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1376 },
1377 }, {
1378 'info_dict': {
1379 'id': '3AKt1R1aDnw',
1380 'ext': 'mp4',
1381 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1382 'description': 'md5:e03b909557865076822aa169218d6a5d',
1383 'duration': 10991,
1384 'upload_date': '20161111',
1385 'uploader': 'Team PGP',
1386 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1387 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1388 },
1389 }, {
1390 'info_dict': {
1391 'id': 'RtAMM00gpVc',
1392 'ext': 'mp4',
1393 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1394 'description': 'md5:e03b909557865076822aa169218d6a5d',
1395 'duration': 10995,
1396 'upload_date': '20161111',
1397 'uploader': 'Team PGP',
1398 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1399 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1400 },
1401 }, {
1402 'info_dict': {
1403 'id': '6N2fdlP3C5U',
1404 'ext': 'mp4',
1405 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1406 'description': 'md5:e03b909557865076822aa169218d6a5d',
1407 'duration': 10990,
1408 'upload_date': '20161111',
1409 'uploader': 'Team PGP',
1410 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1411 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1412 },
1413 }],
1414 'params': {
1415 'skip_download': True,
1416 },
1417 'skip': 'Not multifeed anymore',
1418 },
1419 {
1420 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1421 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1422 'info_dict': {
1423 'id': 'gVfLd0zydlo',
1424 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1425 },
1426 'playlist_count': 2,
1427 'skip': 'Not multifeed anymore',
1428 },
1429 {
1430 'url': 'https://vid.plus/FlRa-iH7PGw',
1431 'only_matching': True,
1432 },
1433 {
1434 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1435 'only_matching': True,
1436 },
1437 {
1438 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1439 # Also tests cut-off URL expansion in video description (see
1440 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1441 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1442 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1443 'info_dict': {
1444 'id': 'lsguqyKfVQg',
1445 'ext': 'mp4',
1446 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1447 'alt_title': 'Dark Walk',
1448 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1449 'duration': 133,
1450 'upload_date': '20151119',
1451 'uploader_id': 'IronSoulElf',
1452 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1453 'uploader': 'IronSoulElf',
1454 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1455 'track': 'Dark Walk',
1456 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1457 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1458 },
1459 'params': {
1460 'skip_download': True,
1461 },
1462 },
1463 {
1464 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1465 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1466 'only_matching': True,
1467 },
1468 {
1469 # Video with yt:stretch=17:0
1470 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1471 'info_dict': {
1472 'id': 'Q39EVAstoRM',
1473 'ext': 'mp4',
1474 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1475 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1476 'upload_date': '20151107',
1477 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1478 'uploader': 'CH GAMER DROID',
1479 },
1480 'params': {
1481 'skip_download': True,
1482 },
1483 'skip': 'This video does not exist.',
1484 },
1485 {
1486 # Video with incomplete 'yt:stretch=16:'
1487 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1488 'only_matching': True,
1489 },
1490 {
1491 # Video licensed under Creative Commons
1492 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1493 'info_dict': {
1494 'id': 'M4gD1WSo5mA',
1495 'ext': 'mp4',
1496 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1497 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1498 'duration': 721,
1499 'upload_date': '20150127',
1500 'uploader_id': 'BerkmanCenter',
1501 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1502 'uploader': 'The Berkman Klein Center for Internet & Society',
1503 'license': 'Creative Commons Attribution license (reuse allowed)',
1504 },
1505 'params': {
1506 'skip_download': True,
1507 },
1508 },
1509 {
1510 # Channel-like uploader_url
1511 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1512 'info_dict': {
1513 'id': 'eQcmzGIKrzg',
1514 'ext': 'mp4',
1515 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1516 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1517 'duration': 4060,
1518 'upload_date': '20151119',
1519 'uploader': 'Bernie Sanders',
1520 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1521 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1522 'license': 'Creative Commons Attribution license (reuse allowed)',
1523 },
1524 'params': {
1525 'skip_download': True,
1526 },
1527 },
1528 {
1529 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1530 'only_matching': True,
1531 },
1532 {
1533 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1534 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1535 'only_matching': True,
1536 },
1537 {
1538 # Rental video preview
1539 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1540 'info_dict': {
1541 'id': 'uGpuVWrhIzE',
1542 'ext': 'mp4',
1543 'title': 'Piku - Trailer',
1544 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1545 'upload_date': '20150811',
1546 'uploader': 'FlixMatrix',
1547 'uploader_id': 'FlixMatrixKaravan',
1548 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1549 'license': 'Standard YouTube License',
1550 },
1551 'params': {
1552 'skip_download': True,
1553 },
1554 'skip': 'This video is not available.',
1555 },
1556 {
1557 # YouTube Red video with episode data
1558 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1559 'info_dict': {
1560 'id': 'iqKdEhx-dD4',
1561 'ext': 'mp4',
1562 'title': 'Isolation - Mind Field (Ep 1)',
1563 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1564 'duration': 2085,
1565 'upload_date': '20170118',
1566 'uploader': 'Vsauce',
1567 'uploader_id': 'Vsauce',
1568 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1569 'series': 'Mind Field',
1570 'season_number': 1,
1571 'episode_number': 1,
1572 },
1573 'params': {
1574 'skip_download': True,
1575 },
1576 'expected_warnings': [
1577 'Skipping DASH manifest',
1578 ],
1579 },
1580 {
1581 # The following content has been identified by the YouTube community
1582 # as inappropriate or offensive to some audiences.
1583 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1584 'info_dict': {
1585 'id': '6SJNVb0GnPI',
1586 'ext': 'mp4',
1587 'title': 'Race Differences in Intelligence',
1588 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1589 'duration': 965,
1590 'upload_date': '20140124',
1591 'uploader': 'New Century Foundation',
1592 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1593 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1594 },
1595 'params': {
1596 'skip_download': True,
1597 },
1598 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1599 },
1600 {
1601 # itag 212
1602 'url': '1t24XAntNCY',
1603 'only_matching': True,
1604 },
1605 {
1606 # geo restricted to JP
1607 'url': 'sJL6WA-aGkQ',
1608 'only_matching': True,
1609 },
1610 {
1611 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1612 'only_matching': True,
1613 },
1614 {
1615 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1616 'only_matching': True,
1617 },
1618 {
1619 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1620 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1621 'only_matching': True,
1622 },
1623 {
1624 # DRM protected
1625 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1626 'only_matching': True,
1627 },
1628 {
1629 # Video with unsupported adaptive stream type formats
1630 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1631 'info_dict': {
1632 'id': 'Z4Vy8R84T1U',
1633 'ext': 'mp4',
1634 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1635 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1636 'duration': 433,
1637 'upload_date': '20130923',
1638 'uploader': 'Amelia Putri Harwita',
1639 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1640 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1641 'formats': 'maxcount:10',
1642 },
1643 'params': {
1644 'skip_download': True,
1645 'youtube_include_dash_manifest': False,
1646 },
1647 'skip': 'not actual anymore',
1648 },
1649 {
1650 # Youtube Music Auto-generated description
1651 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1652 'info_dict': {
1653 'id': 'MgNrAu2pzNs',
1654 'ext': 'mp4',
1655 'title': 'Voyeur Girl',
1656 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1657 'upload_date': '20190312',
1658 'uploader': 'Stephen - Topic',
1659 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1660 'artist': 'Stephen',
1661 'track': 'Voyeur Girl',
1662 'album': 'it\'s too much love to know my dear',
1663 'release_date': '20190313',
1664 'release_year': 2019,
1665 },
1666 'params': {
1667 'skip_download': True,
1668 },
1669 },
1670 {
1671 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1672 'only_matching': True,
1673 },
1674 {
1675 # invalid -> valid video id redirection
1676 'url': 'DJztXj2GPfl',
1677 'info_dict': {
1678 'id': 'DJztXj2GPfk',
1679 'ext': 'mp4',
1680 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1681 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1682 'upload_date': '20090125',
1683 'uploader': 'Prochorowka',
1684 'uploader_id': 'Prochorowka',
1685 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1686 'artist': 'Panjabi MC',
1687 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1688 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1689 },
1690 'params': {
1691 'skip_download': True,
1692 },
1693 'skip': 'Video unavailable',
1694 },
1695 {
1696 # empty description results in an empty string
1697 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1698 'info_dict': {
1699 'id': 'x41yOUIvK2k',
1700 'ext': 'mp4',
1701 'title': 'IMG 3456',
1702 'description': '',
1703 'upload_date': '20170613',
1704 'uploader_id': 'ElevageOrVert',
1705 'uploader': 'ElevageOrVert',
1706 },
1707 'params': {
1708 'skip_download': True,
1709 },
1710 },
1711 {
1712 # with '};' inside yt initial data (see [1])
1713 # see [2] for an example with '};' inside ytInitialPlayerResponse
1714 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1715 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1716 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1717 'info_dict': {
1718 'id': 'CHqg6qOn4no',
1719 'ext': 'mp4',
1720 'title': 'Part 77 Sort a list of simple types in c#',
1721 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1722 'upload_date': '20130831',
1723 'uploader_id': 'kudvenkat',
1724 'uploader': 'kudvenkat',
1725 },
1726 'params': {
1727 'skip_download': True,
1728 },
1729 },
1730 {
1731 # another example of '};' in ytInitialData
1732 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1733 'only_matching': True,
1734 },
1735 {
1736 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1737 'only_matching': True,
1738 },
1739 {
1740 # https://github.com/ytdl-org/youtube-dl/pull/28094
1741 'url': 'OtqTfy26tG0',
1742 'info_dict': {
1743 'id': 'OtqTfy26tG0',
1744 'ext': 'mp4',
1745 'title': 'Burn Out',
1746 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1747 'upload_date': '20141120',
1748 'uploader': 'The Cinematic Orchestra - Topic',
1749 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1750 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1751 'artist': 'The Cinematic Orchestra',
1752 'track': 'Burn Out',
1753 'album': 'Every Day',
1754 'release_data': None,
1755 'release_year': None,
1756 },
1757 'params': {
1758 'skip_download': True,
1759 },
1760 },
1761 {
1762 # controversial video, only works with bpctr when authenticated with cookies
1763 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1764 'only_matching': True,
1765 },
1766 {
1767 # controversial video, requires bpctr/contentCheckOk
1768 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1769 'info_dict': {
1770 'id': 'SZJvDhaSDnc',
1771 'ext': 'mp4',
1772 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1773 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1774 'uploader': 'CBS This Morning',
1775 'uploader_id': 'CBSThisMorning',
1776 'upload_date': '20140716',
1777 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1778 }
1779 },
1780 {
1781 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1782 'url': 'cBvYw8_A0vQ',
1783 'info_dict': {
1784 'id': 'cBvYw8_A0vQ',
1785 'ext': 'mp4',
1786 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1787 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1788 'upload_date': '20201120',
1789 'uploader': 'Walk around Japan',
1790 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1791 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1792 },
1793 'params': {
1794 'skip_download': True,
1795 },
1796 }, {
1797 # Has multiple audio streams
1798 'url': 'WaOKSUlf4TM',
1799 'only_matching': True
1800 }, {
1801 # Requires Premium: has format 141 when requested using YTM url
1802 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1803 'only_matching': True
1804 }, {
1805 # multiple subtitles with same lang_code
1806 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1807 'only_matching': True,
1808 }, {
1809 # Force use android client fallback
1810 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1811 'info_dict': {
1812 'id': 'YOelRv7fMxY',
1813 'title': 'DIGGING A SECRET TUNNEL Part 1',
1814 'ext': '3gp',
1815 'upload_date': '20210624',
1816 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1817 'uploader': 'colinfurze',
1818 'uploader_id': 'colinfurze',
1819 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1820 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1821 },
1822 'params': {
1823 'format': '17', # 3gp format available on android
1824 'extractor_args': {'youtube': {'player_client': ['android']}},
1825 },
1826 },
1827 {
1828 # Skip download of additional client configs (remix client config in this case)
1829 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1830 'only_matching': True,
1831 'params': {
1832 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1833 },
1834 }
1835 ]
1836
1837 @classmethod
1838 def suitable(cls, url):
1839 # Hack for lazy extractors until more generic solution is implemented
1840 # (see #28780)
1841 from .youtube import parse_qs
1842 qs = parse_qs(url)
1843 if qs.get('list', [None])[0]:
1844 return False
1845 return super(YoutubeIE, cls).suitable(url)
1846
1847 def __init__(self, *args, **kwargs):
1848 super(YoutubeIE, self).__init__(*args, **kwargs)
1849 self._code_cache = {}
1850 self._player_cache = {}
1851
1852 def _extract_player_url(self, ytcfg=None, webpage=None):
1853 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1854 if not player_url and webpage:
1855 player_url = self._search_regex(
1856 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1857 webpage, 'player URL', fatal=False)
1858 if not player_url:
1859 return None
1860 if player_url.startswith('//'):
1861 player_url = 'https:' + player_url
1862 elif not re.match(r'https?://', player_url):
1863 player_url = compat_urlparse.urljoin(
1864 'https://www.youtube.com', player_url)
1865 return player_url
1866
1867 def _signature_cache_id(self, example_sig):
1868 """ Return a string representation of a signature """
1869 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1870
1871 @classmethod
1872 def _extract_player_info(cls, player_url):
1873 for player_re in cls._PLAYER_INFO_RE:
1874 id_m = re.search(player_re, player_url)
1875 if id_m:
1876 break
1877 else:
1878 raise ExtractorError('Cannot identify player %r' % player_url)
1879 return id_m.group('id')
1880
1881 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1882 player_id = self._extract_player_info(player_url)
1883 if player_id not in self._code_cache:
1884 self._code_cache[player_id] = self._download_webpage(
1885 player_url, video_id, fatal=fatal,
1886 note='Downloading player ' + player_id,
1887 errnote='Download of %s failed' % player_url)
1888 return player_id in self._code_cache
1889
1890 def _extract_signature_function(self, video_id, player_url, example_sig):
1891 player_id = self._extract_player_info(player_url)
1892
1893 # Read from filesystem cache
1894 func_id = 'js_%s_%s' % (
1895 player_id, self._signature_cache_id(example_sig))
1896 assert os.path.basename(func_id) == func_id
1897
1898 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1899 if cache_spec is not None:
1900 return lambda s: ''.join(s[i] for i in cache_spec)
1901
1902 if self._load_player(video_id, player_url):
1903 code = self._code_cache[player_id]
1904 res = self._parse_sig_js(code)
1905
1906 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1907 cache_res = res(test_string)
1908 cache_spec = [ord(c) for c in cache_res]
1909
1910 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1911 return res
1912
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the signature function *func*.

        Runs func on an identity string, recovers the index permutation, and
        compresses consecutive runs into slice expressions. Triggered from
        _decrypt_signature when the 'youtube_print_sig_code' param is set.
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a Python slice, omitting defaults (0 start, step 1)
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk consecutive index pairs, merging runs with step +/-1 into slices
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    # Run broken: emit the accumulated slice
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or the final open run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1951
    def _parse_sig_js(self, jscode):
        """Find the signature-scrambling function in the player JS.

        Returns a callable mapping an encrypted signature string to its
        decrypted form, executed via JSInterpreter.
        """
        # Patterns are ordered newest-first; the obsolete ones are kept for
        # older player versions
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes the signature as its single argument
        return lambda s: initial_function([s])
1975
1976 def _decrypt_signature(self, s, video_id, player_url):
1977 """Turn the encrypted s field into a working signature"""
1978
1979 if player_url is None:
1980 raise ExtractorError('Cannot decrypt signature without player_url')
1981
1982 try:
1983 player_id = (player_url, self._signature_cache_id(s))
1984 if player_id not in self._player_cache:
1985 func = self._extract_signature_function(
1986 video_id, player_url, s
1987 )
1988 self._player_cache[player_id] = func
1989 func = self._player_cache[player_id]
1990 if self.get_param('youtube_print_sig_code'):
1991 self._print_sig_code(func, s)
1992 return func(s)
1993 except Exception as e:
1994 tb = traceback.format_exc()
1995 raise ExtractorError(
1996 'Signature extraction failed: ' + tb, cause=e)
1997
1998 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1999 """
2000 Extract signatureTimestamp (sts)
2001 Required to tell API what sig/player version is in use.
2002 """
2003 sts = None
2004 if isinstance(ytcfg, dict):
2005 sts = int_or_none(ytcfg.get('STS'))
2006
2007 if not sts:
2008 # Attempt to extract from player
2009 if player_url is None:
2010 error_msg = 'Cannot extract signature timestamp without player_url.'
2011 if fatal:
2012 raise ExtractorError(error_msg)
2013 self.report_warning(error_msg)
2014 return
2015 if self._load_player(video_id, player_url, fatal=fatal):
2016 player_id = self._extract_player_info(player_url)
2017 code = self._code_cache[player_id]
2018 sts = int_or_none(self._search_regex(
2019 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2020 'JS player signature timestamp', group='sts', fatal=fatal))
2021 return sts
2022
2023 def _mark_watched(self, video_id, player_responses):
2024 playback_url = traverse_obj(
2025 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2026 expected_type=url_or_none, get_all=False)
2027 if not playback_url:
2028 self.report_warning('Unable to mark watched')
2029 return
2030 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2031 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2032
2033 # cpn generation algorithm is reverse engineered from base.js.
2034 # In fact it works even with dummy cpn.
2035 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2036 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2037
2038 qs.update({
2039 'ver': ['2'],
2040 'cpn': [cpn],
2041 })
2042 playback_url = compat_urlparse.urlunparse(
2043 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2044
2045 self._download_webpage(
2046 playback_url, video_id, 'Marking watched',
2047 'Unable to mark watched', fatal=False)
2048
2049 @staticmethod
2050 def _extract_urls(webpage):
2051 # Embedded YouTube player
2052 entries = [
2053 unescapeHTML(mobj.group('url'))
2054 for mobj in re.finditer(r'''(?x)
2055 (?:
2056 <iframe[^>]+?src=|
2057 data-video-url=|
2058 <embed[^>]+?src=|
2059 embedSWF\(?:\s*|
2060 <object[^>]+data=|
2061 new\s+SWFObject\(
2062 )
2063 (["\'])
2064 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2065 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2066 \1''', webpage)]
2067
2068 # lazyYT YouTube embed
2069 entries.extend(list(map(
2070 unescapeHTML,
2071 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2072
2073 # Wordpress "YouTube Video Importer" plugin
2074 matches = re.findall(r'''(?x)<div[^>]+
2075 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2076 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2077 entries.extend(m[-1] for m in matches)
2078
2079 return entries
2080
2081 @staticmethod
2082 def _extract_url(webpage):
2083 urls = YoutubeIE._extract_urls(webpage)
2084 return urls[0] if urls else None
2085
2086 @classmethod
2087 def extract_id(cls, url):
2088 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2089 if mobj is None:
2090 raise ExtractorError('Invalid URL: %s' % url)
2091 return mobj.group('id')
2092
2093 def _extract_chapters_from_json(self, data, duration):
2094 chapter_list = traverse_obj(
2095 data, (
2096 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2097 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2098 ), expected_type=list)
2099
2100 return self._extract_chapters(
2101 chapter_list,
2102 chapter_time=lambda chapter: float_or_none(
2103 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2104 chapter_title=lambda chapter: traverse_obj(
2105 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2106 duration=duration)
2107
2108 def _extract_chapters_from_engagement_panel(self, data, duration):
2109 content_list = traverse_obj(
2110 data,
2111 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2112 expected_type=list, default=[])
2113 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2114 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2115
2116 return next((
2117 filter(None, (
2118 self._extract_chapters(
2119 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2120 chapter_time, chapter_title, duration)
2121 for contents in content_list
2122 ))), [])
2123
    def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
        """Build a list of chapter dicts from renderer entries.

        chapter_time/chapter_title are callables mapping a renderer entry to
        its start time (seconds) and title. Each chapter's end_time is set to
        the next chapter's start; the last one is capped at duration.
        """
        chapters = []
        last_chapter = {'start_time': 0}
        for idx, chapter in enumerate(chapter_list or []):
            title = chapter_title(chapter)
            start_time = chapter_time(chapter)
            if start_time is None:
                continue
            # Close the previous chapter at this chapter's start
            last_chapter['end_time'] = start_time
            if start_time < last_chapter['start_time']:
                # Non-monotonic start time: for the second entry assume the
                # first chapter was bogus and drop it; otherwise skip this one
                if idx == 1:
                    chapters.pop()
                    self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
                else:
                    self.report_warning(f'Invalid start time for chapter "{title}"')
                    continue
            last_chapter = {'start_time': start_time, 'title': title}
            chapters.append(last_chapter)
        # Close the final chapter at the video's duration
        last_chapter['end_time'] = duration
        return chapters
2144
2145 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2146 return self._parse_json(self._search_regex(
2147 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2148 regex), webpage, name, default='{}'), video_id, fatal=False)
2149
2150 @staticmethod
2151 def parse_time_text(time_text):
2152 """
2153 Parse the comment time text
2154 time_text is in the format 'X units ago (edited)'
2155 """
2156 time_text_split = time_text.split(' ')
2157 if len(time_text_split) >= 3:
2158 try:
2159 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2160 except ValueError:
2161 return None
2162
2163 def _extract_comment(self, comment_renderer, parent=None):
2164 comment_id = comment_renderer.get('commentId')
2165 if not comment_id:
2166 return
2167
2168 text = self._get_text(comment_renderer, 'contentText')
2169
2170 # note: timestamp is an estimate calculated from the current time and time_text
2171 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2172 time_text_dt = self.parse_time_text(time_text)
2173 if isinstance(time_text_dt, datetime.datetime):
2174 timestamp = calendar.timegm(time_text_dt.timetuple())
2175 author = self._get_text(comment_renderer, 'authorText')
2176 author_id = try_get(comment_renderer,
2177 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2178
2179 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2180 lambda x: x['likeCount']), compat_str)) or 0
2181 author_thumbnail = try_get(comment_renderer,
2182 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2183
2184 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2185 is_favorited = 'creatorHeart' in (try_get(
2186 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2187 return {
2188 'id': comment_id,
2189 'text': text,
2190 'timestamp': timestamp,
2191 'time_text': time_text,
2192 'like_count': votes,
2193 'is_favorited': is_favorited,
2194 'author': author,
2195 'author_id': author_id,
2196 'author_thumbnail': author_thumbnail,
2197 'author_is_uploader': author_is_uploader,
2198 'parent': parent or 'root'
2199 }
2200
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """Generator of comment info dicts for a video or a reply thread.

        Yields an int (the estimated total comment count) before the first
        comment when it can be determined. Recurses once for reply threads
        (parent set). comment_counts is shared mutable state across recursive
        calls: [comments yielded so far, estimated total, reply thread #].
        """

        def extract_header(contents):
            # Locate the comments header: report the expected total and return
            # the continuation matching the requested sort order.
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each top-level comment in contents, then recurse into its
            # reply continuation (depth bounded by max_comment_depth below).
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the 'next' endpoint until no continuation remains
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    ' ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry visitorData forward so subsequent pages share the session
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2371
2372 @staticmethod
2373 def _generate_comment_continuation(video_id):
2374 """
2375 Generates initial comment section continuation token from given video id
2376 """
2377 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2378 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2379 new_continuation_intlist = list(itertools.chain.from_iterable(
2380 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2381 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2382
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            # Walk the initial-data entries for the first known comment section
            # per entry and delegate pagination to _comment_entries
            if isinstance(contents, list):
                for entry in contents:
                    for key, renderer in entry.items():
                        if key not in known_entry_comment_renderers:
                            continue
                        yield from self._comment_entries(
                            renderer, video_id=video_id, ytcfg=ytcfg,
                            identity_token=self._extract_identity_token(webpage, item_id=video_id),
                            account_syncid=self._extract_account_syncid(ytcfg))
                        break
        comments = []
        known_entry_comment_renderers = ('itemSectionRenderer',)
        estimated_total = 0
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
        # Force English regardless of account setting to prevent parsing issues
        # See: https://github.com/yt-dlp/yt-dlp/issues/532
        ytcfg = copy.deepcopy(ytcfg)
        traverse_obj(
            ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
        try:
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                # An int yielded before the comments is the estimated total count
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            # Ctrl-C stops comment download but keeps what was fetched so far
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2420
2421 @staticmethod
2422 def _generate_player_context(sts=None):
2423 context = {
2424 'html5Preference': 'HTML5_PREF_WANTS',
2425 }
2426 if sts is not None:
2427 context['signatureTimestamp'] = sts
2428 return {
2429 'playbackContext': {
2430 'contentPlaybackContext': context
2431 },
2432 'contentCheckOk': True,
2433 'racyCheckOk': True
2434 }
2435
2436 @staticmethod
2437 def _is_agegated(player_response):
2438 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2439 return True
2440
2441 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2442 AGE_GATE_REASONS = (
2443 'confirm your age', 'age-restricted', 'inappropriate', # reason
2444 'age_verification_required', 'age_check_required', # status
2445 )
2446 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2447
2448 @staticmethod
2449 def _is_unplayable(player_response):
2450 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2451
2452 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
2453
2454 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2455 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2456 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
2457 headers = self.generate_api_headers(
2458 player_ytcfg, identity_token, syncid,
2459 default_client=client, session_index=session_index)
2460
2461 yt_query = {'videoId': video_id}
2462 yt_query.update(self._generate_player_context(sts))
2463 return self._extract_response(
2464 item_id=video_id, ep='player', query=yt_query,
2465 ytcfg=player_ytcfg, headers=headers, fatal=True,
2466 default_client=client,
2467 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2468 ) or None
2469
2470 def _get_requested_clients(self, url, smuggled_data):
2471 requested_clients = []
2472 allowed_clients = sorted(
2473 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2474 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2475 for client in self._configuration_arg('player_client'):
2476 if client in allowed_clients:
2477 requested_clients.append(client)
2478 elif client == 'all':
2479 requested_clients.extend(allowed_clients)
2480 else:
2481 self.report_warning(f'Skipping unsupported client {client}')
2482 if not requested_clients:
2483 requested_clients = ['android', 'web']
2484
2485 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2486 requested_clients.extend(
2487 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2488
2489 return orderedSet(requested_clients)
2490
2491 def _extract_player_ytcfg(self, client, video_id):
2492 url = {
2493 'web_music': 'https://music.youtube.com',
2494 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2495 }.get(client)
2496 if not url:
2497 return {}
2498 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2499 return self.extract_ytcfg(video_id, webpage) or {}
2500
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
        """Yield player API responses for each requested client.

        May append extra clients at runtime (agegate/creator variants) when a
        response indicates age restriction. Download errors are deferred until
        all clients have been tried; only raised if nothing was yielded.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        # Reverse so clients.pop() consumes in the originally requested order
        original_clients = clients
        clients = clients[::-1]

        def append_client(client_name):
            # Queue an extra client, but never one the user already requested
            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
                clients.append(client_name)

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        yielded_pr = False
        if initial_pr:
            pr = dict(initial_pr)
            pr['streamingData'] = None
            yielded_pr = True
            yield pr

        last_error = None
        while clients:
            client = clients.pop()
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            try:
                pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
            except ExtractorError as e:
                # Defer the error: a later client may still succeed
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

            if pr:
                yielded_pr = True
                yield pr

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header():
                append_client(client.replace('_agegate', '_creator'))
            elif self._is_agegated(pr):
                append_client(f'{client}_agegate')

        if last_error:
            if not yielded_pr:
                raise last_error
            self.report_warning(last_error)
2556
    def _extract_formats(self, streaming_data, video_id, player_url, is_live):
        """
        Yield format dicts built from the player responses' streamingData.

        Progressive/adaptive formats are emitted first (decrypting
        signatureCipher URLs via player_url when needed), followed by formats
        from the HLS and DASH manifests whose itag was not already seen.
        """
        # itags/stream_ids: dedupe bookkeeping across all sources;
        # itag_qualities/res_qualities: lookup tables for guess_quality() below
        itags, stream_ids = [], []
        itag_qualities, res_qualities = {}, {}
        q = qualities([
            # Normally tiny is the smallest video-only formats. But
            # audio-only formats with unknown quality may get tagged as tiny
            'tiny',
            'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
            'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
        ])
        # Flatten 'formats' + 'adaptiveFormats' across all player responses
        streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

        for fmt in streaming_formats:
            # Skip formats declaring DRM or a per-fragment target duration
            if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
                continue

            itag = str_or_none(fmt.get('itag'))
            audio_track = fmt.get('audioTrack') or {}
            # itag alone is not unique: the same itag may occur once per audio track
            stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
            if stream_id in stream_ids:
                continue

            quality = fmt.get('quality')
            height = int_or_none(fmt.get('height'))
            # Prefer the audio quality label when the video quality is absent/'tiny'
            if quality == 'tiny' or not quality:
                quality = fmt.get('audioQuality', '').lower() or quality
            # The 3gp format (17) in android client has a quality of "small",
            # but is actually worse than other formats
            if itag == '17':
                quality = 'tiny'
            if quality:
                if itag:
                    itag_qualities[itag] = quality
                if height:
                    res_qualities[height] = quality
            # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
            # (adding `&sq=0` to the URL) and parsing emsg box to determine the
            # number of fragment that would subsequently requested with (`&sq=N`)
            if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
                continue

            fmt_url = fmt.get('url')
            if not fmt_url:
                # No plain URL: reconstruct it from signatureCipher, which
                # requires the player JS (player_url) to decrypt the signature
                sc = compat_parse_qs(fmt.get('signatureCipher'))
                fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
                encrypted_sig = try_get(sc, lambda x: x['s'][0])
                if not (sc and fmt_url and encrypted_sig):
                    continue
                if not player_url:
                    continue
                signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
                sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
                fmt_url += '&' + sp + '=' + signature

            if itag:
                itags.append(itag)
                stream_ids.append(stream_id)

            tbr = float_or_none(
                fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
            dct = {
                'asr': int_or_none(fmt.get('audioSampleRate')),
                'filesize': int_or_none(fmt.get('contentLength')),
                'format_id': itag,
                'format_note': ', '.join(filter(None, (
                    audio_track.get('displayName'),
                    fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
                'fps': int_or_none(fmt.get('fps')),
                'height': height,
                'quality': q(quality),
                'tbr': tbr,
                'url': fmt_url,
                'width': int_or_none(fmt.get('width')),
                'language': audio_track.get('id', '').split('.')[0],
            }
            # mimeType looks like 'video/mp4; codecs="avc1..., mp4a..."'
            mime_mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
            if mime_mobj:
                dct['ext'] = mimetype2ext(mime_mobj.group(1))
                dct.update(parse_codecs(mime_mobj.group(2)))
            no_audio = dct.get('acodec') == 'none'
            no_video = dct.get('vcodec') == 'none'
            # For single-stream (adaptive) formats the total bitrate is the
            # stream's own bitrate
            if no_audio:
                dct['vbr'] = tbr
            if no_video:
                dct['abr'] = tbr
            if no_audio or no_video:
                dct['downloader_options'] = {
                    # Youtube throttles chunks >~10M
                    'http_chunk_size': 10485760,
                }
                if dct.get('ext'):
                    dct['container'] = dct['ext'] + '_dash'
            yield dct

        skip_manifests = self._configuration_arg('skip')
        get_dash = (
            (not is_live or self._configuration_arg('include_live_dash'))
            and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
        get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

        def guess_quality(f):
            # Map a manifest format back to a quality seen above, first by
            # itag, then by resolution; -1 when unknown
            for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
                if val in qdict:
                    return q(qdict[val])
            return -1

        for sd in streaming_data:
            hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
            if hls_manifest_url:
                for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                    itag = self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)
                    if itag in itags:
                        continue
                    if itag:
                        f['format_id'] = itag
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    yield f

            dash_manifest_url = get_dash and sd.get('dashManifestUrl')
            if dash_manifest_url:
                for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                    itag = f['format_id']
                    if itag in itags:
                        continue
                    if itag:
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    # File size is encoded in the /clen/ path segment
                    filesize = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url')
                        or f['url'], 'file size', default=None))
                    if filesize:
                        f['filesize'] = filesize
                    yield f
2693
2694 def _real_extract(self, url):
2695 url, smuggled_data = unsmuggle_url(url, {})
2696 video_id = self._match_id(url)
2697
2698 base_url = self.http_scheme() + '//www.youtube.com/'
2699 webpage_url = base_url + 'watch?v=' + video_id
2700 webpage = self._download_webpage(
2701 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2702
2703 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2704 player_url = self._extract_player_url(master_ytcfg, webpage)
2705 identity_token = self._extract_identity_token(webpage, video_id)
2706
2707 player_responses = list(self._extract_player_responses(
2708 self._get_requested_clients(url, smuggled_data),
2709 video_id, webpage, master_ytcfg, player_url, identity_token))
2710
2711 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2712
2713 playability_statuses = traverse_obj(
2714 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2715
2716 trailer_video_id = get_first(
2717 playability_statuses,
2718 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2719 expected_type=str)
2720 if trailer_video_id:
2721 return self.url_result(
2722 trailer_video_id, self.ie_key(), trailer_video_id)
2723
2724 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2725 if webpage else (lambda x: None))
2726
2727 video_details = traverse_obj(
2728 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2729 microformats = traverse_obj(
2730 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2731 expected_type=dict, default=[])
2732 video_title = (
2733 get_first(video_details, 'title')
2734 or self._get_text(microformats, (..., 'title'))
2735 or search_meta(['og:title', 'twitter:title', 'title']))
2736 video_description = get_first(video_details, 'shortDescription')
2737
2738 if not smuggled_data.get('force_singlefeed', False):
2739 if not self.get_param('noplaylist'):
2740 multifeed_metadata_list = get_first(
2741 player_responses,
2742 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2743 expected_type=str)
2744 if multifeed_metadata_list:
2745 entries = []
2746 feed_ids = []
2747 for feed in multifeed_metadata_list.split(','):
2748 # Unquote should take place before split on comma (,) since textual
2749 # fields may contain comma as well (see
2750 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2751 feed_data = compat_parse_qs(
2752 compat_urllib_parse_unquote_plus(feed))
2753
2754 def feed_entry(name):
2755 return try_get(
2756 feed_data, lambda x: x[name][0], compat_str)
2757
2758 feed_id = feed_entry('id')
2759 if not feed_id:
2760 continue
2761 feed_title = feed_entry('title')
2762 title = video_title
2763 if feed_title:
2764 title += ' (%s)' % feed_title
2765 entries.append({
2766 '_type': 'url_transparent',
2767 'ie_key': 'Youtube',
2768 'url': smuggle_url(
2769 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2770 {'force_singlefeed': True}),
2771 'title': title,
2772 })
2773 feed_ids.append(feed_id)
2774 self.to_screen(
2775 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2776 % (', '.join(feed_ids), video_id))
2777 return self.playlist_result(
2778 entries, video_id, video_title, video_description)
2779 else:
2780 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2781
2782 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2783 is_live = get_first(video_details, 'isLive')
2784 if is_live is None:
2785 is_live = get_first(live_broadcast_details, 'isLiveNow')
2786
2787 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2788 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2789
2790 if not formats:
2791 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2792 self.raise_no_formats(
2793 'This video is DRM protected.', expected=True)
2794 pemr = get_first(
2795 playability_statuses,
2796 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2797 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2798 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2799 if subreason:
2800 if subreason == 'The uploader has not made this video available in your country.':
2801 countries = get_first(microformats, 'availableCountries')
2802 if not countries:
2803 regions_allowed = search_meta('regionsAllowed')
2804 countries = regions_allowed.split(',') if regions_allowed else None
2805 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2806 reason += f'. {subreason}'
2807 if reason:
2808 self.raise_no_formats(reason, expected=True)
2809
2810 for f in formats:
2811 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
2812 f['source_preference'] = -10
2813 # TODO: this method is not reliable
2814 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
2815
2816 # Source is given priority since formats that throttle are given lower source_preference
2817 # When throttling issue is fully fixed, remove this
2818 self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
2819
2820 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2821 if not keywords and webpage:
2822 keywords = [
2823 unescapeHTML(m.group('content'))
2824 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2825 for keyword in keywords:
2826 if keyword.startswith('yt:stretch='):
2827 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2828 if mobj:
2829 # NB: float is intentional for forcing float division
2830 w, h = (float(v) for v in mobj.groups())
2831 if w > 0 and h > 0:
2832 ratio = w / h
2833 for f in formats:
2834 if f.get('vcodec') != 'none':
2835 f['stretched_ratio'] = ratio
2836 break
2837
2838 thumbnails = []
2839 thumbnail_dicts = traverse_obj(
2840 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2841 expected_type=dict, default=[])
2842 for thumbnail in thumbnail_dicts:
2843 thumbnail_url = thumbnail.get('url')
2844 if not thumbnail_url:
2845 continue
2846 # Sometimes youtube gives a wrong thumbnail URL. See:
2847 # https://github.com/yt-dlp/yt-dlp/issues/233
2848 # https://github.com/ytdl-org/youtube-dl/issues/28023
2849 if 'maxresdefault' in thumbnail_url:
2850 thumbnail_url = thumbnail_url.split('?')[0]
2851 thumbnails.append({
2852 'url': thumbnail_url,
2853 'height': int_or_none(thumbnail.get('height')),
2854 'width': int_or_none(thumbnail.get('width')),
2855 })
2856 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2857 if thumbnail_url:
2858 thumbnails.append({
2859 'url': thumbnail_url,
2860 })
2861 # The best resolution thumbnails sometimes does not appear in the webpage
2862 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2863 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2864 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2865 # TODO: Test them also? - For some videos, even these don't exist
2866 guaranteed_thumbnail_names = [
2867 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2868 'mqdefault', 'mq1', 'mq2', 'mq3',
2869 'default', '1', '2', '3'
2870 ]
2871 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2872 n_thumbnail_names = len(thumbnail_names)
2873
2874 thumbnails.extend({
2875 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2876 video_id=video_id, name=name, ext=ext,
2877 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2878 '_test_url': name in hq_thumbnail_names,
2879 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2880 for thumb in thumbnails:
2881 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2882 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2883 self._remove_duplicate_formats(thumbnails)
2884
2885 category = get_first(microformats, 'category') or search_meta('genre')
2886 channel_id = str_or_none(
2887 get_first(video_details, 'channelId')
2888 or get_first(microformats, 'externalChannelId')
2889 or search_meta('channelId'))
2890 duration = int_or_none(
2891 get_first(video_details, 'lengthSeconds')
2892 or get_first(microformats, 'lengthSeconds')
2893 or parse_duration(search_meta('duration'))) or None
2894 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2895
2896 live_content = get_first(video_details, 'isLiveContent')
2897 is_upcoming = get_first(video_details, 'isUpcoming')
2898 if is_live is None:
2899 if is_upcoming or live_content is False:
2900 is_live = False
2901 if is_upcoming is None and (live_content or is_live):
2902 is_upcoming = False
2903 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2904 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2905 if not duration and live_endtime and live_starttime:
2906 duration = live_endtime - live_starttime
2907
2908 info = {
2909 'id': video_id,
2910 'title': self._live_title(video_title) if is_live else video_title,
2911 'formats': formats,
2912 'thumbnails': thumbnails,
2913 'description': video_description,
2914 'upload_date': unified_strdate(
2915 get_first(microformats, 'uploadDate')
2916 or search_meta('uploadDate')),
2917 'uploader': get_first(video_details, 'author'),
2918 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2919 'uploader_url': owner_profile_url,
2920 'channel_id': channel_id,
2921 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2922 'duration': duration,
2923 'view_count': int_or_none(
2924 get_first((video_details, microformats), (..., 'viewCount'))
2925 or search_meta('interactionCount')),
2926 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2927 'age_limit': 18 if (
2928 get_first(microformats, 'isFamilySafe') is False
2929 or search_meta('isFamilyFriendly') == 'false'
2930 or search_meta('og:restrictions:age') == '18+') else 0,
2931 'webpage_url': webpage_url,
2932 'categories': [category] if category else None,
2933 'tags': keywords,
2934 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2935 'is_live': is_live,
2936 'was_live': (False if is_live or is_upcoming or live_content is False
2937 else None if is_live is None or is_upcoming is None
2938 else live_content),
2939 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2940 'release_timestamp': live_starttime,
2941 }
2942
2943 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2944 # Converted into dicts to remove duplicates
2945 captions = {
2946 sub.get('baseUrl'): sub
2947 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2948 translation_languages = {
2949 lang.get('languageCode'): lang.get('languageName')
2950 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2951 subtitles = {}
2952 if pctr:
2953 def process_language(container, base_url, lang_code, sub_name, query):
2954 lang_subs = container.setdefault(lang_code, [])
2955 for fmt in self._SUBTITLE_FORMATS:
2956 query.update({
2957 'fmt': fmt,
2958 })
2959 lang_subs.append({
2960 'ext': fmt,
2961 'url': update_url_query(base_url, query),
2962 'name': sub_name,
2963 })
2964
2965 for base_url, caption_track in captions.items():
2966 if not base_url:
2967 continue
2968 if caption_track.get('kind') != 'asr':
2969 lang_code = (
2970 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2971 or caption_track.get('languageCode'))
2972 if not lang_code:
2973 continue
2974 process_language(
2975 subtitles, base_url, lang_code,
2976 traverse_obj(caption_track, ('name', 'simpleText')),
2977 {})
2978 continue
2979 automatic_captions = {}
2980 for trans_code, trans_name in translation_languages.items():
2981 if not trans_code:
2982 continue
2983 process_language(
2984 automatic_captions, base_url, trans_code,
2985 self._get_text(trans_name, max_runs=1),
2986 {'tlang': trans_code})
2987 info['automatic_captions'] = automatic_captions
2988 info['subtitles'] = subtitles
2989
2990 parsed_url = compat_urllib_parse_urlparse(url)
2991 for component in [parsed_url.fragment, parsed_url.query]:
2992 query = compat_parse_qs(component)
2993 for k, v in query.items():
2994 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2995 d_k += '_time'
2996 if d_k not in info and k in s_ks:
2997 info[d_k] = parse_duration(query[k][0])
2998
2999 # Youtube Music Auto-generated description
3000 if video_description:
3001 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3002 if mobj:
3003 release_year = mobj.group('release_year')
3004 release_date = mobj.group('release_date')
3005 if release_date:
3006 release_date = release_date.replace('-', '')
3007 if not release_year:
3008 release_year = release_date[:4]
3009 info.update({
3010 'album': mobj.group('album'.strip()),
3011 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3012 'track': mobj.group('track').strip(),
3013 'release_date': release_date,
3014 'release_year': int_or_none(release_year),
3015 })
3016
3017 initial_data = None
3018 if webpage:
3019 initial_data = self._extract_yt_initial_variable(
3020 webpage, self._YT_INITIAL_DATA_RE, video_id,
3021 'yt initial data')
3022 if not initial_data:
3023 headers = self.generate_api_headers(
3024 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
3025 session_index=self._extract_session_index(master_ytcfg))
3026
3027 initial_data = self._extract_response(
3028 item_id=video_id, ep='next', fatal=False,
3029 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
3030 note='Downloading initial data API JSON')
3031
3032 try:
3033 # This will error if there is no livechat
3034 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3035 info['subtitles']['live_chat'] = [{
3036 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3037 'video_id': video_id,
3038 'ext': 'json',
3039 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3040 }]
3041 except (KeyError, IndexError, TypeError):
3042 pass
3043
3044 if initial_data:
3045 info['chapters'] = (
3046 self._extract_chapters_from_json(initial_data, duration)
3047 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3048 or None)
3049
3050 contents = try_get(
3051 initial_data,
3052 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3053 list) or []
3054 for content in contents:
3055 vpir = content.get('videoPrimaryInfoRenderer')
3056 if vpir:
3057 stl = vpir.get('superTitleLink')
3058 if stl:
3059 stl = self._get_text(stl)
3060 if try_get(
3061 vpir,
3062 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3063 info['location'] = stl
3064 else:
3065 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3066 if mobj:
3067 info.update({
3068 'series': mobj.group(1),
3069 'season_number': int(mobj.group(2)),
3070 'episode_number': int(mobj.group(3)),
3071 })
3072 for tlb in (try_get(
3073 vpir,
3074 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3075 list) or []):
3076 tbr = tlb.get('toggleButtonRenderer') or {}
3077 for getter, regex in [(
3078 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3079 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3080 lambda x: x['accessibility'],
3081 lambda x: x['accessibilityData']['accessibilityData'],
3082 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3083 label = (try_get(tbr, getter, dict) or {}).get('label')
3084 if label:
3085 mobj = re.match(regex, label)
3086 if mobj:
3087 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3088 break
3089 sbr_tooltip = try_get(
3090 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3091 if sbr_tooltip:
3092 like_count, dislike_count = sbr_tooltip.split(' / ')
3093 info.update({
3094 'like_count': str_to_int(like_count),
3095 'dislike_count': str_to_int(dislike_count),
3096 })
3097 vsir = content.get('videoSecondaryInfoRenderer')
3098 if vsir:
3099 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3100 rows = try_get(
3101 vsir,
3102 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3103 list) or []
3104 multiple_songs = False
3105 for row in rows:
3106 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3107 multiple_songs = True
3108 break
3109 for row in rows:
3110 mrr = row.get('metadataRowRenderer') or {}
3111 mrr_title = mrr.get('title')
3112 if not mrr_title:
3113 continue
3114 mrr_title = self._get_text(mrr, 'title')
3115 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3116 if mrr_title == 'License':
3117 info['license'] = mrr_contents_text
3118 elif not multiple_songs:
3119 if mrr_title == 'Album':
3120 info['album'] = mrr_contents_text
3121 elif mrr_title == 'Artist':
3122 info['artist'] = mrr_contents_text
3123 elif mrr_title == 'Song':
3124 info['track'] = mrr_contents_text
3125
3126 fallbacks = {
3127 'channel': 'uploader',
3128 'channel_id': 'uploader_id',
3129 'channel_url': 'uploader_url',
3130 }
3131 for to, frm in fallbacks.items():
3132 if not info.get(to):
3133 info[to] = info.get(frm)
3134
3135 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3136 v = info.get(s_k)
3137 if v:
3138 info[d_k] = v
3139
3140 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3141 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3142 is_membersonly = None
3143 is_premium = None
3144 if initial_data and is_private is not None:
3145 is_membersonly = False
3146 is_premium = False
3147 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3148 badge_labels = set()
3149 for content in contents:
3150 if not isinstance(content, dict):
3151 continue
3152 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3153 for badge_label in badge_labels:
3154 if badge_label.lower() == 'members only':
3155 is_membersonly = True
3156 elif badge_label.lower() == 'premium':
3157 is_premium = True
3158 elif badge_label.lower() == 'unlisted':
3159 is_unlisted = True
3160
3161 info['availability'] = self._availability(
3162 is_private=is_private,
3163 needs_premium=is_premium,
3164 needs_subscription=is_membersonly,
3165 needs_auth=info['age_limit'] >= 18,
3166 is_unlisted=None if is_private is None else is_unlisted)
3167
3168 # get xsrf for annotations or comments
3169 get_annotations = self.get_param('writeannotations', False)
3170 get_comments = self.get_param('getcomments', False)
3171 if get_annotations or get_comments:
3172 xsrf_token = None
3173 if master_ytcfg:
3174 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
3175 if not xsrf_token:
3176 xsrf_token = self._search_regex(
3177 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
3178 webpage, 'xsrf token', group='xsrf_token', fatal=False)
3179
3180 # annotations
3181 if get_annotations:
3182 invideo_url = get_first(
3183 player_responses,
3184 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3185 expected_type=str)
3186 if xsrf_token and invideo_url:
3187 xsrf_field_name = None
3188 if master_ytcfg:
3189 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3190 if not xsrf_field_name:
3191 xsrf_field_name = self._search_regex(
3192 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3193 webpage, 'xsrf field name',
3194 group='xsrf_field_name', default='session_token')
3195 info['annotations'] = self._download_webpage(
3196 self._proto_relative_url(invideo_url),
3197 video_id, note='Downloading annotations',
3198 errnote='Unable to download video annotations', fatal=False,
3199 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3200
3201 if get_comments:
3202 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3203
3204 self.mark_watched(video_id, player_responses)
3205
3206 return info
3207
3208
3209 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3210 IE_DESC = 'YouTube.com tab'
3211 _VALID_URL = r'''(?x)
3212 https?://
3213 (?:\w+\.)?
3214 (?:
3215 youtube(?:kids)?\.com|
3216 invidio\.us
3217 )/
3218 (?:
3219 (?P<channel_type>channel|c|user|browse)/|
3220 (?P<not_channel>
3221 feed/|hashtag/|
3222 (?:playlist|watch)\?.*?\blist=
3223 )|
3224 (?!(?:%s)\b) # Direct URLs
3225 )
3226 (?P<id>[^/?\#&]+)
3227 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3228 IE_NAME = 'youtube:tab'
3229
3230 _TESTS = [{
3231 'note': 'playlists, multipage',
3232 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3233 'playlist_mincount': 94,
3234 'info_dict': {
3235 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3236 'title': 'Игорь Клейнер - Playlists',
3237 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3238 'uploader': 'Игорь Клейнер',
3239 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3240 },
3241 }, {
3242 'note': 'playlists, multipage, different order',
3243 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3244 'playlist_mincount': 94,
3245 'info_dict': {
3246 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3247 'title': 'Игорь Клейнер - Playlists',
3248 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3249 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3250 'uploader': 'Игорь Клейнер',
3251 },
3252 }, {
3253 'note': 'playlists, series',
3254 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3255 'playlist_mincount': 5,
3256 'info_dict': {
3257 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3258 'title': '3Blue1Brown - Playlists',
3259 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3260 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3261 'uploader': '3Blue1Brown',
3262 },
3263 }, {
3264 'note': 'playlists, singlepage',
3265 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3266 'playlist_mincount': 4,
3267 'info_dict': {
3268 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3269 'title': 'ThirstForScience - Playlists',
3270 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3271 'uploader': 'ThirstForScience',
3272 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3273 }
3274 }, {
3275 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3276 'only_matching': True,
3277 }, {
3278 'note': 'basic, single video playlist',
3279 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3280 'info_dict': {
3281 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3282 'uploader': 'Sergey M.',
3283 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3284 'title': 'youtube-dl public playlist',
3285 },
3286 'playlist_count': 1,
3287 }, {
3288 'note': 'empty playlist',
3289 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3290 'info_dict': {
3291 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3292 'uploader': 'Sergey M.',
3293 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3294 'title': 'youtube-dl empty playlist',
3295 },
3296 'playlist_count': 0,
3297 }, {
3298 'note': 'Home tab',
3299 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3300 'info_dict': {
3301 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3302 'title': 'lex will - Home',
3303 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3304 'uploader': 'lex will',
3305 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3306 },
3307 'playlist_mincount': 2,
3308 }, {
3309 'note': 'Videos tab',
3310 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3311 'info_dict': {
3312 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3313 'title': 'lex will - Videos',
3314 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3315 'uploader': 'lex will',
3316 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3317 },
3318 'playlist_mincount': 975,
3319 }, {
3320 'note': 'Videos tab, sorted by popular',
3321 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3322 'info_dict': {
3323 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3324 'title': 'lex will - Videos',
3325 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3326 'uploader': 'lex will',
3327 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3328 },
3329 'playlist_mincount': 199,
3330 }, {
3331 'note': 'Playlists tab',
3332 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3333 'info_dict': {
3334 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3335 'title': 'lex will - Playlists',
3336 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3337 'uploader': 'lex will',
3338 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3339 },
3340 'playlist_mincount': 17,
3341 }, {
3342 'note': 'Community tab',
3343 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3344 'info_dict': {
3345 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3346 'title': 'lex will - Community',
3347 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3348 'uploader': 'lex will',
3349 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3350 },
3351 'playlist_mincount': 18,
3352 }, {
3353 'note': 'Channels tab',
3354 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3355 'info_dict': {
3356 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3357 'title': 'lex will - Channels',
3358 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3359 'uploader': 'lex will',
3360 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3361 },
3362 'playlist_mincount': 12,
3363 }, {
3364 'note': 'Search tab',
3365 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3366 'playlist_mincount': 40,
3367 'info_dict': {
3368 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3369 'title': '3Blue1Brown - Search - linear algebra',
3370 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3371 'uploader': '3Blue1Brown',
3372 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3373 },
3374 }, {
3375 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3376 'only_matching': True,
3377 }, {
3378 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3379 'only_matching': True,
3380 }, {
3381 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3382 'only_matching': True,
3383 }, {
3384 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3385 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3386 'info_dict': {
3387 'title': '29C3: Not my department',
3388 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3389 'uploader': 'Christiaan008',
3390 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3391 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3392 },
3393 'playlist_count': 96,
3394 }, {
3395 'note': 'Large playlist',
3396 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3397 'info_dict': {
3398 'title': 'Uploads from Cauchemar',
3399 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3400 'uploader': 'Cauchemar',
3401 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3402 },
3403 'playlist_mincount': 1123,
3404 }, {
3405 'note': 'even larger playlist, 8832 videos',
3406 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3407 'only_matching': True,
3408 }, {
3409 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3410 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3411 'info_dict': {
3412 'title': 'Uploads from Interstellar Movie',
3413 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3414 'uploader': 'Interstellar Movie',
3415 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3416 },
3417 'playlist_mincount': 21,
3418 }, {
3419 'note': 'Playlist with "show unavailable videos" button',
3420 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3421 'info_dict': {
3422 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3423 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3424 'uploader': 'Phim Siêu Nhân Nhật Bản',
3425 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3426 },
3427 'playlist_mincount': 200,
3428 }, {
3429 'note': 'Playlist with unavailable videos in page 7',
3430 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3431 'info_dict': {
3432 'title': 'Uploads from BlankTV',
3433 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3434 'uploader': 'BlankTV',
3435 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3436 },
3437 'playlist_mincount': 1000,
3438 }, {
3439 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3440 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3441 'info_dict': {
3442 'title': 'Data Analysis with Dr Mike Pound',
3443 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3444 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3445 'uploader': 'Computerphile',
3446 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3447 },
3448 'playlist_mincount': 11,
3449 }, {
3450 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3451 'only_matching': True,
3452 }, {
3453 'note': 'Playlist URL that does not actually serve a playlist',
3454 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3455 'info_dict': {
3456 'id': 'FqZTN594JQw',
3457 'ext': 'webm',
3458 'title': "Smiley's People 01 detective, Adventure Series, Action",
3459 'uploader': 'STREEM',
3460 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3461 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3462 'upload_date': '20150526',
3463 'license': 'Standard YouTube License',
3464 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3465 'categories': ['People & Blogs'],
3466 'tags': list,
3467 'view_count': int,
3468 'like_count': int,
3469 'dislike_count': int,
3470 },
3471 'params': {
3472 'skip_download': True,
3473 },
3474 'skip': 'This video is not available.',
3475 'add_ie': [YoutubeIE.ie_key()],
3476 }, {
3477 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3478 'only_matching': True,
3479 }, {
3480 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3481 'only_matching': True,
3482 }, {
3483 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3484 'info_dict': {
3485 'id': '3yImotZU3tw', # This will keep changing
3486 'ext': 'mp4',
3487 'title': compat_str,
3488 'uploader': 'Sky News',
3489 'uploader_id': 'skynews',
3490 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3491 'upload_date': r're:\d{8}',
3492 'description': compat_str,
3493 'categories': ['News & Politics'],
3494 'tags': list,
3495 'like_count': int,
3496 'dislike_count': int,
3497 },
3498 'params': {
3499 'skip_download': True,
3500 },
3501 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3502 }, {
3503 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3504 'info_dict': {
3505 'id': 'a48o2S1cPoo',
3506 'ext': 'mp4',
3507 'title': 'The Young Turks - Live Main Show',
3508 'uploader': 'The Young Turks',
3509 'uploader_id': 'TheYoungTurks',
3510 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3511 'upload_date': '20150715',
3512 'license': 'Standard YouTube License',
3513 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3514 'categories': ['News & Politics'],
3515 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3516 'like_count': int,
3517 'dislike_count': int,
3518 },
3519 'params': {
3520 'skip_download': True,
3521 },
3522 'only_matching': True,
3523 }, {
3524 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3525 'only_matching': True,
3526 }, {
3527 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3528 'only_matching': True,
3529 }, {
3530 'note': 'A channel that is not live. Should raise error',
3531 'url': 'https://www.youtube.com/user/numberphile/live',
3532 'only_matching': True,
3533 }, {
3534 'url': 'https://www.youtube.com/feed/trending',
3535 'only_matching': True,
3536 }, {
3537 'url': 'https://www.youtube.com/feed/library',
3538 'only_matching': True,
3539 }, {
3540 'url': 'https://www.youtube.com/feed/history',
3541 'only_matching': True,
3542 }, {
3543 'url': 'https://www.youtube.com/feed/subscriptions',
3544 'only_matching': True,
3545 }, {
3546 'url': 'https://www.youtube.com/feed/watch_later',
3547 'only_matching': True,
3548 }, {
3549 'note': 'Recommended - redirects to home page',
3550 'url': 'https://www.youtube.com/feed/recommended',
3551 'only_matching': True,
3552 }, {
3553 'note': 'inline playlist with not always working continuations',
3554 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3555 'only_matching': True,
3556 }, {
3557 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3558 'only_matching': True,
3559 }, {
3560 'url': 'https://www.youtube.com/course',
3561 'only_matching': True,
3562 }, {
3563 'url': 'https://www.youtube.com/zsecurity',
3564 'only_matching': True,
3565 }, {
3566 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3567 'only_matching': True,
3568 }, {
3569 'url': 'https://www.youtube.com/TheYoungTurks/live',
3570 'only_matching': True,
3571 }, {
3572 'url': 'https://www.youtube.com/hashtag/cctv9',
3573 'info_dict': {
3574 'id': 'cctv9',
3575 'title': '#cctv9',
3576 },
3577 'playlist_mincount': 350,
3578 }, {
3579 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3580 'only_matching': True,
3581 }, {
3582 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3583 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3584 'only_matching': True
3585 }, {
3586 'note': '/browse/ should redirect to /channel/',
3587 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3588 'only_matching': True
3589 }, {
3590 'note': 'VLPL, should redirect to playlist?list=PL...',
3591 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3592 'info_dict': {
3593 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3594 'uploader': 'NoCopyrightSounds',
3595 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3596 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3597 'title': 'NCS Releases',
3598 },
3599 'playlist_mincount': 166,
3600 }, {
3601 'note': 'Topic, should redirect to playlist?list=UU...',
3602 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3603 'info_dict': {
3604 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3605 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3606 'title': 'Uploads from Royalty Free Music - Topic',
3607 'uploader': 'Royalty Free Music - Topic',
3608 },
3609 'expected_warnings': [
3610 'A channel/user page was given',
3611 'The URL does not have a videos tab',
3612 ],
3613 'playlist_mincount': 101,
3614 }, {
3615 'note': 'Topic without a UU playlist',
3616 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3617 'info_dict': {
3618 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3619 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3620 },
3621 'expected_warnings': [
3622 'A channel/user page was given',
3623 'The URL does not have a videos tab',
3624 'Falling back to channel URL',
3625 ],
3626 'playlist_mincount': 9,
3627 }, {
3628 'note': 'Youtube music Album',
3629 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3630 'info_dict': {
3631 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3632 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3633 },
3634 'playlist_count': 50,
3635 }, {
3636 'note': 'unlisted single video playlist',
3637 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3638 'info_dict': {
3639 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3640 'uploader': 'colethedj',
3641 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3642 'title': 'yt-dlp unlisted playlist test',
3643 'availability': 'unlisted'
3644 },
3645 'playlist_count': 1,
3646 }]
3647
@classmethod
def suitable(cls, url):
    """Defer to the single-video extractor for URLs it already handles."""
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
3652
def _extract_channel_id(self, webpage):
    """Extract the channel id (UC...) from a channel webpage.

    Tries the ``channelId`` meta tag first, then falls back to parsing
    the channel URL out of the og/twitter/applink meta tags.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # The `+` must be inside the capture group: with `([^/?#&])+` the
    # group only holds the last repeated character, so the extracted
    # "channel id" would be a single character
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
3665
3666 @staticmethod
3667 def _extract_basic_item_renderer(item):
3668 # Modified from _extract_grid_item_renderer
3669 known_basic_renderers = (
3670 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3671 )
3672 for key, renderer in item.items():
3673 if not isinstance(renderer, dict):
3674 continue
3675 elif key in known_basic_renderers:
3676 return renderer
3677 elif key.startswith('grid') and key.endswith('Renderer'):
3678 return renderer
3679
def _grid_entries(self, grid_renderer):
    """Yield entries for each item of a gridRenderer.

    Each item may resolve to a playlist, a video, a channel or a generic
    endpoint URL (checked in that order); unrecognized items are skipped.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        # playlist
        playlist_id = renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
            continue
        # video
        video_id = renderer.get('videoId')
        if video_id:
            yield self._extract_video(renderer)
            continue
        # channel
        channel_id = renderer.get('channelId')
        if channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
            continue
        # generic endpoint URL support
        ep_url = urljoin('https://www.youtube.com/', try_get(
            renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str))
        if ep_url:
            # Delegate to the first extractor that recognizes the URL
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if ie.suitable(ep_url):
                    yield self.url_result(
                        ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                    break
3719
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield grid entries from a shelf's inner content renderer, if present."""
    content = shelf_renderer.get('content')
    if not isinstance(content, dict):
        return
    for renderer_key in ('gridRenderer', 'expandedShelfContentsRenderer'):
        inner = content.get(renderer_key)
        if inner:
            # TODO: add support for nested playlists so each shelf is processed
            # as separate playlist
            # TODO: this includes only first N items
            for entry in self._grid_entries(inner):
                yield entry
            break
    # TODO: content.get('horizontalListRenderer') is recognized upstream
    # but not yet supported
3735
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield entries for a shelfRenderer: the shelf's own URL (if any)
    followed by entries extracted from its inline content."""
    ep = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', ep)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
3752
def _playlist_entries(self, video_list_renderer):
    """Yield extracted videos from a playlist video list renderer."""
    for item in video_list_renderer['contents']:
        if not isinstance(item, dict):
            continue
        renderer = item.get('playlistVideoRenderer') or item.get('playlistPanelVideoRenderer')
        if isinstance(renderer, dict) and renderer.get('videoId'):
            yield self._extract_video(renderer)
3764
def _rich_entries(self, rich_grid_renderer):
    """Yield the video of a rich grid item, if it carries a videoId."""
    renderer = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if renderer.get('videoId'):
        yield self._extract_video(renderer)
3772
def _video_entry(self, video_renderer):
    """Return an extracted video entry, or None when there is no videoId."""
    if video_renderer.get('videoId'):
        return self._extract_video(video_renderer)
    return None
3777
def _post_thread_entries(self, post_thread_renderer):
    """Yield entries found in a community post: the attached video and/or
    playlist, then any YouTube video links inside the post text."""
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = video_renderer.get('videoId')
    if video_id:
        entry = self._extract_video(video_renderer)
        if entry:
            yield entry
    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        if video_id == ep_video_id:
            # Skip links that duplicate the attached video
            continue
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3813
def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield entries of every post thread in a continuation response."""
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        thread = content.get('backstagePostThreadRenderer')
        if isinstance(thread, dict):
            for entry in self._post_thread_entries(thread):
                yield entry
3824
3825 r''' # unused
3826 def _rich_grid_entries(self, contents):
3827 for content in contents:
3828 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3829 if video_renderer:
3830 entry = self._video_entry(video_renderer)
3831 if entry:
3832 yield entry
3833 '''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield all entries of a tab, then follow API continuations page by
    page until none remains.

    `continuation_list` is a one-element list shared with the nested
    generator so it can report the continuation it found (a closure-
    writable cell, since this code predates dropping Python 2).
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section; may still be a rich grid item
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Map renderer key -> generator producing its entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

    continuation_list = [None]  # Python 2 does not support nonlocal
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    # First page comes from the webpage data itself
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    visitor_data = None

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # Carry visitorData forward so follow-up requests stay consistent
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # Old-style continuation payloads
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # New-style continuation payloads: (handler, key under which the
        # continuation items must be re-wrapped for that handler)
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
3949
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the selected tab; raise if none is selected."""
    for tab in tabs:
        renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if renderer.get('selected') is True:
            return renderer
    raise ExtractorError('Unable to find selected tab')
3958
@classmethod
def _extract_uploader(cls, data):
    """Extract uploader name/id/url from the playlist sidebar owner info,
    omitting any field that could not be determined."""
    sidebar = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        sidebar, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if not owner:
        return {}
    fields = {
        'uploader': owner.get('text'),
        'uploader_id': try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str),
        'uploader_url': urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)),
    }
    return {k: v for k, v in fields.items() if v is not None}
3973
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build a playlist result for a tabbed page (channel, playlist or feed).

    Collects channel/playlist metadata from the response, then delegates
    entry extraction of the selected tab to `_entries`.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    thumbnails_list = tags = []

    selected_tab = self._extract_selected_tab(tabs)
    # Channel pages carry channelMetadataRenderer; playlists carry
    # playlistMetadataRenderer instead
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        # Hashtag pages have no metadata renderer; fall back to the header
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    # Append tab name, e.g. "lex will - Videos"
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')
    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # Playlist pages: take uploader info from the sidebar instead
        metadata.update(self._extract_uploader(data))
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    ytcfg = self.extract_ytcfg(item_id, webpage)
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id,
            self._extract_identity_token(webpage, item_id),
            self._extract_account_syncid(ytcfg, data), ytcfg),
        **metadata)
4048
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Iterate an auto-generated "Mix" playlist page by page.

    Mixes loop back to the start once exhausted, so iteration stops when
    the first video reappears, a page yields no new videos, or the API
    stops returning a playlist.
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # Skip videos already yielded on the previous page (pages overlap)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        # The last content item may not be a playlistPanelVideoRenderer
        # (e.g. a continuation item), in which case try_get returns None;
        # fall back to an empty dict so the .get() calls below are safe
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint']) or {}
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4084
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Extract a playlist renderer; non-mix playlists are delegated to the
    regular tab-based playlist URL."""
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    endpoint_url = try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    playlist_url = urljoin(url, endpoint_url)
    if playlist_url and playlist_url != url:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    return self.playlist_result(
        self._extract_mix_playlist(playlist, playlist_id, data, webpage),
        playlist_id=playlist_id, playlist_title=title)
4102
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    is_private = is_unlisted = None
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_dropdown_entries = try_get(
        renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for renderer_dict in privacy_dropdown_entries:
        is_selected = try_get(
            renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if not is_selected:
            continue
        # Found the selected visibility entry; treat its label like a badge
        label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
        if label:
            badge_labels.add(label.lower())
        break

    for badge_label in badge_labels:
        if badge_label == 'unlisted':
            is_unlisted = True
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'public':
            is_unlisted = is_private = False
    # is_private=None when YouTube gave no signal -> availability unknown
    return self._availability(is_private, False, False, False, is_unlisted)
4134
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first sidebar item holding ``info_renderer`` of the
    expected type, or None when the sidebar lacks one."""
    items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for sidebar_item in items:
        found = try_get(sidebar_item, lambda x: x[info_renderer], expected_type)
        if found:
            return found
4143
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.

    Returns the API response for the reloaded playlist, or None when the
    sidebar is missing (the fallback query is still attempted whenever a
    sidebar exists, even if the button was not found).
    """
    browse_id = params = None
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not renderer:
        return
    # Look for the "Show unavailable videos" menu entry and grab its
    # browse endpoint
    menu_renderer = try_get(
        renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_renderer:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
        if not text or text.lower() != 'show unavailable videos':
            continue
        browse_endpoint = try_get(
            nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
        browse_id = browse_endpoint.get('browseId')
        params = browse_endpoint.get('params')
        break

    ytcfg = self.extract_ytcfg(item_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    query = {
        # Opaque protobuf params; presumably the "show unavailable" flag
        # — TODO confirm against live API behaviour
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
4182
4183 def _extract_webpage(self, url, item_id):
4184 retries = self.get_param('extractor_retries', 3)
4185 count = -1
4186 last_error = 'Incomplete yt initial data recieved'
4187 while count < retries:
4188 count += 1
4189 # Sometimes youtube returns a webpage with incomplete ytInitialData
4190 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4191 if count:
4192 self.report_warning('%s. Retrying ...' % last_error)
4193 webpage = self._download_webpage(
4194 url, item_id,
4195 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4196 data = self.extract_yt_initial_data(item_id, webpage)
4197 if data.get('contents') or data.get('currentVideoEndpoint'):
4198 break
4199 # Extract alerts here only when there is error
4200 self._extract_and_report_alerts(data)
4201 if count >= retries:
4202 raise ExtractorError(last_error)
4203 return webpage, data
4204
4205 @staticmethod
4206 def _smuggle_data(entries, data):
4207 for entry in entries:
4208 if data:
4209 entry['url'] = smuggle_url(entry['url'], data)
4210 yield entry
4211
4212 def _real_extract(self, url):
4213 url, smuggled_data = unsmuggle_url(url, {})
4214 if self.is_music_url(url):
4215 smuggled_data['is_music_url'] = True
4216 info_dict = self.__real_extract(url, smuggled_data)
4217 if info_dict.get('entries'):
4218 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4219 return info_dict
4220
    # Splits a matching URL into pre/tab/post parts; the conditional group only
    # captures a '/tab' segment when the URL matched as a channel-type URL
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4222
    def __real_extract(self, url, smuggled_data):
        """Extract a tab/playlist/watch page, normalizing the URL first.

        Handles music-URL aliases, channel home -> /videos redirect, topic
        channels without a /videos tab, and the 'show unavailable videos'
        reload, then dispatches to tab/playlist/single-video extraction.
        """
        item_id = self._match_id(url)
        # Force the canonical host; other hosts can return different data
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Split URL into pre/tab/post groups; normalize missing groups to ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Re-match after rewriting the URL above
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            # Playlist worked: switch all state to the playlist
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4337
4338
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to YoutubeTabIE and plain watch URLs; otherwise match normally."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        if parse_qs(url).get('v', [None])[0]:
            # Watch URLs with a video id belong to other extractors
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Canonicalize to a /playlist URL and hand off to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Detect the music host before the URL is rewritten below
        smuggle = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        canonical_url = update_url_query('https://www.youtube.com/playlist', query)
        if smuggle:
            canonical_url = smuggle_url(canonical_url, {'is_music_url': True})
        return self.url_result(canonical_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4423
4424
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite youtu.be short links with a list param into full watch URLs."""
        match = self._match_valid_url(url)
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': match.group('id'),
            'list': match.group('playlist_id'),
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=match.group('playlist_id'))
4463
4464
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Map ytuser:<name> to the corresponding /user/ channel page."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4478
4479
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the special 'LL' (liked videos) playlist."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
4497
4498
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* search results for *query*, following continuations."""
        request_data = {'query': query}
        if self._SEARCH_PARAMS:
            request_data['params'] = self._SEARCH_PARAMS
        yielded = 0
        continuation = {}
        for page in itertools.count(1):
            request_data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page), ep='search', query=request_data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page and continuation pages use different response shapes
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                continuation = continuation or self._extract_continuation({'contents': [slr_content]})
                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for content in isr_contents or []:
                    video = content.get('videoRenderer') if isinstance(content, dict) else None
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue
                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query, query)
4566
4567
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor variant that returns the newest videos first."""
    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # InnerTube search params selecting sort-by-upload-date
    _SEARCH_PARAMS = 'CAI%3D'
4573
4574
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the query (and optional 'sp' filter params) from a results URL."""
        qs = parse_qs(url)
        query = (qs.get('search_query') or qs.get('q'))[0]
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4601
4602
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the extractor name from the feed it serves
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Redirect to the tab extractor for this feed's page."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4619
4620
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the special 'WL' (watch later) playlist."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4633
4634
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com homepage (with optional ?/# suffix)
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Recommendations are served without login too, so override the base default
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4650
4651
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Matches :ytsub, :ytsubs, :ytsubscription, :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4663
4664
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    # Matches :ythis and :ythistory
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4673
4674
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch URLs whose 'v' parameter was cut off by an unquoted '&'.

    Matching such a URL always raises a helpful ExtractorError instead of
    silently hitting the youtube homepage.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Fix: the hint previously told users to run 'youtube-dl';
        # this program is yt-dlp
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
4722
4723
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: a video id shorter than 11 chars means a cut-off URL."""
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)