yt_dlp/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import base64
   6 import calendar
   7 import copy
   8 import datetime
   9 import hashlib
  10 import itertools
  11 import json
  12 import os.path
  13 import random
  14 import re
  15 import time
  16 import traceback
  17
  18 from .common import InfoExtractor, SearchInfoExtractor
  19 from ..compat import (
  20     compat_chr,
  21     compat_HTTPError,
  22     compat_parse_qs,
  23     compat_str,
  24     compat_urllib_parse_unquote_plus,
  25     compat_urllib_parse_urlencode,
  26     compat_urllib_parse_urlparse,
  27     compat_urlparse,
  28 )
  29 from ..jsinterp import JSInterpreter
  30 from ..utils import (
  31     bytes_to_intlist,
  32     clean_html,
  33     datetime_from_str,
  34     dict_get,
  35     error_to_compat_str,
  36     ExtractorError,
  37     float_or_none,
  38     format_field,
  39     int_or_none,
  40     intlist_to_bytes,
  41     is_html,
  42     mimetype2ext,
  43     network_exceptions,
  44     orderedSet,
  45     parse_codecs,
  46     parse_count,
  47     parse_duration,
  48     parse_iso8601,
  49     parse_qs,
  50     qualities,
  51     remove_start,
  52     smuggle_url,
  53     str_or_none,
  54     str_to_int,
  55     traverse_obj,
  56     try_get,
  57     unescapeHTML,
  58     unified_strdate,
  59     unsmuggle_url,
  60     update_url_query,
  61     url_or_none,
  62     urljoin,
  63     variadic,
  64 )
  65
  66
# Built-in InnerTube configuration for each known YouTube client, keyed by
# client name. Entries that omit INNERTUBE_API_KEY or INNERTUBE_HOST receive
# defaults from build_innertube_clients(), which also adds a 'priority' key
# and derives the '<name>_agegate' variants of the base clients.
# Any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
    },
    # No INNERTUBE_API_KEY here: the default is filled in by build_innertube_clients()
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26
    },
    # No INNERTUBE_API_KEY here: the default is filled in by build_innertube_clients()
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
 205
 206
 207 def build_innertube_clients():
 208     third_party = {
 209         'embedUrl': 'https://google.com',  # Can be any valid URL
 210     }
 211     base_clients = ('android', 'web', 'ios', 'mweb')
 212     priority = qualities(base_clients[::-1])
 213
 214     for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
 215         ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
 216         ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
 217         ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
 218         ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
 219
 220         if client in base_clients:
 221             INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
 222             agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
 223             agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
 224             agegate_ytcfg['priority'] -= 1
 225         elif client.endswith('_embedded'):
 226             ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
 227             ytcfg['priority'] -= 2
 228         else:
 229             ytcfg['priority'] -= 3
 230
 231
# Runs once at import time so INNERTUBE_CLIENTS is fully populated before use
build_innertube_clients()
 233
 234
 235 class YoutubeBaseInfoExtractor(InfoExtractor):
 236     """Provide base functions for Youtube extractors"""
 237
    # Regex alternation of URL path names treated as reserved
    # NOTE(review): presumably used to tell YouTube's own pages apart from
    # channel/user vanity URLs - confirm against the URL patterns that use it
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Regex fragment for a playlist ID: either one of the listed prefixes
    # followed by at least 10 ID characters, or one of the fixed IDs
    # RDMM, WL, LL or LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    # NOTE(review): presumably the netrc machine name used when looking up
    # credentials - confirm against InfoExtractor._get_login_info
    _NETRC_MACHINE = 'youtube'

    # If True, _login() reports that login is required when neither the
    # 'cookiefile' nor the 'cookiesfrombrowser' param is set
    _LOGIN_REQUIRED = False

    r'''  # Unused since login is broken
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
    '''
 259
 260     def _login(self):
 261         """
 262         Attempt to log in to YouTube.
 263         True is returned if successful or skipped.
 264         False is returned if login failed.
 265
 266         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
 267         """
 268
 269         def warn(message):
 270             self.report_warning(message)
 271
 272         # username+password login is broken
 273         if (self._LOGIN_REQUIRED
 274                 and self.get_param('cookiefile') is None
 275                 and self.get_param('cookiesfrombrowser') is None):
 276             self.raise_login_required(
 277                 'Login details are needed to download this content', method='cookies')
 278         username, password = self._get_login_info()
 279         if username:
 280             warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
 281         return
 282
 283         # Everything below this is broken!
 284         r'''
 285         # No authentication to be performed
 286         if username is None:
 287             if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
 288                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 289             # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
 290             #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
 291             return True
 292
 293         login_page = self._download_webpage(
 294             self._LOGIN_URL, None,
 295             note='Downloading login page',
 296             errnote='unable to fetch login page', fatal=False)
 297         if login_page is False:
 298             return
 299
 300         login_form = self._hidden_inputs(login_page)
 301
 302         def req(url, f_req, note, errnote):
 303             data = login_form.copy()
 304             data.update({
 305                 'pstMsg': 1,
 306                 'checkConnection': 'youtube',
 307                 'checkedDomains': 'youtube',
 308                 'hl': 'en',
 309                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 310                 'f.req': json.dumps(f_req),
 311                 'flowName': 'GlifWebSignIn',
 312                 'flowEntry': 'ServiceLogin',
 313                 # TODO: reverse actual botguard identifier generation algo
 314                 'bgRequest': '["identifier",""]',
 315             })
 316             return self._download_json(
 317                 url, None, note=note, errnote=errnote,
 318                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 319                 fatal=False,
 320                 data=urlencode_postdata(data), headers={
 321                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 322                     'Google-Accounts-XSRF': 1,
 323                 })
 324
 325         lookup_req = [
 326             username,
 327             None, [], None, 'US', None, None, 2, False, True,
 328             [
 329                 None, None,
 330                 [2, 1, None, 1,
 331                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 332                  None, [], 4],
 333                 1, [None, None, []], None, None, None, True
 334             ],
 335             username,
 336         ]
 337
 338         lookup_results = req(
 339             self._LOOKUP_URL, lookup_req,
 340             'Looking up account info', 'Unable to look up account info')
 341
 342         if lookup_results is False:
 343             return False
 344
 345         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 346         if not user_hash:
 347             warn('Unable to extract user hash')
 348             return False
 349
 350         challenge_req = [
 351             user_hash,
 352             None, 1, None, [1, None, None, None, [password, None, True]],
 353             [
 354                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 355                 1, [None, None, []], None, None, None, True
 356             ]]
 357
 358         challenge_results = req(
 359             self._CHALLENGE_URL, challenge_req,
 360             'Logging in', 'Unable to log in')
 361
 362         if challenge_results is False:
 363             return
 364
 365         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 366         if login_res:
 367             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 368             warn(
 369                 'Unable to login: %s' % 'Invalid password'
 370                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 371             return False
 372
 373         res = try_get(challenge_results, lambda x: x[0][-1], list)
 374         if not res:
 375             warn('Unable to extract result entry')
 376             return False
 377
 378         login_challenge = try_get(res, lambda x: x[0][0], list)
 379         if login_challenge:
 380             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 381             if challenge_str == 'TWO_STEP_VERIFICATION':
 382                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 383                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 384                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 385                 if status == 'QUOTA_EXCEEDED':
 386                     warn('Exceeded the limit of TFA codes, try later')
 387                     return False
 388
 389                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 390                 if not tl:
 391                     warn('Unable to extract TL')
 392                     return False
 393
 394                 tfa_code = self._get_tfa_info('2-step verification code')
 395
 396                 if not tfa_code:
 397                     warn(
 398                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 399                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 400                     return False
 401
 402                 tfa_code = remove_start(tfa_code, 'G-')
 403
 404                 tfa_req = [
 405                     user_hash, None, 2, None,
 406                     [
 407                         9, None, None, None, None, None, None, None,
 408                         [None, tfa_code, True, 2]
 409                     ]]
 410
 411                 tfa_results = req(
 412                     self._TFA_URL.format(tl), tfa_req,
 413                     'Submitting TFA code', 'Unable to submit TFA code')
 414
 415                 if tfa_results is False:
 416                     return False
 417
 418                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 419                 if tfa_res:
 420                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 421                     warn(
 422                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 423                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 424                     return False
 425
 426                 check_cookie_url = try_get(
 427                     tfa_results, lambda x: x[0][-1][2], compat_str)
 428             else:
 429                 CHALLENGES = {
 430                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 431                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 432                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 433                 }
 434                 challenge = CHALLENGES.get(
 435                     challenge_str,
 436                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 437                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 438                 return False
 439         else:
 440             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 441
 442         if not check_cookie_url:
 443             warn('Unable to extract CheckCookie URL')
 444             return False
 445
 446         check_cookie_results = self._download_webpage(
 447             check_cookie_url, None, 'Checking cookie', fatal=False)
 448
 449         if check_cookie_results is False:
 450             return False
 451
 452         if 'https://myaccount.google.com/' not in check_cookie_results:
 453             warn('Unable to log in')
 454             return False
 455
 456         return True
 457         '''
 458
 459     def _initialize_consent(self):
 460         cookies = self._get_cookies('https://www.youtube.com/')
 461         if cookies.get('__Secure-3PSID'):
 462             return
 463         consent_id = None
 464         consent = cookies.get('CONSENT')
 465         if consent:
 466             if 'YES' in consent.value:
 467                 return
 468             consent_id = self._search_regex(
 469                 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
 470         if not consent_id:
 471             consent_id = random.randint(100, 999)
 472         self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
 473
 474     def _real_initialize(self):
 475         self._initialize_consent()
 476         if self._downloader is None:
 477             return
 478         if not self._login():
 479             return
 480
    # Matches a `ytInitialData = {...};` or `window["ytInitialData"] = {...};`
    # assignment; group 1 captures the object literal (non-greedy)
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Matches a `ytInitialPlayerResponse = {...};` assignment; group 1 captures
    # the object literal (non-greedy)
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Text expected right after such an assignment; extract_yt_initial_data()
    # appends it to _YT_INITIAL_DATA_RE to anchor the end of the match
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
 484
 485     def _get_default_ytcfg(self, client='web'):
 486         return copy.deepcopy(INNERTUBE_CLIENTS[client])
 487
 488     def _get_innertube_host(self, client='web'):
 489         return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
 490
 491     def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
 492         # try_get but with fallback to default ytcfg client values when present
 493         _func = lambda y: try_get(y, getter, expected_type)
 494         return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
 495
 496     def _extract_client_name(self, ytcfg, default_client='web'):
 497         return self._ytcfg_get_safe(
 498             ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
 499                     lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
 500
 501     @staticmethod
 502     def _extract_session_index(*data):
 503         for ytcfg in data:
 504             session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
 505             if session_index is not None:
 506                 return session_index
 507
 508     def _extract_client_version(self, ytcfg, default_client='web'):
 509         return self._ytcfg_get_safe(
 510             ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
 511                     lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
 512
 513     def _extract_api_key(self, ytcfg=None, default_client='web'):
 514         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
 515
 516     def _extract_context(self, ytcfg=None, default_client='web'):
 517         _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
 518         context = _get_context(ytcfg)
 519         if context:
 520             return context
 521
 522         context = _get_context(self._get_default_ytcfg(default_client))
 523         if not ytcfg:
 524             return context
 525
 526         # Recreate the client context (required)
 527         context['client'].update({
 528             'clientVersion': self._extract_client_version(ytcfg, default_client),
 529             'clientName': self._extract_client_name(ytcfg, default_client),
 530         })
 531         visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
 532         if visitor_data:
 533             context['client']['visitorData'] = visitor_data
 534         return context
 535
    # Cache used by _generate_sapisidhash_header(): None means the cookies
    # were not inspected yet, False means no usable cookie was found
    _SAPISID = None
 537
 538     def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
 539         time_now = round(time.time())
 540         if self._SAPISID is None:
 541             yt_cookies = self._get_cookies('https://www.youtube.com')
 542             # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
 543             # See: https://github.com/yt-dlp/yt-dlp/issues/393
 544             sapisid_cookie = dict_get(
 545                 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
 546             if sapisid_cookie and sapisid_cookie.value:
 547                 self._SAPISID = sapisid_cookie.value
 548                 self.write_debug('Extracted SAPISID cookie')
 549                 # SAPISID cookie is required if not already present
 550                 if not yt_cookies.get('SAPISID'):
 551                     self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
 552                     self._set_cookie(
 553                         '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
 554             else:
 555                 self._SAPISID = False
 556         if not self._SAPISID:
 557             return None
 558         # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
 559         sapisidhash = hashlib.sha1(
 560             f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
 561         return f'SAPISIDHASH {time_now}_{sapisidhash}'
 562
 563     def _call_api(self, ep, query, video_id, fatal=True, headers=None,
 564                   note='Downloading API JSON', errnote='Unable to download API page',
 565                   context=None, api_key=None, api_hostname=None, default_client='web'):
 566
 567         data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
 568         data.update(query)
 569         real_headers = self.generate_api_headers(default_client=default_client)
 570         real_headers.update({'content-type': 'application/json'})
 571         if headers:
 572             real_headers.update(headers)
 573         return self._download_json(
 574             'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
 575             video_id=video_id, fatal=fatal, note=note, errnote=errnote,
 576             data=json.dumps(data).encode('utf8'), headers=real_headers,
 577             query={'key': api_key or self._extract_api_key()})
 578
 579     def extract_yt_initial_data(self, video_id, webpage):
 580         return self._parse_json(
 581             self._search_regex(
 582                 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
 583                  self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
 584             video_id)
 585
 586     def _extract_identity_token(self, webpage, item_id):
 587         if not webpage:
 588             return None
 589         ytcfg = self.extract_ytcfg(item_id, webpage)
 590         if ytcfg:
 591             token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
 592             if token:
 593                 return token
 594         return self._search_regex(
 595             r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
 596             'identity token', default=None)
 597
 598     @staticmethod
 599     def _extract_account_syncid(*args):
 600         """
 601         Extract syncId required to download private playlists of secondary channels
 602         @params response and/or ytcfg
 603         """
 604         for data in args:
 605             # ytcfg includes channel_syncid if on secondary channel
 606             delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
 607             if delegated_sid:
 608                 return delegated_sid
 609             sync_ids = (try_get(
 610                 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
 611                        lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
 612             if len(sync_ids) >= 2 and sync_ids[1]:
 613                 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
 614                 # and just "user_syncid||" for primary channel. We only want the channel_syncid
 615                 return sync_ids[0]
 616
 617     def extract_ytcfg(self, video_id, webpage):
 618         if not webpage:
 619             return {}
 620         return self._parse_json(
 621             self._search_regex(
 622                 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
 623                 default='{}'), video_id, fatal=False) or {}
 624
 625     def generate_api_headers(
 626             self, ytcfg=None, identity_token=None, account_syncid=None,
 627             visitor_data=None, api_hostname=None, default_client='web', session_index=None):
 628         origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
 629         headers = {
 630             'X-YouTube-Client-Name': compat_str(
 631                 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
 632             'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
 633             'Origin': origin
 634         }
 635         if not visitor_data and ytcfg:
 636             visitor_data = try_get(
 637                 self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
 638         if identity_token:
 639             headers['X-Youtube-Identity-Token'] = identity_token
 640         if account_syncid:
 641             headers['X-Goog-PageId'] = account_syncid
 642         if session_index is None and ytcfg:
 643             session_index = self._extract_session_index(ytcfg)
 644         if account_syncid or session_index is not None:
 645             headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
 646         if visitor_data:
 647             headers['X-Goog-Visitor-Id'] = visitor_data
 648         auth = self._generate_sapisidhash_header(origin)
 649         if auth is not None:
 650             headers['Authorization'] = auth
 651             headers['X-Origin'] = origin
 652         return headers
 653
 654     @staticmethod
 655     def _build_api_continuation_query(continuation, ctp=None):
 656         query = {
 657             'continuation': continuation
 658         }
 659         # TODO: Inconsistency with clickTrackingParams.
 660         # Currently we have a fixed ctp contained within context (from ytcfg)
 661         # and a ctp in root query for continuation.
 662         if ctp:
 663             query['clickTracking'] = {'clickTrackingParams': ctp}
 664         return query
 665
 666     @classmethod
 667     def _extract_next_continuation_data(cls, renderer):
 668         next_continuation = try_get(
 669             renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
 670                        lambda x: x['continuation']['reloadContinuationData']), dict)
 671         if not next_continuation:
 672             return
 673         continuation = next_continuation.get('continuation')
 674         if not continuation:
 675             return
 676         ctp = next_continuation.get('clickTrackingParams')
 677         return cls._build_api_continuation_query(continuation, ctp)
 678
 679     @classmethod
 680     def _extract_continuation_ep_data(cls, continuation_ep: dict):
 681         if isinstance(continuation_ep, dict):
 682             continuation = try_get(
 683                 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
 684             if not continuation:
 685                 return
 686             ctp = continuation_ep.get('clickTrackingParams')
 687             return cls._build_api_continuation_query(continuation, ctp)
 688
 689     @classmethod
 690     def _extract_continuation(cls, renderer):
 691         next_continuation = cls._extract_next_continuation_data(renderer)
 692         if next_continuation:
 693             return next_continuation
 694
 695         contents = []
 696         for key in ('contents', 'items'):
 697             contents.extend(try_get(renderer, lambda x: x[key], list) or [])
 698
 699         for content in contents:
 700             if not isinstance(content, dict):
 701                 continue
 702             continuation_ep = try_get(
 703                 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
 704                           lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
 705                 dict)
 706             continuation = cls._extract_continuation_ep_data(continuation_ep)
 707             if continuation:
 708                 return continuation
 709
 710     @classmethod
 711     def _extract_alerts(cls, data):
 712         for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
 713             if not isinstance(alert_dict, dict):
 714                 continue
 715             for alert in alert_dict.values():
 716                 alert_type = alert.get('type')
 717                 if not alert_type:
 718                     continue
 719                 message = cls._get_text(alert, 'text')
 720                 if message:
 721                     yield alert_type, message
 722
 723     def _report_alerts(self, alerts, expected=True, fatal=True):
 724         errors = []
 725         warnings = []
 726         for alert_type, alert_message in alerts:
 727             if alert_type.lower() == 'error' and fatal:
 728                 errors.append([alert_type, alert_message])
 729             else:
 730                 warnings.append([alert_type, alert_message])
 731
 732         for alert_type, alert_message in (warnings + errors[:-1]):
 733             self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
 734         if errors:
 735             raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
 736
 737     def _extract_and_report_alerts(self, data, *args, **kwargs):
 738         return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
 739
 740     def _extract_badges(self, renderer: dict):
 741         badges = set()
 742         for badge in try_get(renderer, lambda x: x['badges'], list) or []:
 743             label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
 744             if label:
 745                 badges.add(label.lower())
 746         return badges
 747
 748     @staticmethod
 749     def _get_text(data, *path_list, max_runs=None):
 750         for path in path_list or [None]:
 751             if path is None:
 752                 obj = [data]
 753             else:
 754                 obj = traverse_obj(data, path, default=[])
 755                 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
 756                     obj = [obj]
 757             for item in obj:
 758                 text = try_get(item, lambda x: x['simpleText'], compat_str)
 759                 if text:
 760                     return text
 761                 runs = try_get(item, lambda x: x['runs'], list) or []
 762                 if not runs and isinstance(item, list):
 763                     runs = item
 764
 765                 runs = runs[:min(len(runs), max_runs or len(runs))]
 766                 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
 767                 if text:
 768                     return text
 769
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the API endpoint *ep* through self._call_api, retrying on failure.

        The retry budget is read from the 'extractor_retries' param (default 3),
        so at most retries + 1 requests are made. A request is retried when

        * self._call_api raises an ExtractorError caused by one of
          network_exceptions, unless the cause is an HTTP error with status
          403 or 429, or
        * the response raised no alert error but *check_get_keys* is non-empty
          and dict_get(response, check_get_keys) is falsy ("incomplete data").

        Arguments:
        item_id         -- passed to self._call_api as video_id and used when
                           parsing error bodies
        query           -- passed through to self._call_api
        note            -- progress note; ' (retry #N)' is appended on retries
        headers         -- passed through to self._call_api
        ytcfg           -- used with *default_client* to derive the context
                           and API key
        check_get_keys  -- keys handed to dict_get to validate the response;
                           None or empty disables the check
        ep, api_hostname, default_client -- passed through to self._call_api
        fatal           -- if true, failures raise ExtractorError; otherwise
                           they are reported as warnings and None is returned

        Returns the response of the last successful call.
        """
        response = None
        last_error = None
        # Starts at -1 so that the first attempt is numbered 0 (i.e. not a retry)
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                # Always fatal here: failures must surface as ExtractorError so
                # that the retry logic below can inspect them
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Peek at the first 512 bytes of an HTTP error body; if it is not HTML,
                    # rewind and try to parse the whole body as a JSON error message
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            # fatal=False: only surface the message as a warning here
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e)
                        if count < retries:
                            continue
                # Reached for non-network errors, HTTP 403/429, or exhausted retries
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                # No validation requested, or the response passes the dict_get check
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
 835
 836     @staticmethod
 837     def is_music_url(url):
 838         return re.match(r'https?://music\.youtube\.com/', url) is not None
 839
 840     def _extract_video(self, renderer):
 841         video_id = renderer.get('videoId')
 842         title = self._get_text(renderer, 'title')
 843         description = self._get_text(renderer, 'descriptionSnippet')
 844         duration = parse_duration(self._get_text(
 845             renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
 846         view_count_text = self._get_text(renderer, 'viewCountText') or ''
 847         view_count = str_to_int(self._search_regex(
 848             r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
 849             'view count', default=None))
 850
 851         uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
 852
 853         return {
 854             '_type': 'url',
 855             'ie_key': YoutubeIE.ie_key(),
 856             'id': video_id,
 857             'url': video_id,
 858             'title': title,
 859             'description': description,
 860             'duration': duration,
 861             'view_count': view_count,
 862             'uploader': uploader,
 863         }
 864
 865
 866 class YoutubeIE(YoutubeBaseInfoExtractor):
 867     IE_DESC = 'YouTube.com'
 868     _INVIDIOUS_SITES = (
 869         # invidious-redirect websites
 870         r'(?:www\.)?redirect\.invidious\.io',
 871         r'(?:(?:www|dev)\.)?invidio\.us',
 872         # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
 873         r'(?:www\.)?invidious\.pussthecat\.org',
 874         r'(?:www\.)?invidious\.zee\.li',
 875         r'(?:www\.)?invidious\.ethibox\.fr',
 876         r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
 877         # youtube-dl invidious instances list
 878         r'(?:(?:www|no)\.)?invidiou\.sh',
 879         r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
 880         r'(?:www\.)?invidious\.kabi\.tk',
 881         r'(?:www\.)?invidious\.mastodon\.host',
 882         r'(?:www\.)?invidious\.zapashcanon\.fr',
 883         r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
 884         r'(?:www\.)?invidious\.tinfoil-hat\.net',
 885         r'(?:www\.)?invidious\.himiko\.cloud',
 886         r'(?:www\.)?invidious\.reallyancient\.tech',
 887         r'(?:www\.)?invidious\.tube',
 888         r'(?:www\.)?invidiou\.site',
 889         r'(?:www\.)?invidious\.site',
 890         r'(?:www\.)?invidious\.xyz',
 891         r'(?:www\.)?invidious\.nixnet\.xyz',
 892         r'(?:www\.)?invidious\.048596\.xyz',
 893         r'(?:www\.)?invidious\.drycat\.fr',
 894         r'(?:www\.)?inv\.skyn3t\.in',
 895         r'(?:www\.)?tube\.poal\.co',
 896         r'(?:www\.)?tube\.connect\.cafe',
 897         r'(?:www\.)?vid\.wxzm\.sx',
 898         r'(?:www\.)?vid\.mint\.lgbt',
 899         r'(?:www\.)?vid\.puffyan\.us',
 900         r'(?:www\.)?yewtu\.be',
 901         r'(?:www\.)?yt\.elukerio\.org',
 902         r'(?:www\.)?yt\.lelux\.fi',
 903         r'(?:www\.)?invidious\.ggc-project\.de',
 904         r'(?:www\.)?yt\.maisputain\.ovh',
 905         r'(?:www\.)?ytprivate\.com',
 906         r'(?:www\.)?invidious\.13ad\.de',
 907         r'(?:www\.)?invidious\.toot\.koeln',
 908         r'(?:www\.)?invidious\.fdn\.fr',
 909         r'(?:www\.)?watch\.nettohikari\.com',
 910         r'(?:www\.)?invidious\.namazso\.eu',
 911         r'(?:www\.)?invidious\.silkky\.cloud',
 912         r'(?:www\.)?invidious\.exonip\.de',
 913         r'(?:www\.)?invidious\.riverside\.rocks',
 914         r'(?:www\.)?invidious\.blamefran\.net',
 915         r'(?:www\.)?invidious\.moomoo\.de',
 916         r'(?:www\.)?ytb\.trom\.tf',
 917         r'(?:www\.)?yt\.cyberhost\.uk',
 918         r'(?:www\.)?kgg2m7yk5aybusll\.onion',
 919         r'(?:www\.)?qklhadlycap4cnod\.onion',
 920         r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
 921         r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
 922         r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
 923         r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
 924         r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
 925         r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
 926         r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
 927         r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
 928         r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
 929         r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
 930     )
 931     _VALID_URL = r"""(?x)^
 932                      (
 933                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 934                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
 935                             (?:www\.)?deturl\.com/www\.youtube\.com|
 936                             (?:www\.)?pwnyoutube\.com|
 937                             (?:www\.)?hooktube\.com|
 938                             (?:www\.)?yourepeat\.com|
 939                             tube\.majestyc\.net|
 940                             %(invidious)s|
 941                             youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
 942                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 943                          (?:                                                  # the various things that can precede the ID:
 944                              (?:(?:v|embed|e|shorts)/(?!videoseries))         # v/ or embed/ or e/ or shorts/
 945                              |(?:                                             # or the v= param in all its forms
 946                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 947                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 948                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 949                                  v=
 950                              )
 951                          ))
 952                          |(?:
 953                             youtu\.be|                                        # just youtu.be/xxxx
 954                             vid\.plus|                                        # or vid.plus/xxxx
 955                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 956                             %(invidious)s
 957                          )/
 958                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 959                          )
 960                      )?                                                       # all until now is optional -> you can pass the naked ID
 961                      (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
 962                      (?(1).+)?                                                # if we found the ID, everything can follow
 963                      (?:\#|$)""" % {
 964         'invidious': '|'.join(_INVIDIOUS_SITES),
 965     }
 966     _PLAYER_INFO_RE = (
 967         r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
 968         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
 969         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
 970     )
 971     _formats = {
 972         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 973         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 974         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 975         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 976         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 977         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 978         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 979         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 980         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 981         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 982         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 983         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 984         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 985         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 986         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 987         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 988         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 989         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 990
 991
 992         # 3D videos
 993         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 994         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 995         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 996         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 997         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 998         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 999         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
1000
1001         # Apple HTTP Live Streaming
1002         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1003         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1004         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1005         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1006         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1007         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1008         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1009         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1010
1011         # DASH mp4 video
1012         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1013         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1014         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1015         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1016         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1017         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1018         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1019         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1020         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1021         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1022         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1023         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1024
1025         # Dash mp4 audio
1026         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1027         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1028         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1029         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1030         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1031         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1032         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1033
1034         # Dash webm
1035         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1036         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1037         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1038         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1039         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1040         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1041         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1042         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1043         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1044         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1045         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1046         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1047         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1048         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1049         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1050         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1051         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1052         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1053         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1054         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1055         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1056         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1057
1058         # Dash webm audio
1059         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1060         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1061
1062         # Dash webm audio with opus inside
1063         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1064         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1065         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1066
1067         # RTMP (unnamed)
1068         '_rtmp': {'protocol': 'rtmp'},
1069
1070         # av01 video only formats sometimes served with "unknown" codecs
1071         '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1072         '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
1073         '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
1074         '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
1075         '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
1076         '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
1077         '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1078         '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
1079     }
1080     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1081
1082     _GEO_BYPASS = False
1083
1084     IE_NAME = 'youtube'
1085     _TESTS = [
1086         {
1087             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1088             'info_dict': {
1089                 'id': 'BaW_jenozKc',
1090                 'ext': 'mp4',
1091                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1092                 'uploader': 'Philipp Hagemeister',
1093                 'uploader_id': 'phihag',
1094                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1095                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1096                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1097                 'upload_date': '20121002',
1098                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1099                 'categories': ['Science & Technology'],
1100                 'tags': ['youtube-dl'],
1101                 'duration': 10,
1102                 'view_count': int,
1103                 'like_count': int,
1104                 'dislike_count': int,
1105                 'start_time': 1,
1106                 'end_time': 9,
1107             }
1108         },
1109         {
1110             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1111             'note': 'Embed-only video (#1746)',
1112             'info_dict': {
1113                 'id': 'yZIXLfi8CZQ',
1114                 'ext': 'mp4',
1115                 'upload_date': '20120608',
1116                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1117                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1118                 'uploader': 'SET India',
1119                 'uploader_id': 'setindia',
1120                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1121                 'age_limit': 18,
1122             },
1123             'skip': 'Private video',
1124         },
1125         {
1126             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1127             'note': 'Use the first video ID in the URL',
1128             'info_dict': {
1129                 'id': 'BaW_jenozKc',
1130                 'ext': 'mp4',
1131                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1132                 'uploader': 'Philipp Hagemeister',
1133                 'uploader_id': 'phihag',
1134                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1135                 'upload_date': '20121002',
1136                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1137                 'categories': ['Science & Technology'],
1138                 'tags': ['youtube-dl'],
1139                 'duration': 10,
1140                 'view_count': int,
1141                 'like_count': int,
1142                 'dislike_count': int,
1143             },
1144             'params': {
1145                 'skip_download': True,
1146             },
1147         },
1148         {
1149             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1150             'note': '256k DASH audio (format 141) via DASH manifest',
1151             'info_dict': {
1152                 'id': 'a9LDPn-MO4I',
1153                 'ext': 'm4a',
1154                 'upload_date': '20121002',
1155                 'uploader_id': '8KVIDEO',
1156                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1157                 'description': '',
1158                 'uploader': '8KVIDEO',
1159                 'title': 'UHDTV TEST 8K VIDEO.mp4'
1160             },
1161             'params': {
1162                 'youtube_include_dash_manifest': True,
1163                 'format': '141',
1164             },
1165             'skip': 'format 141 not served anymore',
1166         },
1167         # DASH manifest with encrypted signature
1168         {
1169             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1170             'info_dict': {
1171                 'id': 'IB3lcPjvWLA',
1172                 'ext': 'm4a',
1173                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1174                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1175                 'duration': 244,
1176                 'uploader': 'AfrojackVEVO',
1177                 'uploader_id': 'AfrojackVEVO',
1178                 'upload_date': '20131011',
1179                 'abr': 129.495,
1180             },
1181             'params': {
1182                 'youtube_include_dash_manifest': True,
1183                 'format': '141/bestaudio[ext=m4a]',
1184             },
1185         },
1186         # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1187         {
1188             'note': 'Embed allowed age-gate video',
1189             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1190             'info_dict': {
1191                 'id': 'HtVdAasjOgU',
1192                 'ext': 'mp4',
1193                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1194                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1195                 'duration': 142,
1196                 'uploader': 'The Witcher',
1197                 'uploader_id': 'WitcherGame',
1198                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1199                 'upload_date': '20140605',
1200                 'age_limit': 18,
1201             },
1202         },
1203         {
1204             'note': 'Age-gate video with embed allowed in public site',
1205             'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1206             'info_dict': {
1207                 'id': 'HsUATh_Nc2U',
1208                 'ext': 'mp4',
1209                 'title': 'Godzilla 2 (Official Video)',
1210                 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1211                 'upload_date': '20200408',
1212                 'uploader_id': 'FlyingKitty900',
1213                 'uploader': 'FlyingKitty',
1214                 'age_limit': 18,
1215             },
1216         },
1217         {
1218             'note': 'Age-gate video embedable only with clientScreen=EMBED',
1219             'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1220             'info_dict': {
1221                 'id': 'Tq92D6wQ1mg',
1222                 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1223                 'ext': 'mp4',
1224                 'upload_date': '20191227',
1225                 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1226                 'uploader': 'Projekt Melody',
1227                 'description': 'md5:17eccca93a786d51bc67646756894066',
1228                 'age_limit': 18,
1229             },
1230         },
1231         {
1232             'note': 'Non-Agegated non-embeddable video',
1233             'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1234             'info_dict': {
1235                 'id': 'MeJVWBSsPAY',
1236                 'ext': 'mp4',
1237                 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1238                 'uploader': 'Herr Lurik',
1239                 'uploader_id': 'st3in234',
1240                 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1241                 'upload_date': '20130730',
1242             },
1243         },
1244         {
1245             'note': 'Non-bypassable age-gated video',
1246             'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1247             'only_matching': True,
1248         },
1249         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1250         # YouTube Red ad is not captured for creator
1251         {
1252             'url': '__2ABJjxzNo',
1253             'info_dict': {
1254                 'id': '__2ABJjxzNo',
1255                 'ext': 'mp4',
1256                 'duration': 266,
1257                 'upload_date': '20100430',
1258                 'uploader_id': 'deadmau5',
1259                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1260                 'creator': 'deadmau5',
1261                 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1262                 'uploader': 'deadmau5',
1263                 'title': 'Deadmau5 - Some Chords (HD)',
1264                 'alt_title': 'Some Chords',
1265             },
1266             'expected_warnings': [
1267                 'DASH manifest missing',
1268             ]
1269         },
1270         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1271         {
1272             'url': 'lqQg6PlCWgI',
1273             'info_dict': {
1274                 'id': 'lqQg6PlCWgI',
1275                 'ext': 'mp4',
1276                 'duration': 6085,
1277                 'upload_date': '20150827',
1278                 'uploader_id': 'olympic',
1279                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1280                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1281                 'uploader': 'Olympics',
1282                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
1283             },
1284             'params': {
1285                 'skip_download': 'requires avconv',
1286             }
1287         },
1288         # Non-square pixels
1289         {
1290             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1291             'info_dict': {
1292                 'id': '_b-2C3KPAM0',
1293                 'ext': 'mp4',
1294                 'stretched_ratio': 16 / 9.,
1295                 'duration': 85,
1296                 'upload_date': '20110310',
1297                 'uploader_id': 'AllenMeow',
1298                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1299                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1300                 'uploader': '孫ᄋᄅ',
1301                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1302             },
1303         },
1304         # url_encoded_fmt_stream_map is empty string
1305         {
1306             'url': 'qEJwOuvDf7I',
1307             'info_dict': {
1308                 'id': 'qEJwOuvDf7I',
1309                 'ext': 'webm',
1310                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1311                 'description': '',
1312                 'upload_date': '20150404',
1313                 'uploader_id': 'spbelect',
1314                 'uploader': 'Наблюдатели Петербурга',
1315             },
1316             'params': {
1317                 'skip_download': 'requires avconv',
1318             },
1319             'skip': 'This live event has ended.',
1320         },
1321         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1322         {
1323             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1324             'info_dict': {
1325                 'id': 'FIl7x6_3R5Y',
1326                 'ext': 'webm',
1327                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1328                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1329                 'duration': 220,
1330                 'upload_date': '20150625',
1331                 'uploader_id': 'dorappi2000',
1332                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1333                 'uploader': 'dorappi2000',
1334                 'formats': 'mincount:31',
1335             },
1336             'skip': 'not actual anymore',
1337         },
1338         # DASH manifest with segment_list
1339         {
1340             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1341             'md5': '8ce563a1d667b599d21064e982ab9e31',
1342             'info_dict': {
1343                 'id': 'CsmdDsKjzN8',
1344                 'ext': 'mp4',
1345                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1346                 'uploader': 'Airtek',
1347                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1348                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1349                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1350             },
1351             'params': {
1352                 'youtube_include_dash_manifest': True,
1353                 'format': '135',  # bestvideo
1354             },
1355             'skip': 'This live event has ended.',
1356         },
1357         {
1358             # Multifeed videos (multiple cameras), URL is for Main Camera
1359             'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1360             'info_dict': {
1361                 'id': 'jvGDaLqkpTg',
1362                 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1363                 'description': 'md5:e03b909557865076822aa169218d6a5d',
1364             },
1365             'playlist': [{
1366                 'info_dict': {
1367                     'id': 'jvGDaLqkpTg',
1368                     'ext': 'mp4',
1369                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1370                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1371                     'duration': 10643,
1372                     'upload_date': '20161111',
1373                     'uploader': 'Team PGP',
1374                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1375                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1376                 },
1377             }, {
1378                 'info_dict': {
1379                     'id': '3AKt1R1aDnw',
1380                     'ext': 'mp4',
1381                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1382                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1383                     'duration': 10991,
1384                     'upload_date': '20161111',
1385                     'uploader': 'Team PGP',
1386                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1387                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1388                 },
1389             }, {
1390                 'info_dict': {
1391                     'id': 'RtAMM00gpVc',
1392                     'ext': 'mp4',
1393                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1394                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1395                     'duration': 10995,
1396                     'upload_date': '20161111',
1397                     'uploader': 'Team PGP',
1398                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1399                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1400                 },
1401             }, {
1402                 'info_dict': {
1403                     'id': '6N2fdlP3C5U',
1404                     'ext': 'mp4',
1405                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1406                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1407                     'duration': 10990,
1408                     'upload_date': '20161111',
1409                     'uploader': 'Team PGP',
1410                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1411                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1412                 },
1413             }],
1414             'params': {
1415                 'skip_download': True,
1416             },
1417             'skip': 'Not multifeed anymore',
1418         },
1419         {
1420             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1421             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1422             'info_dict': {
1423                 'id': 'gVfLd0zydlo',
1424                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1425             },
1426             'playlist_count': 2,
1427             'skip': 'Not multifeed anymore',
1428         },
1429         {
1430             'url': 'https://vid.plus/FlRa-iH7PGw',
1431             'only_matching': True,
1432         },
1433         {
1434             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1435             'only_matching': True,
1436         },
1437         {
1438             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1439             # Also tests cut-off URL expansion in video description (see
1440             # https://github.com/ytdl-org/youtube-dl/issues/1892,
1441             # https://github.com/ytdl-org/youtube-dl/issues/8164)
1442             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1443             'info_dict': {
1444                 'id': 'lsguqyKfVQg',
1445                 'ext': 'mp4',
1446                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1447                 'alt_title': 'Dark Walk',
1448                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1449                 'duration': 133,
1450                 'upload_date': '20151119',
1451                 'uploader_id': 'IronSoulElf',
1452                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1453                 'uploader': 'IronSoulElf',
1454                 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1455                 'track': 'Dark Walk',
1456                 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1457                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1458             },
1459             'params': {
1460                 'skip_download': True,
1461             },
1462         },
1463         {
1464             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1465             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1466             'only_matching': True,
1467         },
1468         {
1469             # Video with yt:stretch=17:0
1470             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1471             'info_dict': {
1472                 'id': 'Q39EVAstoRM',
1473                 'ext': 'mp4',
1474                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1475                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1476                 'upload_date': '20151107',
1477                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1478                 'uploader': 'CH GAMER DROID',
1479             },
1480             'params': {
1481                 'skip_download': True,
1482             },
1483             'skip': 'This video does not exist.',
1484         },
1485         {
1486             # Video with incomplete 'yt:stretch=16:'
1487             'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1488             'only_matching': True,
1489         },
1490         {
1491             # Video licensed under Creative Commons
1492             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1493             'info_dict': {
1494                 'id': 'M4gD1WSo5mA',
1495                 'ext': 'mp4',
1496                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1497                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1498                 'duration': 721,
1499                 'upload_date': '20150127',
1500                 'uploader_id': 'BerkmanCenter',
1501                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1502                 'uploader': 'The Berkman Klein Center for Internet & Society',
1503                 'license': 'Creative Commons Attribution license (reuse allowed)',
1504             },
1505             'params': {
1506                 'skip_download': True,
1507             },
1508         },
1509         {
1510             # Channel-like uploader_url
1511             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1512             'info_dict': {
1513                 'id': 'eQcmzGIKrzg',
1514                 'ext': 'mp4',
1515                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1516                 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1517                 'duration': 4060,
1518                 'upload_date': '20151119',
1519                 'uploader': 'Bernie Sanders',
1520                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1521                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1522                 'license': 'Creative Commons Attribution license (reuse allowed)',
1523             },
1524             'params': {
1525                 'skip_download': True,
1526             },
1527         },
1528         {
1529             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1530             'only_matching': True,
1531         },
1532         {
1533             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1534             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1535             'only_matching': True,
1536         },
1537         {
1538             # Rental video preview
1539             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1540             'info_dict': {
1541                 'id': 'uGpuVWrhIzE',
1542                 'ext': 'mp4',
1543                 'title': 'Piku - Trailer',
1544                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1545                 'upload_date': '20150811',
1546                 'uploader': 'FlixMatrix',
1547                 'uploader_id': 'FlixMatrixKaravan',
1548                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1549                 'license': 'Standard YouTube License',
1550             },
1551             'params': {
1552                 'skip_download': True,
1553             },
1554             'skip': 'This video is not available.',
1555         },
1556         {
1557             # YouTube Red video with episode data
1558             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1559             'info_dict': {
1560                 'id': 'iqKdEhx-dD4',
1561                 'ext': 'mp4',
1562                 'title': 'Isolation - Mind Field (Ep 1)',
1563                 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1564                 'duration': 2085,
1565                 'upload_date': '20170118',
1566                 'uploader': 'Vsauce',
1567                 'uploader_id': 'Vsauce',
1568                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1569                 'series': 'Mind Field',
1570                 'season_number': 1,
1571                 'episode_number': 1,
1572             },
1573             'params': {
1574                 'skip_download': True,
1575             },
1576             'expected_warnings': [
1577                 'Skipping DASH manifest',
1578             ],
1579         },
1580         {
1581             # The following content has been identified by the YouTube community
1582             # as inappropriate or offensive to some audiences.
1583             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1584             'info_dict': {
1585                 'id': '6SJNVb0GnPI',
1586                 'ext': 'mp4',
1587                 'title': 'Race Differences in Intelligence',
1588                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1589                 'duration': 965,
1590                 'upload_date': '20140124',
1591                 'uploader': 'New Century Foundation',
1592                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1593                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1594             },
1595             'params': {
1596                 'skip_download': True,
1597             },
1598             'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1599         },
1600         {
1601             # itag 212
1602             'url': '1t24XAntNCY',
1603             'only_matching': True,
1604         },
1605         {
1606             # geo restricted to JP
1607             'url': 'sJL6WA-aGkQ',
1608             'only_matching': True,
1609         },
1610         {
1611             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1612             'only_matching': True,
1613         },
1614         {
1615             'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1616             'only_matching': True,
1617         },
1618         {
1619             # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1620             'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1621             'only_matching': True,
1622         },
1623         {
1624             # DRM protected
1625             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1626             'only_matching': True,
1627         },
1628         {
1629             # Video with unsupported adaptive stream type formats
1630             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1631             'info_dict': {
1632                 'id': 'Z4Vy8R84T1U',
1633                 'ext': 'mp4',
1634                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1635                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1636                 'duration': 433,
1637                 'upload_date': '20130923',
1638                 'uploader': 'Amelia Putri Harwita',
1639                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1640                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1641                 'formats': 'maxcount:10',
1642             },
1643             'params': {
1644                 'skip_download': True,
1645                 'youtube_include_dash_manifest': False,
1646             },
1647             'skip': 'not actual anymore',
1648         },
1649         {
1650             # Youtube Music Auto-generated description
1651             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1652             'info_dict': {
1653                 'id': 'MgNrAu2pzNs',
1654                 'ext': 'mp4',
1655                 'title': 'Voyeur Girl',
1656                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1657                 'upload_date': '20190312',
1658                 'uploader': 'Stephen - Topic',
1659                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1660                 'artist': 'Stephen',
1661                 'track': 'Voyeur Girl',
1662                 'album': 'it\'s too much love to know my dear',
1663                 'release_date': '20190313',
1664                 'release_year': 2019,
1665             },
1666             'params': {
1667                 'skip_download': True,
1668             },
1669         },
1670         {
1671             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1672             'only_matching': True,
1673         },
1674         {
1675             # invalid -> valid video id redirection
1676             'url': 'DJztXj2GPfl',
1677             'info_dict': {
1678                 'id': 'DJztXj2GPfk',
1679                 'ext': 'mp4',
1680                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1681                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1682                 'upload_date': '20090125',
1683                 'uploader': 'Prochorowka',
1684                 'uploader_id': 'Prochorowka',
1685                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1686                 'artist': 'Panjabi MC',
1687                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1688                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1689             },
1690             'params': {
1691                 'skip_download': True,
1692             },
1693             'skip': 'Video unavailable',
1694         },
1695         {
1696             # empty description results in an empty string
1697             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1698             'info_dict': {
1699                 'id': 'x41yOUIvK2k',
1700                 'ext': 'mp4',
1701                 'title': 'IMG 3456',
1702                 'description': '',
1703                 'upload_date': '20170613',
1704                 'uploader_id': 'ElevageOrVert',
1705                 'uploader': 'ElevageOrVert',
1706             },
1707             'params': {
1708                 'skip_download': True,
1709             },
1710         },
1711         {
1712             # with '};' inside yt initial data (see [1])
1713             # see [2] for an example with '};' inside ytInitialPlayerResponse
1714             # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1715             # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1716             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1717             'info_dict': {
1718                 'id': 'CHqg6qOn4no',
1719                 'ext': 'mp4',
1720                 'title': 'Part 77   Sort a list of simple types in c#',
1721                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1722                 'upload_date': '20130831',
1723                 'uploader_id': 'kudvenkat',
1724                 'uploader': 'kudvenkat',
1725             },
1726             'params': {
1727                 'skip_download': True,
1728             },
1729         },
1730         {
1731             # another example of '};' in ytInitialData
1732             'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1733             'only_matching': True,
1734         },
1735         {
1736             'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1737             'only_matching': True,
1738         },
1739         {
1740             # https://github.com/ytdl-org/youtube-dl/pull/28094
1741             'url': 'OtqTfy26tG0',
1742             'info_dict': {
1743                 'id': 'OtqTfy26tG0',
1744                 'ext': 'mp4',
1745                 'title': 'Burn Out',
1746                 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1747                 'upload_date': '20141120',
1748                 'uploader': 'The Cinematic Orchestra - Topic',
1749                 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1750                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1751                 'artist': 'The Cinematic Orchestra',
1752                 'track': 'Burn Out',
1753                 'album': 'Every Day',
1754                 'release_data': None,
1755                 'release_year': None,
1756             },
1757             'params': {
1758                 'skip_download': True,
1759             },
1760         },
1761         {
1762             # controversial video, only works with bpctr when authenticated with cookies
1763             'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1764             'only_matching': True,
1765         },
1766         {
1767             # controversial video, requires bpctr/contentCheckOk
1768             'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1769             'info_dict': {
1770                 'id': 'SZJvDhaSDnc',
1771                 'ext': 'mp4',
1772                 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1773                 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1774                 'uploader': 'CBS This Morning',
1775                 'uploader_id': 'CBSThisMorning',
1776                 'upload_date': '20140716',
1777                 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1778             }
1779         },
1780         {
1781             # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1782             'url': 'cBvYw8_A0vQ',
1783             'info_dict': {
1784                 'id': 'cBvYw8_A0vQ',
1785                 'ext': 'mp4',
1786                 'title': '4K Ueno Okachimachi  Street  Scenes  上野御徒町歩き',
1787                 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1788                 'upload_date': '20201120',
1789                 'uploader': 'Walk around Japan',
1790                 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1791                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1792             },
1793             'params': {
1794                 'skip_download': True,
1795             },
1796         }, {
1797             # Has multiple audio streams
1798             'url': 'WaOKSUlf4TM',
1799             'only_matching': True
1800         }, {
1801             # Requires Premium: has format 141 when requested using YTM url
1802             'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1803             'only_matching': True
1804         }, {
1805             # multiple subtitles with same lang_code
1806             'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1807             'only_matching': True,
1808         }, {
1809             # Force use android client fallback
1810             'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1811             'info_dict': {
1812                 'id': 'YOelRv7fMxY',
1813                 'title': 'DIGGING A SECRET TUNNEL Part 1',
1814                 'ext': '3gp',
1815                 'upload_date': '20210624',
1816                 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1817                 'uploader': 'colinfurze',
1818                 'uploader_id': 'colinfurze',
1819                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1820                 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1821             },
1822             'params': {
1823                 'format': '17',  # 3gp format available on android
1824                 'extractor_args': {'youtube': {'player_client': ['android']}},
1825             },
1826         },
1827         {
1828             # Skip download of additional client configs (remix client config in this case)
1829             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1830             'only_matching': True,
1831             'params': {
1832                 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1833             },
1834         }, {
1835             # shorts
1836             'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1837             'only_matching': True,
1838         },
1839     ]
1840
1841     @classmethod
1842     def suitable(cls, url):
1843         from ..utils import parse_qs
1844
1845         qs = parse_qs(url)
1846         if qs.get('list', [None])[0]:
1847             return False
1848         return super(YoutubeIE, cls).suitable(url)
1849
1850     def __init__(self, *args, **kwargs):
1851         super(YoutubeIE, self).__init__(*args, **kwargs)
1852         self._code_cache = {}
1853         self._player_cache = {}
1854
1855     def _extract_player_url(self, ytcfg=None, webpage=None):
1856         player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1857         if not player_url and webpage:
1858             player_url = self._search_regex(
1859                 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1860                 webpage, 'player URL', fatal=False)
1861         if not player_url:
1862             return None
1863         if player_url.startswith('//'):
1864             player_url = 'https:' + player_url
1865         elif not re.match(r'https?://', player_url):
1866             player_url = compat_urlparse.urljoin(
1867                 'https://www.youtube.com', player_url)
1868         return player_url
1869
1870     def _signature_cache_id(self, example_sig):
1871         """ Return a string representation of a signature """
1872         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1873
1874     @classmethod
1875     def _extract_player_info(cls, player_url):
1876         for player_re in cls._PLAYER_INFO_RE:
1877             id_m = re.search(player_re, player_url)
1878             if id_m:
1879                 break
1880         else:
1881             raise ExtractorError('Cannot identify player %r' % player_url)
1882         return id_m.group('id')
1883
1884     def _load_player(self, video_id, player_url, fatal=True) -> bool:
1885         player_id = self._extract_player_info(player_url)
1886         if player_id not in self._code_cache:
1887             self._code_cache[player_id] = self._download_webpage(
1888                 player_url, video_id, fatal=fatal,
1889                 note='Downloading player ' + player_id,
1890                 errnote='Download of %s failed' % player_url)
1891         return player_id in self._code_cache
1892
1893     def _extract_signature_function(self, video_id, player_url, example_sig):
1894         player_id = self._extract_player_info(player_url)
1895
1896         # Read from filesystem cache
1897         func_id = 'js_%s_%s' % (
1898             player_id, self._signature_cache_id(example_sig))
1899         assert os.path.basename(func_id) == func_id
1900
1901         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1902         if cache_spec is not None:
1903             return lambda s: ''.join(s[i] for i in cache_spec)
1904
1905         if self._load_player(video_id, player_url):
1906             code = self._code_cache[player_id]
1907             res = self._parse_sig_js(code)
1908
1909             test_string = ''.join(map(compat_chr, range(len(example_sig))))
1910             cache_res = res(test_string)
1911             cache_spec = [ord(c) for c in cache_res]
1912
1913             self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1914             return res
1915
1916     def _print_sig_code(self, func, example_sig):
1917         def gen_sig_code(idxs):
1918             def _genslice(start, end, step):
1919                 starts = '' if start == 0 else str(start)
1920                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1921                 steps = '' if step == 1 else (':%d' % step)
1922                 return 's[%s%s%s]' % (starts, ends, steps)
1923
1924             step = None
1925             # Quelch pyflakes warnings - start will be set when step is set
1926             start = '(Never used)'
1927             for i, prev in zip(idxs[1:], idxs[:-1]):
1928                 if step is not None:
1929                     if i - prev == step:
1930                         continue
1931                     yield _genslice(start, prev, step)
1932                     step = None
1933                     continue
1934                 if i - prev in [-1, 1]:
1935                     step = i - prev
1936                     start = prev
1937                     continue
1938                 else:
1939                     yield 's[%d]' % prev
1940             if step is None:
1941                 yield 's[%d]' % i
1942             else:
1943                 yield _genslice(start, i, step)
1944
1945         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1946         cache_res = func(test_string)
1947         cache_spec = [ord(c) for c in cache_res]
1948         expr_code = ' + '.join(gen_sig_code(cache_spec))
1949         signature_id_tuple = '(%s)' % (
1950             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1951         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1952                 '    return %s\n') % (signature_id_tuple, expr_code)
1953         self.to_screen('Extracted signature function:\n' + code)
1954
1955     def _parse_sig_js(self, jscode):
1956         funcname = self._search_regex(
1957             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1958              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1959              r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
1960              r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
1961              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1962              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1963              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1964              # Obsolete patterns
1965              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1966              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1967              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1968              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1969              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1970              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1971              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1972              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1973             jscode, 'Initial JS player signature function name', group='sig')
1974
1975         jsi = JSInterpreter(jscode)
1976         initial_function = jsi.extract_function(funcname)
1977         return lambda s: initial_function([s])
1978
1979     def _decrypt_signature(self, s, video_id, player_url):
1980         """Turn the encrypted s field into a working signature"""
1981
1982         if player_url is None:
1983             raise ExtractorError('Cannot decrypt signature without player_url')
1984
1985         try:
1986             player_id = (player_url, self._signature_cache_id(s))
1987             if player_id not in self._player_cache:
1988                 func = self._extract_signature_function(
1989                     video_id, player_url, s
1990                 )
1991                 self._player_cache[player_id] = func
1992             func = self._player_cache[player_id]
1993             if self.get_param('youtube_print_sig_code'):
1994                 self._print_sig_code(func, s)
1995             return func(s)
1996         except Exception as e:
1997             tb = traceback.format_exc()
1998             raise ExtractorError(
1999                 'Signature extraction failed: ' + tb, cause=e)
2000
2001     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2002         """
2003         Extract signatureTimestamp (sts)
2004         Required to tell API what sig/player version is in use.
2005         """
2006         sts = None
2007         if isinstance(ytcfg, dict):
2008             sts = int_or_none(ytcfg.get('STS'))
2009
2010         if not sts:
2011             # Attempt to extract from player
2012             if player_url is None:
2013                 error_msg = 'Cannot extract signature timestamp without player_url.'
2014                 if fatal:
2015                     raise ExtractorError(error_msg)
2016                 self.report_warning(error_msg)
2017                 return
2018             if self._load_player(video_id, player_url, fatal=fatal):
2019                 player_id = self._extract_player_info(player_url)
2020                 code = self._code_cache[player_id]
2021                 sts = int_or_none(self._search_regex(
2022                     r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2023                     'JS player signature timestamp', group='sts', fatal=fatal))
2024         return sts
2025
2026     def _mark_watched(self, video_id, player_responses):
2027         playback_url = traverse_obj(
2028             player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2029             expected_type=url_or_none, get_all=False)
2030         if not playback_url:
2031             self.report_warning('Unable to mark watched')
2032             return
2033         parsed_playback_url = compat_urlparse.urlparse(playback_url)
2034         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2035
2036         # cpn generation algorithm is reverse engineered from base.js.
2037         # In fact it works even with dummy cpn.
2038         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2039         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2040
2041         qs.update({
2042             'ver': ['2'],
2043             'cpn': [cpn],
2044         })
2045         playback_url = compat_urlparse.urlunparse(
2046             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2047
2048         self._download_webpage(
2049             playback_url, video_id, 'Marking watched',
2050             'Unable to mark watched', fatal=False)
2051
2052     @staticmethod
2053     def _extract_urls(webpage):
2054         # Embedded YouTube player
2055         entries = [
2056             unescapeHTML(mobj.group('url'))
2057             for mobj in re.finditer(r'''(?x)
2058             (?:
2059                 <iframe[^>]+?src=|
2060                 data-video-url=|
2061                 <embed[^>]+?src=|
2062                 embedSWF\(?:\s*|
2063                 <object[^>]+data=|
2064                 new\s+SWFObject\(
2065             )
2066             (["\'])
2067                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2068                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2069             \1''', webpage)]
2070
2071         # lazyYT YouTube embed
2072         entries.extend(list(map(
2073             unescapeHTML,
2074             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2075
2076         # Wordpress "YouTube Video Importer" plugin
2077         matches = re.findall(r'''(?x)<div[^>]+
2078             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2079             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2080         entries.extend(m[-1] for m in matches)
2081
2082         return entries
2083
2084     @staticmethod
2085     def _extract_url(webpage):
2086         urls = YoutubeIE._extract_urls(webpage)
2087         return urls[0] if urls else None
2088
2089     @classmethod
2090     def extract_id(cls, url):
2091         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2092         if mobj is None:
2093             raise ExtractorError('Invalid URL: %s' % url)
2094         return mobj.group('id')
2095
2096     def _extract_chapters_from_json(self, data, duration):
2097         chapter_list = traverse_obj(
2098             data, (
2099                 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2100                 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2101             ), expected_type=list)
2102
2103         return self._extract_chapters(
2104             chapter_list,
2105             chapter_time=lambda chapter: float_or_none(
2106                 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2107             chapter_title=lambda chapter: traverse_obj(
2108                 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2109             duration=duration)
2110
2111     def _extract_chapters_from_engagement_panel(self, data, duration):
2112         content_list = traverse_obj(
2113             data,
2114             ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2115             expected_type=list, default=[])
2116         chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2117         chapter_title = lambda chapter: self._get_text(chapter, 'title')
2118
2119         return next((
2120             filter(None, (
2121                 self._extract_chapters(
2122                     traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2123                     chapter_time, chapter_title, duration)
2124                 for contents in content_list
2125             ))), [])
2126
2127     def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2128         chapters = []
2129         last_chapter = {'start_time': 0}
2130         for idx, chapter in enumerate(chapter_list or []):
2131             title = chapter_title(chapter)
2132             start_time = chapter_time(chapter)
2133             if start_time is None:
2134                 continue
2135             last_chapter['end_time'] = start_time
2136             if start_time < last_chapter['start_time']:
2137                 if idx == 1:
2138                     chapters.pop()
2139                     self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2140                 else:
2141                     self.report_warning(f'Invalid start time for chapter "{title}"')
2142                     continue
2143             last_chapter = {'start_time': start_time, 'title': title}
2144             chapters.append(last_chapter)
2145         last_chapter['end_time'] = duration
2146         return chapters
2147
2148     def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2149         return self._parse_json(self._search_regex(
2150             (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2151              regex), webpage, name, default='{}'), video_id, fatal=False)
2152
2153     @staticmethod
2154     def parse_time_text(time_text):
2155         """
2156         Parse the comment time text
2157         time_text is in the format 'X units ago (edited)'
2158         """
2159         time_text_split = time_text.split(' ')
2160         if len(time_text_split) >= 3:
2161             try:
2162                 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2163             except ValueError:
2164                 return None
2165
2166     def _extract_comment(self, comment_renderer, parent=None):
2167         comment_id = comment_renderer.get('commentId')
2168         if not comment_id:
2169             return
2170
2171         text = self._get_text(comment_renderer, 'contentText')
2172
2173         # note: timestamp is an estimate calculated from the current time and time_text
2174         time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2175         time_text_dt = self.parse_time_text(time_text)
2176         if isinstance(time_text_dt, datetime.datetime):
2177             timestamp = calendar.timegm(time_text_dt.timetuple())
2178         author = self._get_text(comment_renderer, 'authorText')
2179         author_id = try_get(comment_renderer,
2180                             lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2181
2182         votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2183                                                        lambda x: x['likeCount']), compat_str)) or 0
2184         author_thumbnail = try_get(comment_renderer,
2185                                    lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2186
2187         author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2188         is_favorited = 'creatorHeart' in (try_get(
2189             comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2190         return {
2191             'id': comment_id,
2192             'text': text,
2193             'timestamp': timestamp,
2194             'time_text': time_text,
2195             'like_count': votes,
2196             'is_favorited': is_favorited,
2197             'author': author,
2198             'author_id': author_id,
2199             'author_thumbnail': author_thumbnail,
2200             'author_is_uploader': author_is_uploader,
2201             'parent': parent or 'root'
2202         }
2203
2204     def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
2205                          ytcfg, video_id, parent=None, comment_counts=None):
2206
2207         def extract_header(contents):
2208             _total_comments = 0
2209             _continuation = None
2210             for content in contents:
2211                 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
2212                 expected_comment_count = parse_count(self._get_text(
2213                     comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
2214
2215                 if expected_comment_count:
2216                     comment_counts[1] = expected_comment_count
2217                     self.to_screen('Downloading ~%d comments' % expected_comment_count)
2218                     _total_comments = comment_counts[1]
2219                 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2220                 comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top
2221
2222                 sort_menu_item = try_get(
2223                     comments_header_renderer,
2224                     lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2225                 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2226
2227                 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2228                 if not _continuation:
2229                     continue
2230
2231                 sort_text = sort_menu_item.get('title')
2232                 if isinstance(sort_text, compat_str):
2233                     sort_text = sort_text.lower()
2234                 else:
2235                     sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2236                 self.to_screen('Sorting comments by %s' % sort_text)
2237                 break
2238             return _total_comments, _continuation
2239
2240         def extract_thread(contents):
2241             if not parent:
2242                 comment_counts[2] = 0
2243             for content in contents:
2244                 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2245                 comment_renderer = try_get(
2246                     comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
2247                     content, (lambda x: x['commentRenderer'], dict))
2248
2249                 if not comment_renderer:
2250                     continue
2251                 comment = self._extract_comment(comment_renderer, parent)
2252                 if not comment:
2253                     continue
2254                 comment_counts[0] += 1
2255                 yield comment
2256                 # Attempt to get the replies
2257                 comment_replies_renderer = try_get(
2258                     comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2259
2260                 if comment_replies_renderer:
2261                     comment_counts[2] += 1
2262                     comment_entries_iter = self._comment_entries(
2263                         comment_replies_renderer, identity_token, account_syncid, ytcfg,
2264                         video_id, parent=comment.get('id'), comment_counts=comment_counts)
2265
2266                     for reply_comment in comment_entries_iter:
2267                         yield reply_comment
2268
2269         # YouTube comments have a max depth of 2
2270         max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
2271         if max_depth == 1 and parent:
2272             return
2273         if not comment_counts:
2274             # comment so far, est. total comments, current comment thread #
2275             comment_counts = [0, 0, 0]
2276
2277         continuation = self._extract_continuation(root_continuation_data)
2278         if continuation and len(continuation['continuation']) < 27:
2279             self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2280             continuation_token = self._generate_comment_continuation(video_id)
2281             continuation = self._build_api_continuation_query(continuation_token, None)
2282
2283         visitor_data = None
2284         is_first_continuation = parent is None
2285
2286         for page_num in itertools.count(0):
2287             if not continuation:
2288                 break
2289             headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
2290             comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2291             if page_num == 0:
2292                 if is_first_continuation:
2293                     note_prefix = 'Downloading comment section API JSON'
2294                 else:
2295                     note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
2296                         comment_counts[2], comment_prog_str)
2297             else:
2298                 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2299                     '       ' if parent else '', ' replies' if parent else '',
2300                     page_num, comment_prog_str)
2301
2302             response = self._extract_response(
2303                 item_id=None, query=continuation,
2304                 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2305                 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
2306             if not response:
2307                 break
2308             visitor_data = try_get(
2309                 response,
2310                 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2311                 compat_str) or visitor_data
2312
2313             continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
2314
2315             continuation = None
2316             if isinstance(continuation_contents, list):
2317                 for continuation_section in continuation_contents:
2318                     if not isinstance(continuation_section, dict):
2319                         continue
2320                     continuation_items = try_get(
2321                         continuation_section,
2322                         (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2323                          lambda x: x['appendContinuationItemsAction']['continuationItems']),
2324                         list) or []
2325                     if is_first_continuation:
2326                         total_comments, continuation = extract_header(continuation_items)
2327                         if total_comments:
2328                             yield total_comments
2329                         is_first_continuation = False
2330                         if continuation:
2331                             break
2332                         continue
2333                     count = 0
2334                     for count, entry in enumerate(extract_thread(continuation_items)):
2335                         yield entry
2336                     continuation = self._extract_continuation({'contents': continuation_items})
2337                     if continuation:
2338                         # Sometimes YouTube provides a continuation without any comments
2339                         # In most cases we end up just downloading these with very little comments to come.
2340                         if count == 0:
2341                             if not parent:
2342                                 self.report_warning('No comments received - assuming end of comments')
2343                             continuation = None
2344                         break
2345
2346             # Deprecated response structure
2347             elif isinstance(continuation_contents, dict):
2348                 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2349                 for key, continuation_renderer in continuation_contents.items():
2350                     if key not in known_continuation_renderers:
2351                         continue
2352                     if not isinstance(continuation_renderer, dict):
2353                         continue
2354                     if is_first_continuation:
2355                         header_continuation_items = [continuation_renderer.get('header') or {}]
2356                         total_comments, continuation = extract_header(header_continuation_items)
2357                         if total_comments:
2358                             yield total_comments
2359                         is_first_continuation = False
2360                         if continuation:
2361                             break
2362
2363                     # Sometimes YouTube provides a continuation without any comments
2364                     # In most cases we end up just downloading these with very little comments to come.
2365                     count = 0
2366                     for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2367                         yield entry
2368                     continuation = self._extract_continuation(continuation_renderer)
2369                     if count == 0:
2370                         if not parent:
2371                             self.report_warning('No comments received - assuming end of comments')
2372                         continuation = None
2373                     break
2374
2375     @staticmethod
2376     def _generate_comment_continuation(video_id):
2377         """
2378         Generates initial comment section continuation token from given video id
2379         """
2380         b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2381         parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2382         new_continuation_intlist = list(itertools.chain.from_iterable(
2383             [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2384         return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2385
2386     def _extract_comments(self, ytcfg, video_id, contents, webpage):
2387         """Entry for comment extraction"""
2388         def _real_comment_extract(contents):
2389             if isinstance(contents, list):
2390                 for entry in contents:
2391                     for key, renderer in entry.items():
2392                         if key not in known_entry_comment_renderers:
2393                             continue
2394                         yield from self._comment_entries(
2395                             renderer, video_id=video_id, ytcfg=ytcfg,
2396                             identity_token=self._extract_identity_token(webpage, item_id=video_id),
2397                             account_syncid=self._extract_account_syncid(ytcfg))
2398                         break
2399         comments = []
2400         known_entry_comment_renderers = ('itemSectionRenderer',)
2401         estimated_total = 0
2402         max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2403         # Force English regardless of account setting to prevent parsing issues
2404         # See: https://github.com/yt-dlp/yt-dlp/issues/532
2405         ytcfg = copy.deepcopy(ytcfg)
2406         traverse_obj(
2407             ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
2408         try:
2409             for comment in _real_comment_extract(contents):
2410                 if len(comments) >= max_comments:
2411                     break
2412                 if isinstance(comment, int):
2413                     estimated_total = comment
2414                     continue
2415                 comments.append(comment)
2416         except KeyboardInterrupt:
2417             self.to_screen('Interrupted by user')
2418         self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2419         return {
2420             'comments': comments,
2421             'comment_count': len(comments),
2422         }
2423
2424     @staticmethod
2425     def _generate_player_context(sts=None):
2426         context = {
2427             'html5Preference': 'HTML5_PREF_WANTS',
2428         }
2429         if sts is not None:
2430             context['signatureTimestamp'] = sts
2431         return {
2432             'playbackContext': {
2433                 'contentPlaybackContext': context
2434             },
2435             'contentCheckOk': True,
2436             'racyCheckOk': True
2437         }
2438
2439     @staticmethod
2440     def _is_agegated(player_response):
2441         if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2442             return True
2443
2444         reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2445         AGE_GATE_REASONS = (
2446             'confirm your age', 'age-restricted', 'inappropriate',  # reason
2447             'age_verification_required', 'age_check_required',  # status
2448         )
2449         return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2450
2451     @staticmethod
2452     def _is_unplayable(player_response):
2453         return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2454
2455     def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
2456
2457         session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2458         syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2459         sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
2460         headers = self.generate_api_headers(
2461             player_ytcfg, identity_token, syncid,
2462             default_client=client, session_index=session_index)
2463
2464         yt_query = {'videoId': video_id}
2465         yt_query.update(self._generate_player_context(sts))
2466         return self._extract_response(
2467             item_id=video_id, ep='player', query=yt_query,
2468             ytcfg=player_ytcfg, headers=headers, fatal=True,
2469             default_client=client,
2470             note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2471         ) or None
2472
2473     def _get_requested_clients(self, url, smuggled_data):
2474         requested_clients = []
2475         allowed_clients = sorted(
2476             [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2477             key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2478         for client in self._configuration_arg('player_client'):
2479             if client in allowed_clients:
2480                 requested_clients.append(client)
2481             elif client == 'all':
2482                 requested_clients.extend(allowed_clients)
2483             else:
2484                 self.report_warning(f'Skipping unsupported client {client}')
2485         if not requested_clients:
2486             requested_clients = ['android', 'web']
2487
2488         if smuggled_data.get('is_music_url') or self.is_music_url(url):
2489             requested_clients.extend(
2490                 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2491
2492         return orderedSet(requested_clients)
2493
2494     def _extract_player_ytcfg(self, client, video_id):
2495         url = {
2496             'web_music': 'https://music.youtube.com',
2497             'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2498         }.get(client)
2499         if not url:
2500             return {}
2501         webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2502         return self.extract_ytcfg(video_id, webpage) or {}
2503
2504     def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
2505         initial_pr = None
2506         if webpage:
2507             initial_pr = self._extract_yt_initial_variable(
2508                 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2509                 video_id, 'initial player response')
2510
2511         original_clients = clients
2512         clients = clients[::-1]
2513
2514         def append_client(client_name):
2515             if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
2516                 clients.append(client_name)
2517
2518         # Android player_response does not have microFormats which are needed for
2519         # extraction of some data. So we return the initial_pr with formats
2520         # stripped out even if not requested by the user
2521         # See: https://github.com/yt-dlp/yt-dlp/issues/501
2522         yielded_pr = False
2523         if initial_pr:
2524             pr = dict(initial_pr)
2525             pr['streamingData'] = None
2526             yielded_pr = True
2527             yield pr
2528
2529         last_error = None
2530         while clients:
2531             client = clients.pop()
2532             player_ytcfg = master_ytcfg if client == 'web' else {}
2533             if 'configs' not in self._configuration_arg('player_skip'):
2534                 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
2535
2536             try:
2537                 pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
2538                     client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
2539             except ExtractorError as e:
2540                 if last_error:
2541                     self.report_warning(last_error)
2542                 last_error = e
2543                 continue
2544
2545             if pr:
2546                 yielded_pr = True
2547                 yield pr
2548
2549             # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
2550             if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header():
2551                 append_client(client.replace('_agegate', '_creator'))
2552             elif self._is_agegated(pr):
2553                 append_client(f'{client}_agegate')
2554
2555         if last_error:
2556             if not yielded_pr:
2557                 raise last_error
2558             self.report_warning(last_error)
2559
2560     def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2561         itags, stream_ids = [], []
2562         itag_qualities, res_qualities = {}, {}
2563         q = qualities([
2564             # Normally tiny is the smallest video-only formats. But
2565             # audio-only formats with unknown quality may get tagged as tiny
2566             'tiny',
2567             'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
2568             'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2569         ])
2570         streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2571
2572         for fmt in streaming_formats:
2573             if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2574                 continue
2575
2576             itag = str_or_none(fmt.get('itag'))
2577             audio_track = fmt.get('audioTrack') or {}
2578             stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2579             if stream_id in stream_ids:
2580                 continue
2581
2582             quality = fmt.get('quality')
2583             height = int_or_none(fmt.get('height'))
2584             if quality == 'tiny' or not quality:
2585                 quality = fmt.get('audioQuality', '').lower() or quality
2586             # The 3gp format (17) in android client has a quality of "small",
2587             # but is actually worse than other formats
2588             if itag == '17':
2589                 quality = 'tiny'
2590             if quality:
2591                 if itag:
2592                     itag_qualities[itag] = quality
2593                 if height:
2594                     res_qualities[height] = quality
2595             # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2596             # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2597             # number of fragment that would subsequently requested with (`&sq=N`)
2598             if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2599                 continue
2600
2601             fmt_url = fmt.get('url')
2602             if not fmt_url:
2603                 sc = compat_parse_qs(fmt.get('signatureCipher'))
2604                 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2605                 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2606                 if not (sc and fmt_url and encrypted_sig):
2607                     continue
2608                 if not player_url:
2609                     continue
2610                 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2611                 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2612                 fmt_url += '&' + sp + '=' + signature
2613
2614             if itag:
2615                 itags.append(itag)
2616                 stream_ids.append(stream_id)
2617
2618             tbr = float_or_none(
2619                 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2620             dct = {
2621                 'asr': int_or_none(fmt.get('audioSampleRate')),
2622                 'filesize': int_or_none(fmt.get('contentLength')),
2623                 'format_id': itag,
2624                 'format_note': ', '.join(filter(None, (
2625                     '%s%s' % (audio_track.get('displayName') or '',
2626                               ' (default)' if audio_track.get('audioIsDefault') else ''),
2627                     fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
2628                 'fps': int_or_none(fmt.get('fps')),
2629                 'height': height,
2630                 'quality': q(quality),
2631                 'tbr': tbr,
2632                 'url': fmt_url,
2633                 'width': int_or_none(fmt.get('width')),
2634                 'language': audio_track.get('id', '').split('.')[0],
2635                 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2636             }
2637             mime_mobj = re.match(
2638                 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2639             if mime_mobj:
2640                 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2641                 dct.update(parse_codecs(mime_mobj.group(2)))
2642             no_audio = dct.get('acodec') == 'none'
2643             no_video = dct.get('vcodec') == 'none'
2644             if no_audio:
2645                 dct['vbr'] = tbr
2646             if no_video:
2647                 dct['abr'] = tbr
2648             if no_audio or no_video:
2649                 dct['downloader_options'] = {
2650                     # Youtube throttles chunks >~10M
2651                     'http_chunk_size': 10485760,
2652                 }
2653                 if dct.get('ext'):
2654                     dct['container'] = dct['ext'] + '_dash'
2655             yield dct
2656
2657         skip_manifests = self._configuration_arg('skip')
2658         get_dash = (
2659             (not is_live or self._configuration_arg('include_live_dash'))
2660             and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
2661         get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2662
2663         def guess_quality(f):
2664             for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
2665                 if val in qdict:
2666                     return q(qdict[val])
2667             return -1
2668
2669         for sd in streaming_data:
2670             hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2671             if hls_manifest_url:
2672                 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2673                     itag = self._search_regex(
2674                         r'/itag/(\d+)', f['url'], 'itag', default=None)
2675                     if itag in itags:
2676                         continue
2677                     if itag:
2678                         f['format_id'] = itag
2679                         itags.append(itag)
2680                     f['quality'] = guess_quality(f)
2681                     yield f
2682
2683             dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2684             if dash_manifest_url:
2685                 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2686                     itag = f['format_id']
2687                     if itag in itags:
2688                         continue
2689                     if itag:
2690                         itags.append(itag)
2691                     f['quality'] = guess_quality(f)
2692                     filesize = int_or_none(self._search_regex(
2693                         r'/clen/(\d+)', f.get('fragment_base_url')
2694                         or f['url'], 'file size', default=None))
2695                     if filesize:
2696                         f['filesize'] = filesize
2697                     yield f
2698
2699     def _real_extract(self, url):
2700         url, smuggled_data = unsmuggle_url(url, {})
2701         video_id = self._match_id(url)
2702
2703         base_url = self.http_scheme() + '//www.youtube.com/'
2704         webpage_url = base_url + 'watch?v=' + video_id
2705         webpage = self._download_webpage(
2706             webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2707
2708         master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2709         player_url = self._extract_player_url(master_ytcfg, webpage)
2710         identity_token = self._extract_identity_token(webpage, video_id)
2711
2712         player_responses = list(self._extract_player_responses(
2713             self._get_requested_clients(url, smuggled_data),
2714             video_id, webpage, master_ytcfg, player_url, identity_token))
2715
2716         get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2717
2718         playability_statuses = traverse_obj(
2719             player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2720
2721         trailer_video_id = get_first(
2722             playability_statuses,
2723             ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2724             expected_type=str)
2725         if trailer_video_id:
2726             return self.url_result(
2727                 trailer_video_id, self.ie_key(), trailer_video_id)
2728
2729         search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2730                        if webpage else (lambda x: None))
2731
2732         video_details = traverse_obj(
2733             player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2734         microformats = traverse_obj(
2735             player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2736             expected_type=dict, default=[])
2737         video_title = (
2738             get_first(video_details, 'title')
2739             or self._get_text(microformats, (..., 'title'))
2740             or search_meta(['og:title', 'twitter:title', 'title']))
2741         video_description = get_first(video_details, 'shortDescription')
2742
2743         if not smuggled_data.get('force_singlefeed', False):
2744             if not self.get_param('noplaylist'):
2745                 multifeed_metadata_list = get_first(
2746                     player_responses,
2747                     ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2748                     expected_type=str)
2749                 if multifeed_metadata_list:
2750                     entries = []
2751                     feed_ids = []
2752                     for feed in multifeed_metadata_list.split(','):
2753                         # Unquote should take place before split on comma (,) since textual
2754                         # fields may contain comma as well (see
2755                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
2756                         feed_data = compat_parse_qs(
2757                             compat_urllib_parse_unquote_plus(feed))
2758
2759                         def feed_entry(name):
2760                             return try_get(
2761                                 feed_data, lambda x: x[name][0], compat_str)
2762
2763                         feed_id = feed_entry('id')
2764                         if not feed_id:
2765                             continue
2766                         feed_title = feed_entry('title')
2767                         title = video_title
2768                         if feed_title:
2769                             title += ' (%s)' % feed_title
2770                         entries.append({
2771                             '_type': 'url_transparent',
2772                             'ie_key': 'Youtube',
2773                             'url': smuggle_url(
2774                                 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2775                                 {'force_singlefeed': True}),
2776                             'title': title,
2777                         })
2778                         feed_ids.append(feed_id)
2779                     self.to_screen(
2780                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2781                         % (', '.join(feed_ids), video_id))
2782                     return self.playlist_result(
2783                         entries, video_id, video_title, video_description)
2784             else:
2785                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2786
2787         live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2788         is_live = get_first(video_details, 'isLive')
2789         if is_live is None:
2790             is_live = get_first(live_broadcast_details, 'isLiveNow')
2791
2792         streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2793         formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2794
2795         if not formats:
2796             if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2797                 self.report_drm(video_id)
2798             pemr = get_first(
2799                 playability_statuses,
2800                 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2801             reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2802             subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2803             if subreason:
2804                 if subreason == 'The uploader has not made this video available in your country.':
2805                     countries = get_first(microformats, 'availableCountries')
2806                     if not countries:
2807                         regions_allowed = search_meta('regionsAllowed')
2808                         countries = regions_allowed.split(',') if regions_allowed else None
2809                     self.raise_geo_restricted(subreason, countries, metadata_available=True)
2810                 reason += f'. {subreason}'
2811             if reason:
2812                 self.raise_no_formats(reason, expected=True)
2813
2814         for f in formats:
2815             if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']:  # throttled
2816                 f['source_preference'] = -10
2817                 # TODO: this method is not reliable
2818                 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
2819
2820         # Source is given priority since formats that throttle are given lower source_preference
2821         # When throttling issue is fully fixed, remove this
2822         self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang'))
2823
2824         keywords = get_first(video_details, 'keywords', expected_type=list) or []
2825         if not keywords and webpage:
2826             keywords = [
2827                 unescapeHTML(m.group('content'))
2828                 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2829         for keyword in keywords:
2830             if keyword.startswith('yt:stretch='):
2831                 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2832                 if mobj:
2833                     # NB: float is intentional for forcing float division
2834                     w, h = (float(v) for v in mobj.groups())
2835                     if w > 0 and h > 0:
2836                         ratio = w / h
2837                         for f in formats:
2838                             if f.get('vcodec') != 'none':
2839                                 f['stretched_ratio'] = ratio
2840                         break
2841
2842         thumbnails = []
2843         thumbnail_dicts = traverse_obj(
2844             (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2845             expected_type=dict, default=[])
2846         for thumbnail in thumbnail_dicts:
2847             thumbnail_url = thumbnail.get('url')
2848             if not thumbnail_url:
2849                 continue
2850             # Sometimes youtube gives a wrong thumbnail URL. See:
2851             # https://github.com/yt-dlp/yt-dlp/issues/233
2852             # https://github.com/ytdl-org/youtube-dl/issues/28023
2853             if 'maxresdefault' in thumbnail_url:
2854                 thumbnail_url = thumbnail_url.split('?')[0]
2855             thumbnails.append({
2856                 'url': thumbnail_url,
2857                 'height': int_or_none(thumbnail.get('height')),
2858                 'width': int_or_none(thumbnail.get('width')),
2859             })
2860         thumbnail_url = search_meta(['og:image', 'twitter:image'])
2861         if thumbnail_url:
2862             thumbnails.append({
2863                 'url': thumbnail_url,
2864             })
2865         # The best resolution thumbnails sometimes does not appear in the webpage
2866         # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2867         # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2868         hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2869         # TODO: Test them also? - For some videos, even these don't exist
2870         guaranteed_thumbnail_names = [
2871             'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2872             'mqdefault', 'mq1', 'mq2', 'mq3',
2873             'default', '1', '2', '3'
2874         ]
2875         thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2876         n_thumbnail_names = len(thumbnail_names)
2877
2878         thumbnails.extend({
2879             'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2880                 video_id=video_id, name=name, ext=ext,
2881                 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2882             '_test_url': name in hq_thumbnail_names,
2883         } for name in thumbnail_names for ext in ('webp', 'jpg'))
2884         for thumb in thumbnails:
2885             i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2886             thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2887         self._remove_duplicate_formats(thumbnails)
2888
2889         category = get_first(microformats, 'category') or search_meta('genre')
2890         channel_id = str_or_none(
2891             get_first(video_details, 'channelId')
2892             or get_first(microformats, 'externalChannelId')
2893             or search_meta('channelId'))
2894         duration = int_or_none(
2895             get_first(video_details, 'lengthSeconds')
2896             or get_first(microformats, 'lengthSeconds')
2897             or parse_duration(search_meta('duration'))) or None
2898         owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2899
2900         live_content = get_first(video_details, 'isLiveContent')
2901         is_upcoming = get_first(video_details, 'isUpcoming')
2902         if is_live is None:
2903             if is_upcoming or live_content is False:
2904                 is_live = False
2905         if is_upcoming is None and (live_content or is_live):
2906             is_upcoming = False
2907         live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2908         live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2909         if not duration and live_endtime and live_starttime:
2910             duration = live_endtime - live_starttime
2911
2912         info = {
2913             'id': video_id,
2914             'title': self._live_title(video_title) if is_live else video_title,
2915             'formats': formats,
2916             'thumbnails': thumbnails,
2917             'description': video_description,
2918             'upload_date': unified_strdate(
2919                 get_first(microformats, 'uploadDate')
2920                 or search_meta('uploadDate')),
2921             'uploader': get_first(video_details, 'author'),
2922             'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2923             'uploader_url': owner_profile_url,
2924             'channel_id': channel_id,
2925             'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2926             'duration': duration,
2927             'view_count': int_or_none(
2928                 get_first((video_details, microformats), (..., 'viewCount'))
2929                 or search_meta('interactionCount')),
2930             'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2931             'age_limit': 18 if (
2932                 get_first(microformats, 'isFamilySafe') is False
2933                 or search_meta('isFamilyFriendly') == 'false'
2934                 or search_meta('og:restrictions:age') == '18+') else 0,
2935             'webpage_url': webpage_url,
2936             'categories': [category] if category else None,
2937             'tags': keywords,
2938             'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2939             'is_live': is_live,
2940             'was_live': (False if is_live or is_upcoming or live_content is False
2941                          else None if is_live is None or is_upcoming is None
2942                          else live_content),
2943             'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
2944             'release_timestamp': live_starttime,
2945         }
2946
2947         pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2948         # Converted into dicts to remove duplicates
2949         captions = {
2950             sub.get('baseUrl'): sub
2951             for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2952         translation_languages = {
2953             lang.get('languageCode'): lang.get('languageName')
2954             for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2955         subtitles = {}
2956         if pctr:
2957             def process_language(container, base_url, lang_code, sub_name, query):
2958                 lang_subs = container.setdefault(lang_code, [])
2959                 for fmt in self._SUBTITLE_FORMATS:
2960                     query.update({
2961                         'fmt': fmt,
2962                     })
2963                     lang_subs.append({
2964                         'ext': fmt,
2965                         'url': update_url_query(base_url, query),
2966                         'name': sub_name,
2967                     })
2968
2969             for base_url, caption_track in captions.items():
2970                 if not base_url:
2971                     continue
2972                 if caption_track.get('kind') != 'asr':
2973                     lang_code = (
2974                         remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2975                         or caption_track.get('languageCode'))
2976                     if not lang_code:
2977                         continue
2978                     process_language(
2979                         subtitles, base_url, lang_code,
2980                         traverse_obj(caption_track, ('name', 'simpleText')),
2981                         {})
2982                     continue
2983                 automatic_captions = {}
2984                 for trans_code, trans_name in translation_languages.items():
2985                     if not trans_code:
2986                         continue
2987                     process_language(
2988                         automatic_captions, base_url, trans_code,
2989                         self._get_text(trans_name, max_runs=1),
2990                         {'tlang': trans_code})
2991                 info['automatic_captions'] = automatic_captions
2992         info['subtitles'] = subtitles
2993
2994         parsed_url = compat_urllib_parse_urlparse(url)
2995         for component in [parsed_url.fragment, parsed_url.query]:
2996             query = compat_parse_qs(component)
2997             for k, v in query.items():
2998                 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2999                     d_k += '_time'
3000                     if d_k not in info and k in s_ks:
3001                         info[d_k] = parse_duration(query[k][0])
3002
3003         # Youtube Music Auto-generated description
3004         if video_description:
3005             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3006             if mobj:
3007                 release_year = mobj.group('release_year')
3008                 release_date = mobj.group('release_date')
3009                 if release_date:
3010                     release_date = release_date.replace('-', '')
3011                     if not release_year:
3012                         release_year = release_date[:4]
3013                 info.update({
3014                     'album': mobj.group('album'.strip()),
3015                     'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3016                     'track': mobj.group('track').strip(),
3017                     'release_date': release_date,
3018                     'release_year': int_or_none(release_year),
3019                 })
3020
3021         initial_data = None
3022         if webpage:
3023             initial_data = self._extract_yt_initial_variable(
3024                 webpage, self._YT_INITIAL_DATA_RE, video_id,
3025                 'yt initial data')
3026         if not initial_data:
3027             headers = self.generate_api_headers(
3028                 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
3029                 session_index=self._extract_session_index(master_ytcfg))
3030
3031             initial_data = self._extract_response(
3032                 item_id=video_id, ep='next', fatal=False,
3033                 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
3034                 note='Downloading initial data API JSON')
3035
3036         try:
3037             # This will error if there is no livechat
3038             initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3039             info['subtitles']['live_chat'] = [{
3040                 'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
3041                 'video_id': video_id,
3042                 'ext': 'json',
3043                 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3044             }]
3045         except (KeyError, IndexError, TypeError):
3046             pass
3047
3048         if initial_data:
3049             info['chapters'] = (
3050                 self._extract_chapters_from_json(initial_data, duration)
3051                 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3052                 or None)
3053
3054             contents = try_get(
3055                 initial_data,
3056                 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3057                 list) or []
3058             for content in contents:
3059                 vpir = content.get('videoPrimaryInfoRenderer')
3060                 if vpir:
3061                     stl = vpir.get('superTitleLink')
3062                     if stl:
3063                         stl = self._get_text(stl)
3064                         if try_get(
3065                                 vpir,
3066                                 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3067                             info['location'] = stl
3068                         else:
3069                             mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3070                             if mobj:
3071                                 info.update({
3072                                     'series': mobj.group(1),
3073                                     'season_number': int(mobj.group(2)),
3074                                     'episode_number': int(mobj.group(3)),
3075                                 })
3076                     for tlb in (try_get(
3077                             vpir,
3078                             lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3079                             list) or []):
3080                         tbr = tlb.get('toggleButtonRenderer') or {}
3081                         for getter, regex in [(
3082                                 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3083                                 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3084                                     lambda x: x['accessibility'],
3085                                     lambda x: x['accessibilityData']['accessibilityData'],
3086                                 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3087                             label = (try_get(tbr, getter, dict) or {}).get('label')
3088                             if label:
3089                                 mobj = re.match(regex, label)
3090                                 if mobj:
3091                                     info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3092                                     break
3093                     sbr_tooltip = try_get(
3094                         vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3095                     if sbr_tooltip:
3096                         like_count, dislike_count = sbr_tooltip.split(' / ')
3097                         info.update({
3098                             'like_count': str_to_int(like_count),
3099                             'dislike_count': str_to_int(dislike_count),
3100                         })
3101                 vsir = content.get('videoSecondaryInfoRenderer')
3102                 if vsir:
3103                     info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3104                     rows = try_get(
3105                         vsir,
3106                         lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3107                         list) or []
3108                     multiple_songs = False
3109                     for row in rows:
3110                         if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3111                             multiple_songs = True
3112                             break
3113                     for row in rows:
3114                         mrr = row.get('metadataRowRenderer') or {}
3115                         mrr_title = mrr.get('title')
3116                         if not mrr_title:
3117                             continue
3118                         mrr_title = self._get_text(mrr, 'title')
3119                         mrr_contents_text = self._get_text(mrr, ('contents', 0))
3120                         if mrr_title == 'License':
3121                             info['license'] = mrr_contents_text
3122                         elif not multiple_songs:
3123                             if mrr_title == 'Album':
3124                                 info['album'] = mrr_contents_text
3125                             elif mrr_title == 'Artist':
3126                                 info['artist'] = mrr_contents_text
3127                             elif mrr_title == 'Song':
3128                                 info['track'] = mrr_contents_text
3129
3130         fallbacks = {
3131             'channel': 'uploader',
3132             'channel_id': 'uploader_id',
3133             'channel_url': 'uploader_url',
3134         }
3135         for to, frm in fallbacks.items():
3136             if not info.get(to):
3137                 info[to] = info.get(frm)
3138
3139         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3140             v = info.get(s_k)
3141             if v:
3142                 info[d_k] = v
3143
3144         is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3145         is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3146         is_membersonly = None
3147         is_premium = None
3148         if initial_data and is_private is not None:
3149             is_membersonly = False
3150             is_premium = False
3151             contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3152             badge_labels = set()
3153             for content in contents:
3154                 if not isinstance(content, dict):
3155                     continue
3156                 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3157             for badge_label in badge_labels:
3158                 if badge_label.lower() == 'members only':
3159                     is_membersonly = True
3160                 elif badge_label.lower() == 'premium':
3161                     is_premium = True
3162                 elif badge_label.lower() == 'unlisted':
3163                     is_unlisted = True
3164
3165         info['availability'] = self._availability(
3166             is_private=is_private,
3167             needs_premium=is_premium,
3168             needs_subscription=is_membersonly,
3169             needs_auth=info['age_limit'] >= 18,
3170             is_unlisted=None if is_private is None else is_unlisted)
3171
3172         if self.get_param('getcomments', False):
3173             info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3174
3175         self.mark_watched(video_id, player_responses)
3176
3177         return info
3178
3179
3180 class YoutubeTabIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com tab'
    # Verbose-mode ((?x)) pattern: whitespace and the inline "# Direct URLs"
    # comment inside the literal are ignored by the regex engine.
    # Accepted hosts are youtube.com, youtubekids.com and invidio.us, each
    # optionally preceded by one subdomain label. After the host the path is
    # one of:
    #   * channel/, c/, user/ or browse/ (captured as `channel_type`)
    #   * feed/, hashtag/, or a playlist/watch query that carries `list=`
    #     (captured as `not_channel`)
    #   * nothing at all ("direct URL"), provided the following text does not
    #     start with one of the alternatives substituted for %s, i.e.
    #     YoutubeBaseInfoExtractor._RESERVED_NAMES
    # `id` then captures everything up to the next `/`, `?`, `#` or `&`.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:\w+\.)?
                        (?:
                            youtube(?:kids)?\.com|
                            invidio\.us
                        )/
                        (?:
                            (?P<channel_type>channel|c|user|browse)/|
                            (?P<not_channel>
                                feed/|hashtag/|
                                (?:playlist|watch)\?.*?\blist=
                            )|
                            (?!(?:%s)\b)  # Direct URLs
                        )
                        (?P<id>[^/?\#&]+)
                    ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
    IE_NAME = 'youtube:tab'
3200
3201     _TESTS = [{
3202         'note': 'playlists, multipage',
3203         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3204         'playlist_mincount': 94,
3205         'info_dict': {
3206             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3207             'title': 'Игорь Клейнер - Playlists',
3208             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3209             'uploader': 'Игорь Клейнер',
3210             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3211         },
3212     }, {
3213         'note': 'playlists, multipage, different order',
3214         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3215         'playlist_mincount': 94,
3216         'info_dict': {
3217             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3218             'title': 'Игорь Клейнер - Playlists',
3219             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3220             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3221             'uploader': 'Игорь Клейнер',
3222         },
3223     }, {
3224         'note': 'playlists, series',
3225         'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3226         'playlist_mincount': 5,
3227         'info_dict': {
3228             'id': 'UCYO_jab_esuFRV4b17AJtAw',
3229             'title': '3Blue1Brown - Playlists',
3230             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3231             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3232             'uploader': '3Blue1Brown',
3233         },
3234     }, {
3235         'note': 'playlists, singlepage',
3236         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3237         'playlist_mincount': 4,
3238         'info_dict': {
3239             'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3240             'title': 'ThirstForScience - Playlists',
3241             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3242             'uploader': 'ThirstForScience',
3243             'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3244         }
3245     }, {
3246         'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3247         'only_matching': True,
3248     }, {
3249         'note': 'basic, single video playlist',
3250         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3251         'info_dict': {
3252             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3253             'uploader': 'Sergey M.',
3254             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3255             'title': 'youtube-dl public playlist',
3256         },
3257         'playlist_count': 1,
3258     }, {
3259         'note': 'empty playlist',
3260         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3261         'info_dict': {
3262             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3263             'uploader': 'Sergey M.',
3264             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3265             'title': 'youtube-dl empty playlist',
3266         },
3267         'playlist_count': 0,
3268     }, {
3269         'note': 'Home tab',
3270         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3271         'info_dict': {
3272             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3273             'title': 'lex will - Home',
3274             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3275             'uploader': 'lex will',
3276             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3277         },
3278         'playlist_mincount': 2,
3279     }, {
3280         'note': 'Videos tab',
3281         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3282         'info_dict': {
3283             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3284             'title': 'lex will - Videos',
3285             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3286             'uploader': 'lex will',
3287             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3288         },
3289         'playlist_mincount': 975,
3290     }, {
3291         'note': 'Videos tab, sorted by popular',
3292         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3293         'info_dict': {
3294             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3295             'title': 'lex will - Videos',
3296             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3297             'uploader': 'lex will',
3298             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3299         },
3300         'playlist_mincount': 199,
3301     }, {
3302         'note': 'Playlists tab',
3303         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3304         'info_dict': {
3305             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3306             'title': 'lex will - Playlists',
3307             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3308             'uploader': 'lex will',
3309             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3310         },
3311         'playlist_mincount': 17,
3312     }, {
3313         'note': 'Community tab',
3314         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3315         'info_dict': {
3316             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3317             'title': 'lex will - Community',
3318             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3319             'uploader': 'lex will',
3320             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3321         },
3322         'playlist_mincount': 18,
3323     }, {
3324         'note': 'Channels tab',
3325         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3326         'info_dict': {
3327             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3328             'title': 'lex will - Channels',
3329             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3330             'uploader': 'lex will',
3331             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3332         },
3333         'playlist_mincount': 12,
3334     }, {
3335         'note': 'Search tab',
3336         'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3337         'playlist_mincount': 40,
3338         'info_dict': {
3339             'id': 'UCYO_jab_esuFRV4b17AJtAw',
3340             'title': '3Blue1Brown - Search - linear algebra',
3341             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3342             'uploader': '3Blue1Brown',
3343             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3344         },
3345     }, {
3346         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3347         'only_matching': True,
3348     }, {
3349         'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3350         'only_matching': True,
3351     }, {
3352         'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3353         'only_matching': True,
3354     }, {
3355         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3356         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3357         'info_dict': {
3358             'title': '29C3: Not my department',
3359             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3360             'uploader': 'Christiaan008',
3361             'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3362             'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3363         },
3364         'playlist_count': 96,
3365     }, {
3366         'note': 'Large playlist',
3367         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3368         'info_dict': {
3369             'title': 'Uploads from Cauchemar',
3370             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3371             'uploader': 'Cauchemar',
3372             'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3373         },
3374         'playlist_mincount': 1123,
3375     }, {
3376         'note': 'even larger playlist, 8832 videos',
3377         'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3378         'only_matching': True,
3379     }, {
3380         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3381         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3382         'info_dict': {
3383             'title': 'Uploads from Interstellar Movie',
3384             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3385             'uploader': 'Interstellar Movie',
3386             'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3387         },
3388         'playlist_mincount': 21,
3389     }, {
3390         'note': 'Playlist with "show unavailable videos" button',
3391         'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3392         'info_dict': {
3393             'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3394             'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3395             'uploader': 'Phim Siêu Nhân Nhật Bản',
3396             'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3397         },
3398         'playlist_mincount': 200,
3399     }, {
3400         'note': 'Playlist with unavailable videos in page 7',
3401         'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3402         'info_dict': {
3403             'title': 'Uploads from BlankTV',
3404             'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3405             'uploader': 'BlankTV',
3406             'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3407         },
3408         'playlist_mincount': 1000,
3409     }, {
3410         'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3411         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3412         'info_dict': {
3413             'title': 'Data Analysis with Dr Mike Pound',
3414             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3415             'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3416             'uploader': 'Computerphile',
3417             'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3418         },
3419         'playlist_mincount': 11,
3420     }, {
3421         'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3422         'only_matching': True,
3423     }, {
3424         'note': 'Playlist URL that does not actually serve a playlist',
3425         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3426         'info_dict': {
3427             'id': 'FqZTN594JQw',
3428             'ext': 'webm',
3429             'title': "Smiley's People 01 detective, Adventure Series, Action",
3430             'uploader': 'STREEM',
3431             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3432             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3433             'upload_date': '20150526',
3434             'license': 'Standard YouTube License',
3435             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3436             'categories': ['People & Blogs'],
3437             'tags': list,
3438             'view_count': int,
3439             'like_count': int,
3440             'dislike_count': int,
3441         },
3442         'params': {
3443             'skip_download': True,
3444         },
3445         'skip': 'This video is not available.',
3446         'add_ie': [YoutubeIE.ie_key()],
3447     }, {
3448         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3449         'only_matching': True,
3450     }, {
3451         'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3452         'only_matching': True,
3453     }, {
3454         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3455         'info_dict': {
3456             'id': '3yImotZU3tw',  # This will keep changing
3457             'ext': 'mp4',
3458             'title': compat_str,
3459             'uploader': 'Sky News',
3460             'uploader_id': 'skynews',
3461             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3462             'upload_date': r're:\d{8}',
3463             'description': compat_str,
3464             'categories': ['News & Politics'],
3465             'tags': list,
3466             'like_count': int,
3467             'dislike_count': int,
3468         },
3469         'params': {
3470             'skip_download': True,
3471         },
3472         'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3473     }, {
3474         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3475         'info_dict': {
3476             'id': 'a48o2S1cPoo',
3477             'ext': 'mp4',
3478             'title': 'The Young Turks - Live Main Show',
3479             'uploader': 'The Young Turks',
3480             'uploader_id': 'TheYoungTurks',
3481             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3482             'upload_date': '20150715',
3483             'license': 'Standard YouTube License',
3484             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3485             'categories': ['News & Politics'],
3486             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3487             'like_count': int,
3488             'dislike_count': int,
3489         },
3490         'params': {
3491             'skip_download': True,
3492         },
3493         'only_matching': True,
3494     }, {
3495         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3496         'only_matching': True,
3497     }, {
3498         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3499         'only_matching': True,
3500     }, {
3501         'note': 'A channel that is not live. Should raise error',
3502         'url': 'https://www.youtube.com/user/numberphile/live',
3503         'only_matching': True,
3504     }, {
3505         'url': 'https://www.youtube.com/feed/trending',
3506         'only_matching': True,
3507     }, {
3508         'url': 'https://www.youtube.com/feed/library',
3509         'only_matching': True,
3510     }, {
3511         'url': 'https://www.youtube.com/feed/history',
3512         'only_matching': True,
3513     }, {
3514         'url': 'https://www.youtube.com/feed/subscriptions',
3515         'only_matching': True,
3516     }, {
3517         'url': 'https://www.youtube.com/feed/watch_later',
3518         'only_matching': True,
3519     }, {
3520         'note': 'Recommended - redirects to home page',
3521         'url': 'https://www.youtube.com/feed/recommended',
3522         'only_matching': True,
3523     }, {
3524         'note': 'inline playlist with not always working continuations',
3525         'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3526         'only_matching': True,
3527     }, {
3528         'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3529         'only_matching': True,
3530     }, {
3531         'url': 'https://www.youtube.com/course',
3532         'only_matching': True,
3533     }, {
3534         'url': 'https://www.youtube.com/zsecurity',
3535         'only_matching': True,
3536     }, {
3537         'url': 'http://www.youtube.com/NASAgovVideo/videos',
3538         'only_matching': True,
3539     }, {
3540         'url': 'https://www.youtube.com/TheYoungTurks/live',
3541         'only_matching': True,
3542     }, {
3543         'url': 'https://www.youtube.com/hashtag/cctv9',
3544         'info_dict': {
3545             'id': 'cctv9',
3546             'title': '#cctv9',
3547         },
3548         'playlist_mincount': 350,
3549     }, {
3550         'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3551         'only_matching': True,
3552     }, {
3553         'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3554         'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3555         'only_matching': True
3556     }, {
3557         'note': '/browse/ should redirect to /channel/',
3558         'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3559         'only_matching': True
3560     }, {
3561         'note': 'VLPL, should redirect to playlist?list=PL...',
3562         'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3563         'info_dict': {
3564             'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3565             'uploader': 'NoCopyrightSounds',
3566             'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3567             'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3568             'title': 'NCS Releases',
3569         },
3570         'playlist_mincount': 166,
3571     }, {
3572         'note': 'Topic, should redirect to playlist?list=UU...',
3573         'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3574         'info_dict': {
3575             'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3576             'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3577             'title': 'Uploads from Royalty Free Music - Topic',
3578             'uploader': 'Royalty Free Music - Topic',
3579         },
3580         'expected_warnings': [
3581             'A channel/user page was given',
3582             'The URL does not have a videos tab',
3583         ],
3584         'playlist_mincount': 101,
3585     }, {
3586         'note': 'Topic without a UU playlist',
3587         'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3588         'info_dict': {
3589             'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3590             'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3591         },
3592         'expected_warnings': [
3593             'A channel/user page was given',
3594             'The URL does not have a videos tab',
3595             'Falling back to channel URL',
3596         ],
3597         'playlist_mincount': 9,
3598     }, {
3599         'note': 'Youtube music Album',
3600         'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3601         'info_dict': {
3602             'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3603             'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3604         },
3605         'playlist_count': 50,
3606     }, {
3607         'note': 'unlisted single video playlist',
3608         'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3609         'info_dict': {
3610             'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3611             'uploader': 'colethedj',
3612             'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3613             'title': 'yt-dlp unlisted playlist test',
3614             'availability': 'unlisted'
3615         },
3616         'playlist_count': 1,
3617     }]
3618
3619     @classmethod
3620     def suitable(cls, url):
3621         return False if YoutubeIE.suitable(url) else super(
3622             YoutubeTabIE, cls).suitable(url)
3623
3624     def _extract_channel_id(self, webpage):
3625         channel_id = self._html_search_meta(
3626             'channelId', webpage, 'channel id', default=None)
3627         if channel_id:
3628             return channel_id
3629         channel_url = self._html_search_meta(
3630             ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3631              'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3632              'twitter:app:url:googleplay'), webpage, 'channel url')
3633         return self._search_regex(
3634             r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3635             channel_url, 'channel id')
3636
3637     @staticmethod
3638     def _extract_basic_item_renderer(item):
3639         # Modified from _extract_grid_item_renderer
3640         known_basic_renderers = (
3641             'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3642         )
3643         for key, renderer in item.items():
3644             if not isinstance(renderer, dict):
3645                 continue
3646             elif key in known_basic_renderers:
3647                 return renderer
3648             elif key.startswith('grid') and key.endswith('Renderer'):
3649                 return renderer
3650
3651     def _grid_entries(self, grid_renderer):
3652         for item in grid_renderer['items']:
3653             if not isinstance(item, dict):
3654                 continue
3655             renderer = self._extract_basic_item_renderer(item)
3656             if not isinstance(renderer, dict):
3657                 continue
3658             title = self._get_text(renderer, 'title')
3659
3660             # playlist
3661             playlist_id = renderer.get('playlistId')
3662             if playlist_id:
3663                 yield self.url_result(
3664                     'https://www.youtube.com/playlist?list=%s' % playlist_id,
3665                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3666                     video_title=title)
3667                 continue
3668             # video
3669             video_id = renderer.get('videoId')
3670             if video_id:
3671                 yield self._extract_video(renderer)
3672                 continue
3673             # channel
3674             channel_id = renderer.get('channelId')
3675             if channel_id:
3676                 yield self.url_result(
3677                     'https://www.youtube.com/channel/%s' % channel_id,
3678                     ie=YoutubeTabIE.ie_key(), video_title=title)
3679                 continue
3680             # generic endpoint URL support
3681             ep_url = urljoin('https://www.youtube.com/', try_get(
3682                 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3683                 compat_str))
3684             if ep_url:
3685                 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3686                     if ie.suitable(ep_url):
3687                         yield self.url_result(
3688                             ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3689                         break
3690
3691     def _shelf_entries_from_content(self, shelf_renderer):
3692         content = shelf_renderer.get('content')
3693         if not isinstance(content, dict):
3694             return
3695         renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3696         if renderer:
3697             # TODO: add support for nested playlists so each shelf is processed
3698             # as separate playlist
3699             # TODO: this includes only first N items
3700             for entry in self._grid_entries(renderer):
3701                 yield entry
3702         renderer = content.get('horizontalListRenderer')
3703         if renderer:
3704             # TODO
3705             pass
3706
3707     def _shelf_entries(self, shelf_renderer, skip_channels=False):
3708         ep = try_get(
3709             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3710             compat_str)
3711         shelf_url = urljoin('https://www.youtube.com', ep)
3712         if shelf_url:
3713             # Skipping links to another channels, note that checking for
3714             # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3715             # will not work
3716             if skip_channels and '/channels?' in shelf_url:
3717                 return
3718             title = self._get_text(shelf_renderer, 'title')
3719             yield self.url_result(shelf_url, video_title=title)
3720         # Shelf may not contain shelf URL, fallback to extraction from content
3721         for entry in self._shelf_entries_from_content(shelf_renderer):
3722             yield entry
3723
3724     def _playlist_entries(self, video_list_renderer):
3725         for content in video_list_renderer['contents']:
3726             if not isinstance(content, dict):
3727                 continue
3728             renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3729             if not isinstance(renderer, dict):
3730                 continue
3731             video_id = renderer.get('videoId')
3732             if not video_id:
3733                 continue
3734             yield self._extract_video(renderer)
3735
3736     def _rich_entries(self, rich_grid_renderer):
3737         renderer = try_get(
3738             rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3739         video_id = renderer.get('videoId')
3740         if not video_id:
3741             return
3742         yield self._extract_video(renderer)
3743
3744     def _video_entry(self, video_renderer):
3745         video_id = video_renderer.get('videoId')
3746         if video_id:
3747             return self._extract_video(video_renderer)
3748
3749     def _post_thread_entries(self, post_thread_renderer):
3750         post_renderer = try_get(
3751             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3752         if not post_renderer:
3753             return
3754         # video attachment
3755         video_renderer = try_get(
3756             post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3757         video_id = video_renderer.get('videoId')
3758         if video_id:
3759             entry = self._extract_video(video_renderer)
3760             if entry:
3761                 yield entry
3762         # playlist attachment
3763         playlist_id = try_get(
3764             post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3765         if playlist_id:
3766             yield self.url_result(
3767                 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3768                 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3769         # inline video links
3770         runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3771         for run in runs:
3772             if not isinstance(run, dict):
3773                 continue
3774             ep_url = try_get(
3775                 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3776             if not ep_url:
3777                 continue
3778             if not YoutubeIE.suitable(ep_url):
3779                 continue
3780             ep_video_id = YoutubeIE._match_id(ep_url)
3781             if video_id == ep_video_id:
3782                 continue
3783             yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3784
3785     def _post_thread_continuation_entries(self, post_thread_continuation):
3786         contents = post_thread_continuation.get('contents')
3787         if not isinstance(contents, list):
3788             return
3789         for content in contents:
3790             renderer = content.get('backstagePostThreadRenderer')
3791             if not isinstance(renderer, dict):
3792                 continue
3793             for entry in self._post_thread_entries(renderer):
3794                 yield entry
3795
3796     r''' # unused
3797     def _rich_grid_entries(self, contents):
3798         for content in contents:
3799             video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3800             if video_renderer:
3801                 entry = self._video_entry(video_renderer)
3802                 if entry:
3803                     yield entry
3804     '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """
        Yield all entries of a tab, following continuations page by page.

        The entries of the tab's own content are yielded first; afterwards the
        continuation found there (if any) is requested repeatedly until a
        response carries no further continuation or no recognised renderer.
        @param tab: tab renderer dict; its 'content' and 'title' keys are read
        @param item_id: id used to label the continuation page requests
        @param identity_token: passed through to generate_api_headers
        @param account_syncid: passed through to generate_api_headers
        @param ytcfg: passed to generate_api_headers and _extract_response
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            # Yields the entries below parent_renderer['contents'] and records
            # the continuation it finds in continuation_list[0]
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Not an item section: only rich items are handled here
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # Maps a renderer key to the callable that turns it into entries
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    # Only the first known renderer of each item is processed
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                # Fall back to the continuation of the item section itself
                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            # Last resort: the continuation of the parent renderer
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # One-element list used as a mutable cell that extract_entries writes to
        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Keep the previous visitor data when the response has none
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # First response shape: entries below 'continuationContents'
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                # Fresh cell so a continuation recorded for the previous page is not reused
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # Second response shape: items appended through 'appendContinuationItemsAction'.
            # Each value is (handler, key under which the handler expects the items)
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            # The first item decides which handler is used for the whole list
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Wrap the items in a dict under the key the handler reads
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            # Neither response shape was recognised: stop paging
            break
3920
3921     @staticmethod
3922     def _extract_selected_tab(tabs):
3923         for tab in tabs:
3924             renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3925             if renderer.get('selected') is True:
3926                 return renderer
3927         else:
3928             raise ExtractorError('Unable to find selected tab')
3929
3930     @classmethod
3931     def _extract_uploader(cls, data):
3932         uploader = {}
3933         renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3934         owner = try_get(
3935             renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3936         if owner:
3937             uploader['uploader'] = owner.get('text')
3938             uploader['uploader_id'] = try_get(
3939                 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3940             uploader['uploader_url'] = urljoin(
3941                 'https://www.youtube.com/',
3942                 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3943         return {k: v for k, v in uploader.items() if v is not None}
3944
3945     def _extract_from_tabs(self, item_id, webpage, data, tabs):
3946         playlist_id = title = description = channel_url = channel_name = channel_id = None
3947         thumbnails_list = tags = []
3948
3949         selected_tab = self._extract_selected_tab(tabs)
3950         renderer = try_get(
3951             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3952         if renderer:
3953             channel_name = renderer.get('title')
3954             channel_url = renderer.get('channelUrl')
3955             channel_id = renderer.get('externalId')
3956         else:
3957             renderer = try_get(
3958                 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3959
3960         if renderer:
3961             title = renderer.get('title')
3962             description = renderer.get('description', '')
3963             playlist_id = channel_id
3964             tags = renderer.get('keywords', '').split()
3965             thumbnails_list = (
3966                 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
3967                 or try_get(
3968                     self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3969                     lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
3970                     list)
3971                 or [])
3972
3973         thumbnails = []
3974         for t in thumbnails_list:
3975             if not isinstance(t, dict):
3976                 continue
3977             thumbnail_url = url_or_none(t.get('url'))
3978             if not thumbnail_url:
3979                 continue
3980             thumbnails.append({
3981                 'url': thumbnail_url,
3982                 'width': int_or_none(t.get('width')),
3983                 'height': int_or_none(t.get('height')),
3984             })
3985         if playlist_id is None:
3986             playlist_id = item_id
3987         if title is None:
3988             title = (
3989                 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3990                 or playlist_id)
3991         title += format_field(selected_tab, 'title', ' - %s')
3992         title += format_field(selected_tab, 'expandedText', ' - %s')
3993         metadata = {
3994             'playlist_id': playlist_id,
3995             'playlist_title': title,
3996             'playlist_description': description,
3997             'uploader': channel_name,
3998             'uploader_id': channel_id,
3999             'uploader_url': channel_url,
4000             'thumbnails': thumbnails,
4001             'tags': tags,
4002         }
4003         availability = self._extract_availability(data)
4004         if availability:
4005             metadata['availability'] = availability
4006         if not channel_id:
4007             metadata.update(self._extract_uploader(data))
4008         metadata.update({
4009             'channel': metadata['uploader'],
4010             'channel_id': metadata['uploader_id'],
4011             'channel_url': metadata['uploader_url']})
4012         ytcfg = self.extract_ytcfg(item_id, webpage)
4013         return self.playlist_result(
4014             self._entries(
4015                 selected_tab, playlist_id,
4016                 self._extract_identity_token(webpage, item_id),
4017                 self._extract_account_syncid(ytcfg, data), ytcfg),
4018             **metadata)
4019
4020     def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
4021         first_id = last_id = None
4022         ytcfg = self.extract_ytcfg(playlist_id, webpage)
4023         headers = self.generate_api_headers(
4024             ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4025             identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
4026         for page_num in itertools.count(1):
4027             videos = list(self._playlist_entries(playlist))
4028             if not videos:
4029                 return
4030             start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
4031             if start >= len(videos):
4032                 return
4033             for video in videos[start:]:
4034                 if video['id'] == first_id:
4035                     self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
4036                     return
4037                 yield video
4038             first_id = first_id or videos[0]['id']
4039             last_id = videos[-1]['id']
4040             watch_endpoint = try_get(
4041                 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
4042             query = {
4043                 'playlistId': playlist_id,
4044                 'videoId': watch_endpoint.get('videoId') or last_id,
4045                 'index': watch_endpoint.get('index') or len(videos),
4046                 'params': watch_endpoint.get('params') or 'OAE%3D'
4047             }
4048             response = self._extract_response(
4049                 item_id='%s page %d' % (playlist_id, page_num),
4050                 query=query, ep='next', headers=headers, ytcfg=ytcfg,
4051                 check_get_keys='contents'
4052             )
4053             playlist = try_get(
4054                 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4055
4056     def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
4057         title = playlist.get('title') or try_get(
4058             data, lambda x: x['titleText']['simpleText'], compat_str)
4059         playlist_id = playlist.get('playlistId') or item_id
4060
4061         # Delegating everything except mix playlists to regular tab-based playlist URL
4062         playlist_url = urljoin(url, try_get(
4063             playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4064             compat_str))
4065         if playlist_url and playlist_url != url:
4066             return self.url_result(
4067                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4068                 video_title=title)
4069
4070         return self.playlist_result(
4071             self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4072             playlist_id=playlist_id, playlist_title=title)
4073
4074     def _extract_availability(self, data):
4075         """
4076         Gets the availability of a given playlist/tab.
4077         Note: Unless YouTube tells us explicitly, we do not assume it is public
4078         @param data: response
4079         """
4080         is_private = is_unlisted = None
4081         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4082         badge_labels = self._extract_badges(renderer)
4083
4084         # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4085         privacy_dropdown_entries = try_get(
4086             renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4087         for renderer_dict in privacy_dropdown_entries:
4088             is_selected = try_get(
4089                 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4090             if not is_selected:
4091                 continue
4092             label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4093             if label:
4094                 badge_labels.add(label.lower())
4095                 break
4096
4097         for badge_label in badge_labels:
4098             if badge_label == 'unlisted':
4099                 is_unlisted = True
4100             elif badge_label == 'private':
4101                 is_private = True
4102             elif badge_label == 'public':
4103                 is_unlisted = is_private = False
4104         return self._availability(is_private, False, False, False, is_unlisted)
4105
4106     @staticmethod
4107     def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4108         sidebar_renderer = try_get(
4109             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4110         for item in sidebar_renderer:
4111             renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4112             if renderer:
4113                 return renderer
4114
4115     def _reload_with_unavailable_videos(self, item_id, data, webpage):
4116         """
4117         Get playlist with unavailable videos if the 'show unavailable videos' button exists.
4118         """
4119         browse_id = params = None
4120         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4121         if not renderer:
4122             return
4123         menu_renderer = try_get(
4124             renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
4125         for menu_item in menu_renderer:
4126             if not isinstance(menu_item, dict):
4127                 continue
4128             nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
4129             text = try_get(
4130                 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
4131             if not text or text.lower() != 'show unavailable videos':
4132                 continue
4133             browse_endpoint = try_get(
4134                 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
4135             browse_id = browse_endpoint.get('browseId')
4136             params = browse_endpoint.get('params')
4137             break
4138
4139         ytcfg = self.extract_ytcfg(item_id, webpage)
4140         headers = self.generate_api_headers(
4141             ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4142             identity_token=self._extract_identity_token(webpage, item_id=item_id),
4143             visitor_data=try_get(
4144                 self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
4145         query = {
4146             'params': params or 'wgYCCAA=',
4147             'browseId': browse_id or 'VL%s' % item_id
4148         }
4149         return self._extract_response(
4150             item_id=item_id, headers=headers, query=query,
4151             check_get_keys='contents', fatal=False, ytcfg=ytcfg,
4152             note='Downloading API JSON with unavailable videos')
4153
4154     def _extract_webpage(self, url, item_id):
4155         retries = self.get_param('extractor_retries', 3)
4156         count = -1
4157         last_error = 'Incomplete yt initial data recieved'
4158         while count < retries:
4159             count += 1
4160             # Sometimes youtube returns a webpage with incomplete ytInitialData
4161             # See: https://github.com/yt-dlp/yt-dlp/issues/116
4162             if count:
4163                 self.report_warning('%s. Retrying ...' % last_error)
4164             webpage = self._download_webpage(
4165                 url, item_id,
4166                 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4167             data = self.extract_yt_initial_data(item_id, webpage)
4168             if data.get('contents') or data.get('currentVideoEndpoint'):
4169                 break
4170             # Extract alerts here only when there is error
4171             self._extract_and_report_alerts(data)
4172             if count >= retries:
4173                 raise ExtractorError(last_error)
4174         return webpage, data
4175
4176     @staticmethod
4177     def _smuggle_data(entries, data):
4178         for entry in entries:
4179             if data:
4180                 entry['url'] = smuggle_url(entry['url'], data)
4181             yield entry
4182
4183     def _real_extract(self, url):
4184         url, smuggled_data = unsmuggle_url(url, {})
4185         if self.is_music_url(url):
4186             smuggled_data['is_music_url'] = True
4187         info_dict = self.__real_extract(url, smuggled_data)
4188         if info_dict.get('entries'):
4189             info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4190         return info_dict
4191
    # Splits a URL into 'pre' (the part matched by _VALID_URL), an optional
    # 'tab' path segment and 'post' (the remainder). The conditional group
    # means 'tab' is only attempted when the 'channel_type' group matched
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4193
    def __real_extract(self, url, smuggled_data):
        """
        Extract a tab, playlist or single video from an already unsmuggled URL.

        The URL is normalised first (host, music URLs, default tab); the page
        is then downloaded and handled as tabs, as a watch-page playlist or
        as a single video, in that order.
        @param url: URL to extract
        @param smuggled_data: dict of smuggled data; 'is_music_url' is read
        Raises ExtractorError when the page cannot be recognised or when the
        /live tab did not redirect to a video.
        """
        item_id = self._match_id(url)
        # Always request the page from www.youtube.com
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Named groups of _url_re, with unmatched groups as '' instead of None
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Reassemble the (possibly rewritten) URL and parse it again
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            # The URL names both a video and a playlist; 'noplaylist' decides
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                # /videos was requested but a different tab came back selected
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            # Only switch over once the playlist page loaded without an error alert
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        # data may have been replaced above, so the tabs are looked up again
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        # Neither tabs nor playlist: fall back to a single video, if any
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4308
4309
class YoutubePlaylistIE(InfoExtractor):
    """Matches bare playlist ids and playlist URLs and hands them to YoutubeTabIE."""
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """
        Reject URLs that YoutubeTabIE accepts and URLs carrying a video id
        ('v' query parameter); otherwise defer to the regular URL matching.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """
        Rewrite the URL to the canonical playlist page, keeping its query
        (or using just the playlist id when there is none), and delegate to
        YoutubeTabIE. Music URLs are marked through smuggled data.
        """
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4394
4395
class YoutubeYtBeIE(InfoExtractor):
    """Matches youtu.be short URLs that also carry a playlist id."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short URL into the equivalent watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4434
4435
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the "ytuser:<name>" pseudo-URL to a youtube.com/user/<name> page."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    # Everything after the "ytuser:" prefix is taken as the user name
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the user page URL and delegate it to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_page = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_page, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4449
4450
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Keyword shortcut that resolves to the `LL` playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    # Accepts :ytfav, :ytfavs, :ytfavorite(s) and :ytfavourite(s)
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=LL` playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4468
4469
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor behind the "ytsearch" keyword."""

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _SEARCH_KEY = 'ytsearch'
    # When set, sent as the `params` field of every search request
    _SEARCH_PARAMS = None
    # No real upper bound seems to exist: searching for 'python', for
    # example, reports more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _TESTS = []

    def _entries(self, query, n):
        """Yield extracted videos for *query*, page by page, up to *n* of them.

        Paging stops as soon as a request yields no usable response, a page
        carries no continuation, or *n* videos have been produced.
        """
        def video_renderers(section):
            # Lazily walk one itemSectionRenderer and hand back each
            # videoRenderer dict that has a videoId
            items = try_get(
                section, lambda x: x['itemSectionRenderer']['contents'], list)
            for item in items or ():
                renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                if isinstance(renderer, dict) and renderer.get('videoId'):
                    yield renderer

        payload = {'query': query}
        if self._SEARCH_PARAMS:
            payload['params'] = self._SEARCH_PARAMS

        found = 0
        next_page = {}
        page_num = 0
        while True:
            page_num += 1
            # Merge the continuation of the previous page (empty on page 1)
            payload.update(next_page)
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num),
                ep='search', query=payload,
                check_get_keys=('contents', 'onResponseReceivedCommands'))
            if not response:
                return

            # The section list is looked up under `contents` first and under
            # `onResponseReceivedCommands` otherwise
            sections = try_get(
                response,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Promoted content may shift where the videos and the
            # continuation token sit, so every section is inspected
            next_page = None
            for section in sections:
                next_page = next_page or self._extract_continuation({'contents': [section]})
                for renderer in video_renderers(section):
                    yield self._extract_video(renderer)
                    found += 1
                    if found == n:
                        return

            if not next_page:
                return

    def _get_n_results(self, query, n):
        """Build a playlist result from up to *n* search results for *query*"""
        results = self._entries(query, n)
        return self.playlist_result(results, query, query)
4537
4538
class YoutubeSearchDateIE(YoutubeSearchIE):
    """YoutubeSearchIE variant reached through the "ytsearchdate" keyword."""

    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Forwarded as the `params` field of each search request
    _SEARCH_PARAMS = 'CAI%3D'
4544
4545
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Run a search described by a youtube.com/results URL."""

    IE_NAME = '%s_url' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com search URLs'
    # Requires a non-empty `search_query` or `q` parameter in the query string
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # No result cap is set here; _MAX_RESULTS comes from YoutubeSearchIE
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return this class's own _VALID_URL pattern."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the search terms and the `sp` filter from the URL, then search."""
        params = parse_qs(url)
        terms = params.get('search_query') or params.get('q')
        query = terms[0]
        # A missing `sp` parameter becomes an empty string
        sp_values = params.get('sp', ('',))
        self._SEARCH_PARAMS = sp_values[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4572
4573
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors
    Every subclass has to provide a _FEED_NAME attribute.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name, derived from the subclass's _FEED_NAME."""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the youtube.com/feed/<_FEED_NAME> URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4590
4591
class YoutubeWatchLaterIE(InfoExtractor):
    """Keyword shortcut that resolves to the `WL` playlist."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the fixed `list=WL` playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4604
4605
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `recommended` feed."""

    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _FEED_NAME = 'recommended'
    # NOTE(review): IE_DESC mentions authentication, yet login is not
    # enforced for this feed - confirm which of the two is intended
    _LOGIN_REQUIRED = False
    # Matches the bare youtube.com front page (optionally followed by a query
    # string or fragment) as well as the :ytrec / :ytrecommended keywords
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4621
4622
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `subscriptions` feed."""

    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    # Accepts :ytsub, :ytsubs, :ytsubscription and :ytsubscriptions
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4634
4635
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the `history` feed."""

    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _FEED_NAME = 'history'
    # Accepts both :ythis and :ythistory
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4644
4645
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch URLs that lack a video ID and explain the likely cause.

    The pattern only matches URLs that end right after `watch?`, after a
    single one of the listed non-video parameters, or after an
    `attribution_link?a=...` parameter. Extraction always fails with a
    hint about quoting the URL in the shell.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a shell-quoting hint."""
        # The example commands name this program (yt-dlp), so that the hint
        # can be copied and run as written
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  yt-dlp BaW_jenozKc  .',
            expected=True)
4693
4694
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose `v` value is only 1 to 10 characters long."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # The ID must be the very last thing in the URL
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the short ID and the URL."""
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)