]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
d5813e2881ebc9d7ea385e3dd25d819d1a063b83
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 mimetype2ext,
42 network_exceptions,
43 orderedSet,
44 parse_codecs,
45 parse_count,
46 parse_duration,
47 parse_iso8601,
48 qualities,
49 remove_start,
50 smuggle_url,
51 str_or_none,
52 str_to_int,
53 traverse_obj,
54 try_get,
55 unescapeHTML,
56 unified_strdate,
57 unsmuggle_url,
58 update_url_query,
59 url_or_none,
60 urlencode_postdata,
61 urljoin,
62 variadic,
63 )
64
65
def parse_qs(url):
    """Parse the query string of *url* into a dict of lists."""
    parsed_url = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed_url.query)
68
69
# any clients starting with _ cannot be explicitly requested by the user
# Each entry describes one innertube (YouTube internal API) client:
# its API key, the request context sent to the API, and the numeric
# client name ID. Entries missing INNERTUBE_API_KEY or INNERTUBE_HOST
# are completed with defaults by build_innertube_clients() below.
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
    },
    'android_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
208
209
def build_innertube_clients():
    """Fill in per-client defaults and derive the *_agegate client variants."""
    # Can be any valid URL
    embed_origin = {'embedUrl': 'https://google.com'}
    base_clients = ('android', 'web', 'ios', 'mweb')
    base_priority = qualities(base_clients[::-1])

    # Iterate over a snapshot because agegate variants are inserted below
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        cfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        cfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        cfg['priority'] = 10 * base_priority(name.split('_', 1)[0])

        if name in base_clients:
            # Base clients get a cloned variant with an embedded-player screen
            agegate = copy.deepcopy(cfg)
            agegate['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate['INNERTUBE_CONTEXT']['thirdParty'] = embed_origin
            agegate['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_origin
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3


build_innertube_clients()
236
237
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # URL path segments that can never be interpreted as a channel/user name
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Known playlist ID prefixes, plus the special mix/watch-later/liked lists
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    r''' # Unused since login is broken
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
    '''
262
263 def _login(self):
264 """
265 Attempt to log in to YouTube.
266 True is returned if successful or skipped.
267 False is returned if login failed.
268
269 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
270 """
271
272 def warn(message):
273 self.report_warning(message)
274
275 # username+password login is broken
276 if (self._LOGIN_REQUIRED
277 and self.get_param('cookiefile') is None
278 and self.get_param('cookiesfrombrowser') is None):
279 self.raise_login_required(
280 'Login details are needed to download this content', method='cookies')
281 username, password = self._get_login_info()
282 if username:
283 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
284 return
285
286 # Everything below this is broken!
287 r'''
288 # No authentication to be performed
289 if username is None:
290 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
291 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
292 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
293 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
294 return True
295
296 login_page = self._download_webpage(
297 self._LOGIN_URL, None,
298 note='Downloading login page',
299 errnote='unable to fetch login page', fatal=False)
300 if login_page is False:
301 return
302
303 login_form = self._hidden_inputs(login_page)
304
305 def req(url, f_req, note, errnote):
306 data = login_form.copy()
307 data.update({
308 'pstMsg': 1,
309 'checkConnection': 'youtube',
310 'checkedDomains': 'youtube',
311 'hl': 'en',
312 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
313 'f.req': json.dumps(f_req),
314 'flowName': 'GlifWebSignIn',
315 'flowEntry': 'ServiceLogin',
316 # TODO: reverse actual botguard identifier generation algo
317 'bgRequest': '["identifier",""]',
318 })
319 return self._download_json(
320 url, None, note=note, errnote=errnote,
321 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
322 fatal=False,
323 data=urlencode_postdata(data), headers={
324 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
325 'Google-Accounts-XSRF': 1,
326 })
327
328 lookup_req = [
329 username,
330 None, [], None, 'US', None, None, 2, False, True,
331 [
332 None, None,
333 [2, 1, None, 1,
334 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
335 None, [], 4],
336 1, [None, None, []], None, None, None, True
337 ],
338 username,
339 ]
340
341 lookup_results = req(
342 self._LOOKUP_URL, lookup_req,
343 'Looking up account info', 'Unable to look up account info')
344
345 if lookup_results is False:
346 return False
347
348 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
349 if not user_hash:
350 warn('Unable to extract user hash')
351 return False
352
353 challenge_req = [
354 user_hash,
355 None, 1, None, [1, None, None, None, [password, None, True]],
356 [
357 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
358 1, [None, None, []], None, None, None, True
359 ]]
360
361 challenge_results = req(
362 self._CHALLENGE_URL, challenge_req,
363 'Logging in', 'Unable to log in')
364
365 if challenge_results is False:
366 return
367
368 login_res = try_get(challenge_results, lambda x: x[0][5], list)
369 if login_res:
370 login_msg = try_get(login_res, lambda x: x[5], compat_str)
371 warn(
372 'Unable to login: %s' % 'Invalid password'
373 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
374 return False
375
376 res = try_get(challenge_results, lambda x: x[0][-1], list)
377 if not res:
378 warn('Unable to extract result entry')
379 return False
380
381 login_challenge = try_get(res, lambda x: x[0][0], list)
382 if login_challenge:
383 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
384 if challenge_str == 'TWO_STEP_VERIFICATION':
385 # SEND_SUCCESS - TFA code has been successfully sent to phone
386 # QUOTA_EXCEEDED - reached the limit of TFA codes
387 status = try_get(login_challenge, lambda x: x[5], compat_str)
388 if status == 'QUOTA_EXCEEDED':
389 warn('Exceeded the limit of TFA codes, try later')
390 return False
391
392 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
393 if not tl:
394 warn('Unable to extract TL')
395 return False
396
397 tfa_code = self._get_tfa_info('2-step verification code')
398
399 if not tfa_code:
400 warn(
401 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
402 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
403 return False
404
405 tfa_code = remove_start(tfa_code, 'G-')
406
407 tfa_req = [
408 user_hash, None, 2, None,
409 [
410 9, None, None, None, None, None, None, None,
411 [None, tfa_code, True, 2]
412 ]]
413
414 tfa_results = req(
415 self._TFA_URL.format(tl), tfa_req,
416 'Submitting TFA code', 'Unable to submit TFA code')
417
418 if tfa_results is False:
419 return False
420
421 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
422 if tfa_res:
423 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
424 warn(
425 'Unable to finish TFA: %s' % 'Invalid TFA code'
426 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
427 return False
428
429 check_cookie_url = try_get(
430 tfa_results, lambda x: x[0][-1][2], compat_str)
431 else:
432 CHALLENGES = {
433 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
434 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
435 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
436 }
437 challenge = CHALLENGES.get(
438 challenge_str,
439 '%s returned error %s.' % (self.IE_NAME, challenge_str))
440 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
441 return False
442 else:
443 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
444
445 if not check_cookie_url:
446 warn('Unable to extract CheckCookie URL')
447 return False
448
449 check_cookie_results = self._download_webpage(
450 check_cookie_url, None, 'Checking cookie', fatal=False)
451
452 if check_cookie_results is False:
453 return False
454
455 if 'https://myaccount.google.com/' not in check_cookie_results:
456 warn('Unable to log in')
457 return False
458
459 return True
460 '''
461
462 def _initialize_consent(self):
463 cookies = self._get_cookies('https://www.youtube.com/')
464 if cookies.get('__Secure-3PSID'):
465 return
466 consent_id = None
467 consent = cookies.get('CONSENT')
468 if consent:
469 if 'YES' in consent.value:
470 return
471 consent_id = self._search_regex(
472 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
473 if not consent_id:
474 consent_id = random.randint(100, 999)
475 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
476
477 def _real_initialize(self):
478 self._initialize_consent()
479 if self._downloader is None:
480 return
481 if not self._login():
482 return
483
    # Regexes matching the inline JSON blobs embedded in watch/browse pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Tokens that can follow (and thus delimit) the JSON blob in page source
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
487
488 def _get_default_ytcfg(self, client='web'):
489 return copy.deepcopy(INNERTUBE_CLIENTS[client])
490
491 def _get_innertube_host(self, client='web'):
492 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
493
494 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
495 # try_get but with fallback to default ytcfg client values when present
496 _func = lambda y: try_get(y, getter, expected_type)
497 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
498
499 def _extract_client_name(self, ytcfg, default_client='web'):
500 return self._ytcfg_get_safe(
501 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
502 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
503
504 @staticmethod
505 def _extract_session_index(*data):
506 for ytcfg in data:
507 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
508 if session_index is not None:
509 return session_index
510
511 def _extract_client_version(self, ytcfg, default_client='web'):
512 return self._ytcfg_get_safe(
513 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
514 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
515
516 def _extract_api_key(self, ytcfg=None, default_client='web'):
517 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
518
519 def _extract_context(self, ytcfg=None, default_client='web'):
520 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
521 context = _get_context(ytcfg)
522 if context:
523 return context
524
525 context = _get_context(self._get_default_ytcfg(default_client))
526 if not ytcfg:
527 return context
528
529 # Recreate the client context (required)
530 context['client'].update({
531 'clientVersion': self._extract_client_version(ytcfg, default_client),
532 'clientName': self._extract_client_name(ytcfg, default_client),
533 })
534 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
535 if visitor_data:
536 context['client']['visitorData'] = visitor_data
537 return context
538
    # Cached SAPISID cookie value: None = not yet looked up, False = unavailable
    _SAPISID = None
540
541 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
542 time_now = round(time.time())
543 if self._SAPISID is None:
544 yt_cookies = self._get_cookies('https://www.youtube.com')
545 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
546 # See: https://github.com/yt-dlp/yt-dlp/issues/393
547 sapisid_cookie = dict_get(
548 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
549 if sapisid_cookie and sapisid_cookie.value:
550 self._SAPISID = sapisid_cookie.value
551 self.write_debug('Extracted SAPISID cookie')
552 # SAPISID cookie is required if not already present
553 if not yt_cookies.get('SAPISID'):
554 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
555 self._set_cookie(
556 '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
557 else:
558 self._SAPISID = False
559 if not self._SAPISID:
560 return None
561 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
562 sapisidhash = hashlib.sha1(
563 f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
564 return f'SAPISIDHASH {time_now}_{sapisidhash}'
565
566 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
567 note='Downloading API JSON', errnote='Unable to download API page',
568 context=None, api_key=None, api_hostname=None, default_client='web'):
569
570 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
571 data.update(query)
572 real_headers = self.generate_api_headers(default_client=default_client)
573 real_headers.update({'content-type': 'application/json'})
574 if headers:
575 real_headers.update(headers)
576 return self._download_json(
577 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
578 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
579 data=json.dumps(data).encode('utf8'), headers=real_headers,
580 query={'key': api_key or self._extract_api_key()})
581
582 def extract_yt_initial_data(self, video_id, webpage):
583 return self._parse_json(
584 self._search_regex(
585 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
586 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
587 video_id)
588
589 def _extract_identity_token(self, webpage, item_id):
590 if not webpage:
591 return None
592 ytcfg = self.extract_ytcfg(item_id, webpage)
593 if ytcfg:
594 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
595 if token:
596 return token
597 return self._search_regex(
598 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
599 'identity token', default=None)
600
601 @staticmethod
602 def _extract_account_syncid(*args):
603 """
604 Extract syncId required to download private playlists of secondary channels
605 @params response and/or ytcfg
606 """
607 for data in args:
608 # ytcfg includes channel_syncid if on secondary channel
609 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
610 if delegated_sid:
611 return delegated_sid
612 sync_ids = (try_get(
613 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
614 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
615 if len(sync_ids) >= 2 and sync_ids[1]:
616 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
617 # and just "user_syncid||" for primary channel. We only want the channel_syncid
618 return sync_ids[0]
619
620 def extract_ytcfg(self, video_id, webpage):
621 if not webpage:
622 return {}
623 return self._parse_json(
624 self._search_regex(
625 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
626 default='{}'), video_id, fatal=False) or {}
627
    def generate_api_headers(
            self, ytcfg=None, identity_token=None, account_syncid=None,
            visitor_data=None, api_hostname=None, default_client='web', session_index=None):
        """
        Assemble the HTTP headers for an innertube API request.

        Client name/version come from ytcfg (or the default client config);
        identity token, account syncid, visitor data, session index and the
        SAPISIDHASH Authorization header are added only when available.
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin
        }
        # Fall back to the visitorData embedded in the ytcfg context
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
        if session_index is None and ytcfg:
            session_index = self._extract_session_index(ytcfg)
        # AuthUser selects which of the logged-in accounts the request acts as
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers
656
657 @staticmethod
658 def _build_api_continuation_query(continuation, ctp=None):
659 query = {
660 'continuation': continuation
661 }
662 # TODO: Inconsistency with clickTrackingParams.
663 # Currently we have a fixed ctp contained within context (from ytcfg)
664 # and a ctp in root query for continuation.
665 if ctp:
666 query['clickTracking'] = {'clickTrackingParams': ctp}
667 return query
668
669 @classmethod
670 def _extract_next_continuation_data(cls, renderer):
671 next_continuation = try_get(
672 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
673 lambda x: x['continuation']['reloadContinuationData']), dict)
674 if not next_continuation:
675 return
676 continuation = next_continuation.get('continuation')
677 if not continuation:
678 return
679 ctp = next_continuation.get('clickTrackingParams')
680 return cls._build_api_continuation_query(continuation, ctp)
681
682 @classmethod
683 def _extract_continuation_ep_data(cls, continuation_ep: dict):
684 if isinstance(continuation_ep, dict):
685 continuation = try_get(
686 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
687 if not continuation:
688 return
689 ctp = continuation_ep.get('clickTrackingParams')
690 return cls._build_api_continuation_query(continuation, ctp)
691
692 @classmethod
693 def _extract_continuation(cls, renderer):
694 next_continuation = cls._extract_next_continuation_data(renderer)
695 if next_continuation:
696 return next_continuation
697
698 contents = []
699 for key in ('contents', 'items'):
700 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
701
702 for content in contents:
703 if not isinstance(content, dict):
704 continue
705 continuation_ep = try_get(
706 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
707 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
708 dict)
709 continuation = cls._extract_continuation_ep_data(continuation_ep)
710 if continuation:
711 return continuation
712
713 @classmethod
714 def _extract_alerts(cls, data):
715 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
716 if not isinstance(alert_dict, dict):
717 continue
718 for alert in alert_dict.values():
719 alert_type = alert.get('type')
720 if not alert_type:
721 continue
722 message = cls._get_text(alert, 'text')
723 if message:
724 yield alert_type, message
725
726 def _report_alerts(self, alerts, expected=True):
727 errors = []
728 warnings = []
729 for alert_type, alert_message in alerts:
730 if alert_type.lower() == 'error':
731 errors.append([alert_type, alert_message])
732 else:
733 warnings.append([alert_type, alert_message])
734
735 for alert_type, alert_message in (warnings + errors[:-1]):
736 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
737 if errors:
738 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
739
740 def _extract_and_report_alerts(self, data, *args, **kwargs):
741 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
742
743 def _extract_badges(self, renderer: dict):
744 badges = set()
745 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
746 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
747 if label:
748 badges.add(label.lower())
749 return badges
750
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """
        Extract a text string from YouTube's renderer objects.

        Tries each path in path_list in turn (or data itself when no paths
        are given). Handles both text formats: {'simpleText': ...} and
        {'runs': [{'text': ...}, ...]}; at most max_runs runs are joined
        when given. Returns None when no path yields text.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # Non-branching paths (no ... or alternative lists) return a
                # single object rather than a list of matches - normalize
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    # item may itself already be a list of runs
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
772
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """
        Call the innertube API with retries.

        Retries (up to the 'extractor_retries' param, default 3) on retriable
        network errors and on responses missing all of check_get_keys (which
        YouTube sometimes sends as incomplete data). Alerts in the response
        are reported; on failure, raises when fatal else warns and returns None.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
831
832 @staticmethod
833 def is_music_url(url):
834 return re.match(r'https?://music\.youtube\.com/', url) is not None
835
836 def _extract_video(self, renderer):
837 video_id = renderer.get('videoId')
838 title = self._get_text(renderer, 'title')
839 description = self._get_text(renderer, 'descriptionSnippet')
840 duration = parse_duration(self._get_text(
841 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
842 view_count_text = self._get_text(renderer, 'viewCountText') or ''
843 view_count = str_to_int(self._search_regex(
844 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
845 'view count', default=None))
846
847 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
848
849 return {
850 '_type': 'url',
851 'ie_key': YoutubeIE.ie_key(),
852 'id': video_id,
853 'url': video_id,
854 'title': title,
855 'description': description,
856 'duration': duration,
857 'view_count': view_count,
858 'uploader': uploader,
859 }
860
861
862 class YoutubeIE(YoutubeBaseInfoExtractor):
863 IE_DESC = 'YouTube.com'
864 _INVIDIOUS_SITES = (
865 # invidious-redirect websites
866 r'(?:www\.)?redirect\.invidious\.io',
867 r'(?:(?:www|dev)\.)?invidio\.us',
868 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
869 r'(?:www\.)?invidious\.pussthecat\.org',
870 r'(?:www\.)?invidious\.zee\.li',
871 r'(?:www\.)?invidious\.ethibox\.fr',
872 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
873 # youtube-dl invidious instances list
874 r'(?:(?:www|no)\.)?invidiou\.sh',
875 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
876 r'(?:www\.)?invidious\.kabi\.tk',
877 r'(?:www\.)?invidious\.mastodon\.host',
878 r'(?:www\.)?invidious\.zapashcanon\.fr',
879 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
880 r'(?:www\.)?invidious\.tinfoil-hat\.net',
881 r'(?:www\.)?invidious\.himiko\.cloud',
882 r'(?:www\.)?invidious\.reallyancient\.tech',
883 r'(?:www\.)?invidious\.tube',
884 r'(?:www\.)?invidiou\.site',
885 r'(?:www\.)?invidious\.site',
886 r'(?:www\.)?invidious\.xyz',
887 r'(?:www\.)?invidious\.nixnet\.xyz',
888 r'(?:www\.)?invidious\.048596\.xyz',
889 r'(?:www\.)?invidious\.drycat\.fr',
890 r'(?:www\.)?inv\.skyn3t\.in',
891 r'(?:www\.)?tube\.poal\.co',
892 r'(?:www\.)?tube\.connect\.cafe',
893 r'(?:www\.)?vid\.wxzm\.sx',
894 r'(?:www\.)?vid\.mint\.lgbt',
895 r'(?:www\.)?vid\.puffyan\.us',
896 r'(?:www\.)?yewtu\.be',
897 r'(?:www\.)?yt\.elukerio\.org',
898 r'(?:www\.)?yt\.lelux\.fi',
899 r'(?:www\.)?invidious\.ggc-project\.de',
900 r'(?:www\.)?yt\.maisputain\.ovh',
901 r'(?:www\.)?ytprivate\.com',
902 r'(?:www\.)?invidious\.13ad\.de',
903 r'(?:www\.)?invidious\.toot\.koeln',
904 r'(?:www\.)?invidious\.fdn\.fr',
905 r'(?:www\.)?watch\.nettohikari\.com',
906 r'(?:www\.)?invidious\.namazso\.eu',
907 r'(?:www\.)?invidious\.silkky\.cloud',
908 r'(?:www\.)?invidious\.exonip\.de',
909 r'(?:www\.)?invidious\.riverside\.rocks',
910 r'(?:www\.)?invidious\.blamefran\.net',
911 r'(?:www\.)?invidious\.moomoo\.de',
912 r'(?:www\.)?ytb\.trom\.tf',
913 r'(?:www\.)?yt\.cyberhost\.uk',
914 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
915 r'(?:www\.)?qklhadlycap4cnod\.onion',
916 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
917 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
918 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
919 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
920 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
921 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
922 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
923 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
924 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
925 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
926 )
927 _VALID_URL = r"""(?x)^
928 (
929 (?:https?://|//) # http(s):// or protocol-independent URL
930 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
931 (?:www\.)?deturl\.com/www\.youtube\.com|
932 (?:www\.)?pwnyoutube\.com|
933 (?:www\.)?hooktube\.com|
934 (?:www\.)?yourepeat\.com|
935 tube\.majestyc\.net|
936 %(invidious)s|
937 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
938 (?:.*?\#/)? # handle anchor (#/) redirect urls
939 (?: # the various things that can precede the ID:
940 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
941 |(?: # or the v= param in all its forms
942 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
943 (?:\?|\#!?) # the params delimiter ? or # or #!
944 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
945 v=
946 )
947 ))
948 |(?:
949 youtu\.be| # just youtu.be/xxxx
950 vid\.plus| # or vid.plus/xxxx
951 zwearz\.com/watch| # or zwearz.com/watch/xxxx
952 %(invidious)s
953 )/
954 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
955 )
956 )? # all until now is optional -> you can pass the naked ID
957 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
958 (?(1).+)? # if we found the ID, everything can follow
959 (?:\#|$)""" % {
960 'invidious': '|'.join(_INVIDIOUS_SITES),
961 }
962 _PLAYER_INFO_RE = (
963 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
964 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
965 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
966 )
967 _formats = {
968 '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
969 '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
970 '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
971 '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
972 '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
973 '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
974 '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
975 '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
976 # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
977 '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
978 '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
979 '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
980 '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
981 '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
982 '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
983 '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
984 '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
985 '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
986
987
988 # 3D videos
989 '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
990 '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
991 '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
992 '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
993 '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
994 '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
995 '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
996
997 # Apple HTTP Live Streaming
998 '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
999 '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1000 '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1001 '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
1002 '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1003 '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
1004 '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
1005 '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
1006
1007 # DASH mp4 video
1008 '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
1009 '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
1010 '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1011 '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
1012 '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
1013 '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
1014 '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
1015 '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
1016 '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
1017 '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1018 '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
1019 '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
1020
1021 # Dash mp4 audio
1022 '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
1023 '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
1024 '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
1025 '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1026 '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
1027 '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1028 '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1029
1030 # Dash webm
1031 '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1032 '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1033 '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1034 '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1035 '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1036 '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1037 '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1038 '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1039 '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1040 '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1041 '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1042 '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1043 '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1044 '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1045 '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1046 # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1047 '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1048 '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1049 '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1050 '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1051 '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1052 '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1053
1054 # Dash webm audio
1055 '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1056 '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1057
1058 # Dash webm audio with opus inside
1059 '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1060 '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1061 '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1062
1063 # RTMP (unnamed)
1064 '_rtmp': {'protocol': 'rtmp'},
1065
1066 # av01 video only formats sometimes served with "unknown" codecs
1067 '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1068 '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1069 '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1070 '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1071 }
1072 _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1073
1074 _GEO_BYPASS = False
1075
1076 IE_NAME = 'youtube'
1077 _TESTS = [
1078 {
1079 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1080 'info_dict': {
1081 'id': 'BaW_jenozKc',
1082 'ext': 'mp4',
1083 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1084 'uploader': 'Philipp Hagemeister',
1085 'uploader_id': 'phihag',
1086 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1087 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1088 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1089 'upload_date': '20121002',
1090 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1091 'categories': ['Science & Technology'],
1092 'tags': ['youtube-dl'],
1093 'duration': 10,
1094 'view_count': int,
1095 'like_count': int,
1096 'dislike_count': int,
1097 'start_time': 1,
1098 'end_time': 9,
1099 }
1100 },
1101 {
1102 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1103 'note': 'Embed-only video (#1746)',
1104 'info_dict': {
1105 'id': 'yZIXLfi8CZQ',
1106 'ext': 'mp4',
1107 'upload_date': '20120608',
1108 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1109 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1110 'uploader': 'SET India',
1111 'uploader_id': 'setindia',
1112 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1113 'age_limit': 18,
1114 },
1115 'skip': 'Private video',
1116 },
1117 {
1118 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1119 'note': 'Use the first video ID in the URL',
1120 'info_dict': {
1121 'id': 'BaW_jenozKc',
1122 'ext': 'mp4',
1123 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1124 'uploader': 'Philipp Hagemeister',
1125 'uploader_id': 'phihag',
1126 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1127 'upload_date': '20121002',
1128 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1129 'categories': ['Science & Technology'],
1130 'tags': ['youtube-dl'],
1131 'duration': 10,
1132 'view_count': int,
1133 'like_count': int,
1134 'dislike_count': int,
1135 },
1136 'params': {
1137 'skip_download': True,
1138 },
1139 },
1140 {
1141 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1142 'note': '256k DASH audio (format 141) via DASH manifest',
1143 'info_dict': {
1144 'id': 'a9LDPn-MO4I',
1145 'ext': 'm4a',
1146 'upload_date': '20121002',
1147 'uploader_id': '8KVIDEO',
1148 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1149 'description': '',
1150 'uploader': '8KVIDEO',
1151 'title': 'UHDTV TEST 8K VIDEO.mp4'
1152 },
1153 'params': {
1154 'youtube_include_dash_manifest': True,
1155 'format': '141',
1156 },
1157 'skip': 'format 141 not served anymore',
1158 },
1159 # DASH manifest with encrypted signature
1160 {
1161 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1162 'info_dict': {
1163 'id': 'IB3lcPjvWLA',
1164 'ext': 'm4a',
1165 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1166 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1167 'duration': 244,
1168 'uploader': 'AfrojackVEVO',
1169 'uploader_id': 'AfrojackVEVO',
1170 'upload_date': '20131011',
1171 'abr': 129.495,
1172 },
1173 'params': {
1174 'youtube_include_dash_manifest': True,
1175 'format': '141/bestaudio[ext=m4a]',
1176 },
1177 },
1178 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1179 {
1180 'note': 'Embed allowed age-gate video',
1181 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1182 'info_dict': {
1183 'id': 'HtVdAasjOgU',
1184 'ext': 'mp4',
1185 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1186 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1187 'duration': 142,
1188 'uploader': 'The Witcher',
1189 'uploader_id': 'WitcherGame',
1190 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1191 'upload_date': '20140605',
1192 'age_limit': 18,
1193 },
1194 },
1195 {
1196 'note': 'Age-gate video with embed allowed in public site',
1197 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1198 'info_dict': {
1199 'id': 'HsUATh_Nc2U',
1200 'ext': 'mp4',
1201 'title': 'Godzilla 2 (Official Video)',
1202 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1203 'upload_date': '20200408',
1204 'uploader_id': 'FlyingKitty900',
1205 'uploader': 'FlyingKitty',
1206 'age_limit': 18,
1207 },
1208 },
1209 {
1210 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1211 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1212 'info_dict': {
1213 'id': 'Tq92D6wQ1mg',
1214 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1215 'ext': 'mp4',
1216 'upload_date': '20191227',
1217 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1218 'uploader': 'Projekt Melody',
1219 'description': 'md5:17eccca93a786d51bc67646756894066',
1220 'age_limit': 18,
1221 },
1222 },
1223 {
1224 'note': 'Non-Agegated non-embeddable video',
1225 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1226 'info_dict': {
1227 'id': 'MeJVWBSsPAY',
1228 'ext': 'mp4',
1229 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1230 'uploader': 'Herr Lurik',
1231 'uploader_id': 'st3in234',
1232 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1233 'upload_date': '20130730',
1234 },
1235 },
1236 {
1237 'note': 'Non-bypassable age-gated video',
1238 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1239 'only_matching': True,
1240 },
1241 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1242 # YouTube Red ad is not captured for creator
1243 {
1244 'url': '__2ABJjxzNo',
1245 'info_dict': {
1246 'id': '__2ABJjxzNo',
1247 'ext': 'mp4',
1248 'duration': 266,
1249 'upload_date': '20100430',
1250 'uploader_id': 'deadmau5',
1251 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1252 'creator': 'deadmau5',
1253 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1254 'uploader': 'deadmau5',
1255 'title': 'Deadmau5 - Some Chords (HD)',
1256 'alt_title': 'Some Chords',
1257 },
1258 'expected_warnings': [
1259 'DASH manifest missing',
1260 ]
1261 },
1262 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1263 {
1264 'url': 'lqQg6PlCWgI',
1265 'info_dict': {
1266 'id': 'lqQg6PlCWgI',
1267 'ext': 'mp4',
1268 'duration': 6085,
1269 'upload_date': '20150827',
1270 'uploader_id': 'olympic',
1271 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1272 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1273 'uploader': 'Olympics',
1274 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1275 },
1276 'params': {
1277 'skip_download': 'requires avconv',
1278 }
1279 },
1280 # Non-square pixels
1281 {
1282 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1283 'info_dict': {
1284 'id': '_b-2C3KPAM0',
1285 'ext': 'mp4',
1286 'stretched_ratio': 16 / 9.,
1287 'duration': 85,
1288 'upload_date': '20110310',
1289 'uploader_id': 'AllenMeow',
1290 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1291 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1292 'uploader': '孫ᄋᄅ',
1293 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1294 },
1295 },
1296 # url_encoded_fmt_stream_map is empty string
1297 {
1298 'url': 'qEJwOuvDf7I',
1299 'info_dict': {
1300 'id': 'qEJwOuvDf7I',
1301 'ext': 'webm',
1302 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1303 'description': '',
1304 'upload_date': '20150404',
1305 'uploader_id': 'spbelect',
1306 'uploader': 'Наблюдатели Петербурга',
1307 },
1308 'params': {
1309 'skip_download': 'requires avconv',
1310 },
1311 'skip': 'This live event has ended.',
1312 },
1313 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1314 {
1315 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1316 'info_dict': {
1317 'id': 'FIl7x6_3R5Y',
1318 'ext': 'webm',
1319 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1320 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1321 'duration': 220,
1322 'upload_date': '20150625',
1323 'uploader_id': 'dorappi2000',
1324 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1325 'uploader': 'dorappi2000',
1326 'formats': 'mincount:31',
1327 },
1328 'skip': 'not actual anymore',
1329 },
1330 # DASH manifest with segment_list
1331 {
1332 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1333 'md5': '8ce563a1d667b599d21064e982ab9e31',
1334 'info_dict': {
1335 'id': 'CsmdDsKjzN8',
1336 'ext': 'mp4',
1337 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1338 'uploader': 'Airtek',
1339 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1340 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1341 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1342 },
1343 'params': {
1344 'youtube_include_dash_manifest': True,
1345 'format': '135', # bestvideo
1346 },
1347 'skip': 'This live event has ended.',
1348 },
1349 {
1350 # Multifeed videos (multiple cameras), URL is for Main Camera
1351 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1352 'info_dict': {
1353 'id': 'jvGDaLqkpTg',
1354 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1355 'description': 'md5:e03b909557865076822aa169218d6a5d',
1356 },
1357 'playlist': [{
1358 'info_dict': {
1359 'id': 'jvGDaLqkpTg',
1360 'ext': 'mp4',
1361 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1362 'description': 'md5:e03b909557865076822aa169218d6a5d',
1363 'duration': 10643,
1364 'upload_date': '20161111',
1365 'uploader': 'Team PGP',
1366 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1367 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1368 },
1369 }, {
1370 'info_dict': {
1371 'id': '3AKt1R1aDnw',
1372 'ext': 'mp4',
1373 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1374 'description': 'md5:e03b909557865076822aa169218d6a5d',
1375 'duration': 10991,
1376 'upload_date': '20161111',
1377 'uploader': 'Team PGP',
1378 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1379 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1380 },
1381 }, {
1382 'info_dict': {
1383 'id': 'RtAMM00gpVc',
1384 'ext': 'mp4',
1385 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1386 'description': 'md5:e03b909557865076822aa169218d6a5d',
1387 'duration': 10995,
1388 'upload_date': '20161111',
1389 'uploader': 'Team PGP',
1390 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1391 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1392 },
1393 }, {
1394 'info_dict': {
1395 'id': '6N2fdlP3C5U',
1396 'ext': 'mp4',
1397 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1398 'description': 'md5:e03b909557865076822aa169218d6a5d',
1399 'duration': 10990,
1400 'upload_date': '20161111',
1401 'uploader': 'Team PGP',
1402 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1403 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1404 },
1405 }],
1406 'params': {
1407 'skip_download': True,
1408 },
1409 'skip': 'Not multifeed anymore',
1410 },
1411 {
1412 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1413 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1414 'info_dict': {
1415 'id': 'gVfLd0zydlo',
1416 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1417 },
1418 'playlist_count': 2,
1419 'skip': 'Not multifeed anymore',
1420 },
1421 {
1422 'url': 'https://vid.plus/FlRa-iH7PGw',
1423 'only_matching': True,
1424 },
1425 {
1426 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1427 'only_matching': True,
1428 },
1429 {
1430 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1431 # Also tests cut-off URL expansion in video description (see
1432 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1433 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1434 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1435 'info_dict': {
1436 'id': 'lsguqyKfVQg',
1437 'ext': 'mp4',
1438 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1439 'alt_title': 'Dark Walk',
1440 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1441 'duration': 133,
1442 'upload_date': '20151119',
1443 'uploader_id': 'IronSoulElf',
1444 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1445 'uploader': 'IronSoulElf',
1446 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1447 'track': 'Dark Walk',
1448 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1449 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1450 },
1451 'params': {
1452 'skip_download': True,
1453 },
1454 },
1455 {
1456 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1457 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1458 'only_matching': True,
1459 },
1460 {
1461 # Video with yt:stretch=17:0
1462 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1463 'info_dict': {
1464 'id': 'Q39EVAstoRM',
1465 'ext': 'mp4',
1466 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1467 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1468 'upload_date': '20151107',
1469 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1470 'uploader': 'CH GAMER DROID',
1471 },
1472 'params': {
1473 'skip_download': True,
1474 },
1475 'skip': 'This video does not exist.',
1476 },
1477 {
1478 # Video with incomplete 'yt:stretch=16:'
1479 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1480 'only_matching': True,
1481 },
1482 {
1483 # Video licensed under Creative Commons
1484 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1485 'info_dict': {
1486 'id': 'M4gD1WSo5mA',
1487 'ext': 'mp4',
1488 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1489 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1490 'duration': 721,
1491 'upload_date': '20150127',
1492 'uploader_id': 'BerkmanCenter',
1493 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1494 'uploader': 'The Berkman Klein Center for Internet & Society',
1495 'license': 'Creative Commons Attribution license (reuse allowed)',
1496 },
1497 'params': {
1498 'skip_download': True,
1499 },
1500 },
1501 {
1502 # Channel-like uploader_url
1503 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1504 'info_dict': {
1505 'id': 'eQcmzGIKrzg',
1506 'ext': 'mp4',
1507 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1508 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1509 'duration': 4060,
1510 'upload_date': '20151119',
1511 'uploader': 'Bernie Sanders',
1512 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1513 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1514 'license': 'Creative Commons Attribution license (reuse allowed)',
1515 },
1516 'params': {
1517 'skip_download': True,
1518 },
1519 },
1520 {
1521 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1522 'only_matching': True,
1523 },
1524 {
1525 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1526 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1527 'only_matching': True,
1528 },
1529 {
1530 # Rental video preview
1531 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1532 'info_dict': {
1533 'id': 'uGpuVWrhIzE',
1534 'ext': 'mp4',
1535 'title': 'Piku - Trailer',
1536 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1537 'upload_date': '20150811',
1538 'uploader': 'FlixMatrix',
1539 'uploader_id': 'FlixMatrixKaravan',
1540 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1541 'license': 'Standard YouTube License',
1542 },
1543 'params': {
1544 'skip_download': True,
1545 },
1546 'skip': 'This video is not available.',
1547 },
1548 {
1549 # YouTube Red video with episode data
1550 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1551 'info_dict': {
1552 'id': 'iqKdEhx-dD4',
1553 'ext': 'mp4',
1554 'title': 'Isolation - Mind Field (Ep 1)',
1555 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1556 'duration': 2085,
1557 'upload_date': '20170118',
1558 'uploader': 'Vsauce',
1559 'uploader_id': 'Vsauce',
1560 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1561 'series': 'Mind Field',
1562 'season_number': 1,
1563 'episode_number': 1,
1564 },
1565 'params': {
1566 'skip_download': True,
1567 },
1568 'expected_warnings': [
1569 'Skipping DASH manifest',
1570 ],
1571 },
1572 {
1573 # The following content has been identified by the YouTube community
1574 # as inappropriate or offensive to some audiences.
1575 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1576 'info_dict': {
1577 'id': '6SJNVb0GnPI',
1578 'ext': 'mp4',
1579 'title': 'Race Differences in Intelligence',
1580 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1581 'duration': 965,
1582 'upload_date': '20140124',
1583 'uploader': 'New Century Foundation',
1584 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1585 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1586 },
1587 'params': {
1588 'skip_download': True,
1589 },
1590 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1591 },
1592 {
1593 # itag 212
1594 'url': '1t24XAntNCY',
1595 'only_matching': True,
1596 },
1597 {
1598 # geo restricted to JP
1599 'url': 'sJL6WA-aGkQ',
1600 'only_matching': True,
1601 },
1602 {
1603 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1604 'only_matching': True,
1605 },
1606 {
1607 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1608 'only_matching': True,
1609 },
1610 {
1611 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1612 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1613 'only_matching': True,
1614 },
1615 {
1616 # DRM protected
1617 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1618 'only_matching': True,
1619 },
1620 {
1621 # Video with unsupported adaptive stream type formats
1622 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1623 'info_dict': {
1624 'id': 'Z4Vy8R84T1U',
1625 'ext': 'mp4',
1626 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1627 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1628 'duration': 433,
1629 'upload_date': '20130923',
1630 'uploader': 'Amelia Putri Harwita',
1631 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1632 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1633 'formats': 'maxcount:10',
1634 },
1635 'params': {
1636 'skip_download': True,
1637 'youtube_include_dash_manifest': False,
1638 },
1639 'skip': 'not actual anymore',
1640 },
1641 {
1642 # Youtube Music Auto-generated description
1643 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1644 'info_dict': {
1645 'id': 'MgNrAu2pzNs',
1646 'ext': 'mp4',
1647 'title': 'Voyeur Girl',
1648 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1649 'upload_date': '20190312',
1650 'uploader': 'Stephen - Topic',
1651 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1652 'artist': 'Stephen',
1653 'track': 'Voyeur Girl',
1654 'album': 'it\'s too much love to know my dear',
1655 'release_date': '20190313',
1656 'release_year': 2019,
1657 },
1658 'params': {
1659 'skip_download': True,
1660 },
1661 },
1662 {
1663 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1664 'only_matching': True,
1665 },
1666 {
1667 # invalid -> valid video id redirection
1668 'url': 'DJztXj2GPfl',
1669 'info_dict': {
1670 'id': 'DJztXj2GPfk',
1671 'ext': 'mp4',
1672 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1673 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1674 'upload_date': '20090125',
1675 'uploader': 'Prochorowka',
1676 'uploader_id': 'Prochorowka',
1677 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1678 'artist': 'Panjabi MC',
1679 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1680 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1681 },
1682 'params': {
1683 'skip_download': True,
1684 },
1685 'skip': 'Video unavailable',
1686 },
1687 {
1688 # empty description results in an empty string
1689 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1690 'info_dict': {
1691 'id': 'x41yOUIvK2k',
1692 'ext': 'mp4',
1693 'title': 'IMG 3456',
1694 'description': '',
1695 'upload_date': '20170613',
1696 'uploader_id': 'ElevageOrVert',
1697 'uploader': 'ElevageOrVert',
1698 },
1699 'params': {
1700 'skip_download': True,
1701 },
1702 },
1703 {
1704 # with '};' inside yt initial data (see [1])
1705 # see [2] for an example with '};' inside ytInitialPlayerResponse
1706 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1707 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1708 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1709 'info_dict': {
1710 'id': 'CHqg6qOn4no',
1711 'ext': 'mp4',
1712 'title': 'Part 77 Sort a list of simple types in c#',
1713 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1714 'upload_date': '20130831',
1715 'uploader_id': 'kudvenkat',
1716 'uploader': 'kudvenkat',
1717 },
1718 'params': {
1719 'skip_download': True,
1720 },
1721 },
1722 {
1723 # another example of '};' in ytInitialData
1724 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1725 'only_matching': True,
1726 },
1727 {
1728 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1729 'only_matching': True,
1730 },
1731 {
1732 # https://github.com/ytdl-org/youtube-dl/pull/28094
1733 'url': 'OtqTfy26tG0',
1734 'info_dict': {
1735 'id': 'OtqTfy26tG0',
1736 'ext': 'mp4',
1737 'title': 'Burn Out',
1738 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1739 'upload_date': '20141120',
1740 'uploader': 'The Cinematic Orchestra - Topic',
1741 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1742 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1743 'artist': 'The Cinematic Orchestra',
1744 'track': 'Burn Out',
1745 'album': 'Every Day',
1746 'release_data': None,
1747 'release_year': None,
1748 },
1749 'params': {
1750 'skip_download': True,
1751 },
1752 },
1753 {
1754 # controversial video, only works with bpctr when authenticated with cookies
1755 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1756 'only_matching': True,
1757 },
1758 {
1759 # controversial video, requires bpctr/contentCheckOk
1760 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1761 'info_dict': {
1762 'id': 'SZJvDhaSDnc',
1763 'ext': 'mp4',
1764 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1765 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1766 'uploader': 'CBS This Morning',
1767 'uploader_id': 'CBSThisMorning',
1768 'upload_date': '20140716',
1769 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1770 }
1771 },
1772 {
1773 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1774 'url': 'cBvYw8_A0vQ',
1775 'info_dict': {
1776 'id': 'cBvYw8_A0vQ',
1777 'ext': 'mp4',
1778 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1779 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1780 'upload_date': '20201120',
1781 'uploader': 'Walk around Japan',
1782 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1783 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1784 },
1785 'params': {
1786 'skip_download': True,
1787 },
1788 }, {
1789 # Has multiple audio streams
1790 'url': 'WaOKSUlf4TM',
1791 'only_matching': True
1792 }, {
1793 # Requires Premium: has format 141 when requested using YTM url
1794 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1795 'only_matching': True
1796 }, {
1797 # multiple subtitles with same lang_code
1798 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1799 'only_matching': True,
1800 }, {
1801 # Force use android client fallback
1802 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1803 'info_dict': {
1804 'id': 'YOelRv7fMxY',
1805 'title': 'DIGGING A SECRET TUNNEL Part 1',
1806 'ext': '3gp',
1807 'upload_date': '20210624',
1808 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1809 'uploader': 'colinfurze',
1810 'uploader_id': 'colinfurze',
1811 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1812 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1813 },
1814 'params': {
1815 'format': '17', # 3gp format available on android
1816 'extractor_args': {'youtube': {'player_client': ['android']}},
1817 },
1818 },
1819 {
1820 # Skip download of additional client configs (remix client config in this case)
1821 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1822 'only_matching': True,
1823 'params': {
1824 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1825 },
1826 }
1827 ]
1828
1829 @classmethod
1830 def suitable(cls, url):
1831 # Hack for lazy extractors until more generic solution is implemented
1832 # (see #28780)
1833 from .youtube import parse_qs
1834 qs = parse_qs(url)
1835 if qs.get('list', [None])[0]:
1836 return False
1837 return super(YoutubeIE, cls).suitable(url)
1838
1839 def __init__(self, *args, **kwargs):
1840 super(YoutubeIE, self).__init__(*args, **kwargs)
1841 self._code_cache = {}
1842 self._player_cache = {}
1843
1844 def _extract_player_url(self, ytcfg=None, webpage=None):
1845 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1846 if not player_url and webpage:
1847 player_url = self._search_regex(
1848 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1849 webpage, 'player URL', fatal=False)
1850 if not player_url:
1851 return None
1852 if player_url.startswith('//'):
1853 player_url = 'https:' + player_url
1854 elif not re.match(r'https?://', player_url):
1855 player_url = compat_urlparse.urljoin(
1856 'https://www.youtube.com', player_url)
1857 return player_url
1858
1859 def _signature_cache_id(self, example_sig):
1860 """ Return a string representation of a signature """
1861 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1862
1863 @classmethod
1864 def _extract_player_info(cls, player_url):
1865 for player_re in cls._PLAYER_INFO_RE:
1866 id_m = re.search(player_re, player_url)
1867 if id_m:
1868 break
1869 else:
1870 raise ExtractorError('Cannot identify player %r' % player_url)
1871 return id_m.group('id')
1872
1873 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1874 player_id = self._extract_player_info(player_url)
1875 if player_id not in self._code_cache:
1876 self._code_cache[player_id] = self._download_webpage(
1877 player_url, video_id, fatal=fatal,
1878 note='Downloading player ' + player_id,
1879 errnote='Download of %s failed' % player_url)
1880 return player_id in self._code_cache
1881
1882 def _extract_signature_function(self, video_id, player_url, example_sig):
1883 player_id = self._extract_player_info(player_url)
1884
1885 # Read from filesystem cache
1886 func_id = 'js_%s_%s' % (
1887 player_id, self._signature_cache_id(example_sig))
1888 assert os.path.basename(func_id) == func_id
1889
1890 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1891 if cache_spec is not None:
1892 return lambda s: ''.join(s[i] for i in cache_spec)
1893
1894 if self._load_player(video_id, player_url):
1895 code = self._code_cache[player_id]
1896 res = self._parse_sig_js(code)
1897
1898 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1899 cache_res = res(test_string)
1900 cache_spec = [ord(c) for c in cache_res]
1901
1902 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1903 return res
1904
    def _print_sig_code(self, func, example_sig):
        """Print the extracted signature function as equivalent Python slice code.

        Used with the youtube_print_sig_code param for debugging; it compresses
        runs of consecutive indices into slice expressions.
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a Python slice expression for the run [start..end] with step
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Continue an ongoing run while the stride is unchanged
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Adjacent indices start a new run with stride +/-1
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or the run still in progress
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Recover the permutation by applying func to a probe string
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1943
1944 def _parse_sig_js(self, jscode):
1945 funcname = self._search_regex(
1946 (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1947 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1948 r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
1949 r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
1950 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1951 r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1952 r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1953 # Obsolete patterns
1954 r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1955 r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1956 r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1957 r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1958 r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1959 r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1960 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1961 r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1962 jscode, 'Initial JS player signature function name', group='sig')
1963
1964 jsi = JSInterpreter(jscode)
1965 initial_function = jsi.extract_function(funcname)
1966 return lambda s: initial_function([s])
1967
1968 def _decrypt_signature(self, s, video_id, player_url):
1969 """Turn the encrypted s field into a working signature"""
1970
1971 if player_url is None:
1972 raise ExtractorError('Cannot decrypt signature without player_url')
1973
1974 try:
1975 player_id = (player_url, self._signature_cache_id(s))
1976 if player_id not in self._player_cache:
1977 func = self._extract_signature_function(
1978 video_id, player_url, s
1979 )
1980 self._player_cache[player_id] = func
1981 func = self._player_cache[player_id]
1982 if self.get_param('youtube_print_sig_code'):
1983 self._print_sig_code(func, s)
1984 return func(s)
1985 except Exception as e:
1986 tb = traceback.format_exc()
1987 raise ExtractorError(
1988 'Signature extraction failed: ' + tb, cause=e)
1989
1990 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1991 """
1992 Extract signatureTimestamp (sts)
1993 Required to tell API what sig/player version is in use.
1994 """
1995 sts = None
1996 if isinstance(ytcfg, dict):
1997 sts = int_or_none(ytcfg.get('STS'))
1998
1999 if not sts:
2000 # Attempt to extract from player
2001 if player_url is None:
2002 error_msg = 'Cannot extract signature timestamp without player_url.'
2003 if fatal:
2004 raise ExtractorError(error_msg)
2005 self.report_warning(error_msg)
2006 return
2007 if self._load_player(video_id, player_url, fatal=fatal):
2008 player_id = self._extract_player_info(player_url)
2009 code = self._code_cache[player_id]
2010 sts = int_or_none(self._search_regex(
2011 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2012 'JS player signature timestamp', group='sts', fatal=fatal))
2013 return sts
2014
2015 def _mark_watched(self, video_id, player_responses):
2016 playback_url = traverse_obj(
2017 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2018 expected_type=url_or_none, get_all=False)
2019 if not playback_url:
2020 self.report_warning('Unable to mark watched')
2021 return
2022 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2023 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2024
2025 # cpn generation algorithm is reverse engineered from base.js.
2026 # In fact it works even with dummy cpn.
2027 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2028 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2029
2030 qs.update({
2031 'ver': ['2'],
2032 'cpn': [cpn],
2033 })
2034 playback_url = compat_urlparse.urlunparse(
2035 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2036
2037 self._download_webpage(
2038 playback_url, video_id, 'Marking watched',
2039 'Unable to mark watched', fatal=False)
2040
2041 @staticmethod
2042 def _extract_urls(webpage):
2043 # Embedded YouTube player
2044 entries = [
2045 unescapeHTML(mobj.group('url'))
2046 for mobj in re.finditer(r'''(?x)
2047 (?:
2048 <iframe[^>]+?src=|
2049 data-video-url=|
2050 <embed[^>]+?src=|
2051 embedSWF\(?:\s*|
2052 <object[^>]+data=|
2053 new\s+SWFObject\(
2054 )
2055 (["\'])
2056 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2057 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2058 \1''', webpage)]
2059
2060 # lazyYT YouTube embed
2061 entries.extend(list(map(
2062 unescapeHTML,
2063 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2064
2065 # Wordpress "YouTube Video Importer" plugin
2066 matches = re.findall(r'''(?x)<div[^>]+
2067 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2068 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2069 entries.extend(m[-1] for m in matches)
2070
2071 return entries
2072
2073 @staticmethod
2074 def _extract_url(webpage):
2075 urls = YoutubeIE._extract_urls(webpage)
2076 return urls[0] if urls else None
2077
2078 @classmethod
2079 def extract_id(cls, url):
2080 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2081 if mobj is None:
2082 raise ExtractorError('Invalid URL: %s' % url)
2083 video_id = mobj.group(2)
2084 return video_id
2085
2086 def _extract_chapters_from_json(self, data, duration):
2087 chapter_list = traverse_obj(
2088 data, (
2089 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2090 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2091 ), expected_type=list)
2092
2093 return self._extract_chapters(
2094 chapter_list,
2095 chapter_time=lambda chapter: float_or_none(
2096 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2097 chapter_title=lambda chapter: traverse_obj(
2098 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2099 duration=duration)
2100
2101 def _extract_chapters_from_engagement_panel(self, data, duration):
2102 content_list = traverse_obj(
2103 data,
2104 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2105 expected_type=list, default=[])
2106 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2107 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2108
2109 return next((
2110 filter(None, (
2111 self._extract_chapters(
2112 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2113 chapter_time, chapter_title, duration)
2114 for contents in content_list
2115 ))), [])
2116
2117 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2118 chapters = []
2119 last_chapter = {'start_time': 0}
2120 for idx, chapter in enumerate(chapter_list or []):
2121 title = chapter_title(chapter)
2122 start_time = chapter_time(chapter)
2123 if start_time is None:
2124 continue
2125 last_chapter['end_time'] = start_time
2126 if start_time < last_chapter['start_time']:
2127 if idx == 1:
2128 chapters.pop()
2129 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2130 else:
2131 self.report_warning(f'Invalid start time for chapter "{title}"')
2132 continue
2133 last_chapter = {'start_time': start_time, 'title': title}
2134 chapters.append(last_chapter)
2135 last_chapter['end_time'] = duration
2136 return chapters
2137
2138 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2139 return self._parse_json(self._search_regex(
2140 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2141 regex), webpage, name, default='{}'), video_id, fatal=False)
2142
2143 @staticmethod
2144 def parse_time_text(time_text):
2145 """
2146 Parse the comment time text
2147 time_text is in the format 'X units ago (edited)'
2148 """
2149 time_text_split = time_text.split(' ')
2150 if len(time_text_split) >= 3:
2151 try:
2152 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2153 except ValueError:
2154 return None
2155
2156 def _extract_comment(self, comment_renderer, parent=None):
2157 comment_id = comment_renderer.get('commentId')
2158 if not comment_id:
2159 return
2160
2161 text = self._get_text(comment_renderer, 'contentText')
2162
2163 # note: timestamp is an estimate calculated from the current time and time_text
2164 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2165 time_text_dt = self.parse_time_text(time_text)
2166 if isinstance(time_text_dt, datetime.datetime):
2167 timestamp = calendar.timegm(time_text_dt.timetuple())
2168 author = self._get_text(comment_renderer, 'authorText')
2169 author_id = try_get(comment_renderer,
2170 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2171
2172 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2173 lambda x: x['likeCount']), compat_str)) or 0
2174 author_thumbnail = try_get(comment_renderer,
2175 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2176
2177 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2178 is_favorited = 'creatorHeart' in (try_get(
2179 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2180 return {
2181 'id': comment_id,
2182 'text': text,
2183 'timestamp': timestamp,
2184 'time_text': time_text,
2185 'like_count': votes,
2186 'is_favorited': is_favorited,
2187 'author': author,
2188 'author_id': author_id,
2189 'author_thumbnail': author_thumbnail,
2190 'author_is_uploader': author_is_uploader,
2191 'parent': parent or 'root'
2192 }
2193
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """Generator yielding comment dicts (and int total-count estimates).

        Recurses once into reply threads (parent set). comment_counts is a
        shared mutable 3-element list: [comments so far, est. total, thread #].
        """

        def extract_header(contents):
            # Parse the comments header: report the expected total and find
            # the continuation for the selected sort order (top/new)
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in the thread, then recurse into its replies
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the comment continuations until exhausted
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry visitor_data between requests to keep pagination consistent
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2364
2365 @staticmethod
2366 def _generate_comment_continuation(video_id):
2367 """
2368 Generates initial comment section continuation token from given video id
2369 """
2370 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2371 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2372 new_continuation_intlist = list(itertools.chain.from_iterable(
2373 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2374 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2375
2376 def _extract_comments(self, ytcfg, video_id, contents, webpage):
2377 """Entry for comment extraction"""
2378 def _real_comment_extract(contents):
2379 if isinstance(contents, list):
2380 for entry in contents:
2381 for key, renderer in entry.items():
2382 if key not in known_entry_comment_renderers:
2383 continue
2384 yield from self._comment_entries(
2385 renderer, video_id=video_id, ytcfg=ytcfg,
2386 identity_token=self._extract_identity_token(webpage, item_id=video_id),
2387 account_syncid=self._extract_account_syncid(ytcfg))
2388 break
2389 comments = []
2390 known_entry_comment_renderers = ('itemSectionRenderer',)
2391 estimated_total = 0
2392 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2393 # Force English regardless of account setting to prevent parsing issues
2394 # See: https://github.com/yt-dlp/yt-dlp/issues/532
2395 ytcfg = copy.deepcopy(ytcfg)
2396 traverse_obj(
2397 ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
2398 try:
2399 for comment in _real_comment_extract(contents):
2400 if len(comments) >= max_comments:
2401 break
2402 if isinstance(comment, int):
2403 estimated_total = comment
2404 continue
2405 comments.append(comment)
2406 except KeyboardInterrupt:
2407 self.to_screen('Interrupted by user')
2408 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2409 return {
2410 'comments': comments,
2411 'comment_count': len(comments),
2412 }
2413
2414 @staticmethod
2415 def _generate_player_context(sts=None):
2416 context = {
2417 'html5Preference': 'HTML5_PREF_WANTS',
2418 }
2419 if sts is not None:
2420 context['signatureTimestamp'] = sts
2421 return {
2422 'playbackContext': {
2423 'contentPlaybackContext': context
2424 },
2425 'contentCheckOk': True,
2426 'racyCheckOk': True
2427 }
2428
2429 @staticmethod
2430 def _is_agegated(player_response):
2431 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2432 return True
2433
2434 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2435 AGE_GATE_REASONS = (
2436 'confirm your age', 'age-restricted', 'inappropriate', # reason
2437 'age_verification_required', 'age_check_required', # status
2438 )
2439 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2440
2441 @staticmethod
2442 def _is_unplayable(player_response):
2443 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2444
2445 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
2446
2447 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2448 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2449 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
2450 headers = self.generate_api_headers(
2451 player_ytcfg, identity_token, syncid,
2452 default_client=client, session_index=session_index)
2453
2454 yt_query = {'videoId': video_id}
2455 yt_query.update(self._generate_player_context(sts))
2456 return self._extract_response(
2457 item_id=video_id, ep='player', query=yt_query,
2458 ytcfg=player_ytcfg, headers=headers, fatal=False,
2459 default_client=client,
2460 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2461 ) or None
2462
2463 def _get_requested_clients(self, url, smuggled_data):
2464 requested_clients = []
2465 allowed_clients = sorted(
2466 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2467 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2468 for client in self._configuration_arg('player_client'):
2469 if client in allowed_clients:
2470 requested_clients.append(client)
2471 elif client == 'all':
2472 requested_clients.extend(allowed_clients)
2473 else:
2474 self.report_warning(f'Skipping unsupported client {client}')
2475 if not requested_clients:
2476 requested_clients = ['android', 'web']
2477
2478 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2479 requested_clients.extend(
2480 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2481
2482 return orderedSet(requested_clients)
2483
2484 def _extract_player_ytcfg(self, client, video_id):
2485 url = {
2486 'web_music': 'https://music.youtube.com',
2487 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2488 }.get(client)
2489 if not url:
2490 return {}
2491 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2492 return self.extract_ytcfg(video_id, webpage) or {}
2493
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
        """Yield one player API response per requested client.

        Extra clients (agegate/creator variants) may be queued on the fly when
        a response indicates age-gating.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        original_clients = clients
        # Reversed so clients.pop() consumes in the originally requested order
        clients = clients[::-1]

        def append_client(client_name):
            # Queue an additional client unless it was explicitly requested already
            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
                clients.append(client_name)

        while clients:
            client = clients.pop()
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            # The web client can reuse the response embedded in the webpage
            pr = (
                initial_pr if client == 'web' and initial_pr
                else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr))
            if pr:
                yield pr

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header():
                append_client(client.replace('_agegate', '_creator'))
            elif self._is_agegated(pr):
                append_client(f'{client}_agegate')

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr and 'web' not in original_clients:
            initial_pr['streamingData'] = None
            yield initial_pr
2534
    def _extract_formats(self, streaming_data, video_id, player_url, is_live):
        """Yield format dicts from the streamingData of all player responses.

        Progressive/adaptive formats are emitted first (deduplicated by
        itag + audio-track id, with signatureCipher URLs decrypted via
        `player_url`); HLS and DASH manifest formats follow, skipping any itag
        already seen.
        """
        itags, stream_ids = [], []
        # quality maps collected from direct formats, reused to rank manifest formats
        itag_qualities, res_qualities = {}, {}
        q = qualities([
            # Normally tiny is the smallest video-only formats. But
            # audio-only formats with unknown quality may get tagged as tiny
            'tiny',
            'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
            'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
        ])
        streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

        for fmt in streaming_formats:
            # Skip DRM-protected and segmented-live entries
            if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
                continue

            itag = str_or_none(fmt.get('itag'))
            audio_track = fmt.get('audioTrack') or {}
            # Deduplicate across player responses by itag + audio-track id
            stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
            if stream_id in stream_ids:
                continue

            quality = fmt.get('quality')
            height = int_or_none(fmt.get('height'))
            if quality == 'tiny' or not quality:
                quality = fmt.get('audioQuality', '').lower() or quality
            # The 3gp format (17) in android client has a quality of "small",
            # but is actually worse than other formats
            if itag == '17':
                quality = 'tiny'
            if quality:
                if itag:
                    itag_qualities[itag] = quality
                if height:
                    res_qualities[height] = quality
            # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
            # (adding `&sq=0` to the URL) and parsing emsg box to determine the
            # number of fragment that would subsequently requested with (`&sq=N`)
            if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
                continue

            fmt_url = fmt.get('url')
            if not fmt_url:
                # No direct URL: decrypt the signatureCipher (needs player JS)
                sc = compat_parse_qs(fmt.get('signatureCipher'))
                fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
                encrypted_sig = try_get(sc, lambda x: x['s'][0])
                if not (sc and fmt_url and encrypted_sig):
                    continue
                if not player_url:
                    continue
                signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
                sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
                fmt_url += '&' + sp + '=' + signature

            if itag:
                itags.append(itag)
                stream_ids.append(stream_id)

            tbr = float_or_none(
                fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
            dct = {
                'asr': int_or_none(fmt.get('audioSampleRate')),
                'filesize': int_or_none(fmt.get('contentLength')),
                'format_id': itag,
                'format_note': ', '.join(filter(None, (
                    audio_track.get('displayName'),
                    fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
                'fps': int_or_none(fmt.get('fps')),
                'height': height,
                'quality': q(quality),
                'tbr': tbr,
                'url': fmt_url,
                'width': int_or_none(fmt.get('width')),
                'language': audio_track.get('id', '').split('.')[0],
            }
            mime_mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
            if mime_mobj:
                dct['ext'] = mimetype2ext(mime_mobj.group(1))
                dct.update(parse_codecs(mime_mobj.group(2)))
            no_audio = dct.get('acodec') == 'none'
            no_video = dct.get('vcodec') == 'none'
            if no_audio:
                dct['vbr'] = tbr
            if no_video:
                dct['abr'] = tbr
            if no_audio or no_video:
                dct['downloader_options'] = {
                    # Youtube throttles chunks >~10M
                    'http_chunk_size': 10485760,
                }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
            yield dct

        skip_manifests = self._configuration_arg('skip')
        get_dash = (
            (not is_live or self._configuration_arg('include_live_dash'))
            and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
        get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

        def guess_quality(f):
            # Rank a manifest format by itag first, then by resolution; -1 if unknown
            for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
                if val in qdict:
                    return q(qdict[val])
            return -1

        for sd in streaming_data:
            hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
            if hls_manifest_url:
                for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                    itag = self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)
                    if itag in itags:
                        continue
                    if itag:
                        f['format_id'] = itag
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    yield f

            dash_manifest_url = get_dash and sd.get('dashManifestUrl')
            if dash_manifest_url:
                for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                    itag = f['format_id']
                    if itag in itags:
                        continue
                    if itag:
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    filesize = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url')
                        or f['url'], 'file size', default=None))
                    if filesize:
                        f['filesize'] = filesize
                    yield f
2671
2672 def _real_extract(self, url):
2673 url, smuggled_data = unsmuggle_url(url, {})
2674 video_id = self._match_id(url)
2675
2676 base_url = self.http_scheme() + '//www.youtube.com/'
2677 webpage_url = base_url + 'watch?v=' + video_id
2678 webpage = self._download_webpage(
2679 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2680
2681 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2682 player_url = self._extract_player_url(master_ytcfg, webpage)
2683 identity_token = self._extract_identity_token(webpage, video_id)
2684
2685 player_responses = list(self._extract_player_responses(
2686 self._get_requested_clients(url, smuggled_data),
2687 video_id, webpage, master_ytcfg, player_url, identity_token))
2688
2689 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2690
2691 playability_statuses = traverse_obj(
2692 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2693
2694 trailer_video_id = get_first(
2695 playability_statuses,
2696 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2697 expected_type=str)
2698 if trailer_video_id:
2699 return self.url_result(
2700 trailer_video_id, self.ie_key(), trailer_video_id)
2701
2702 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2703 if webpage else (lambda x: None))
2704
2705 video_details = traverse_obj(
2706 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2707 microformats = traverse_obj(
2708 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2709 expected_type=dict, default=[])
2710 video_title = (
2711 get_first(video_details, 'title')
2712 or self._get_text(microformats, (..., 'title'))
2713 or search_meta(['og:title', 'twitter:title', 'title']))
2714 video_description = get_first(video_details, 'shortDescription')
2715
2716 if not smuggled_data.get('force_singlefeed', False):
2717 if not self.get_param('noplaylist'):
2718 multifeed_metadata_list = get_first(
2719 player_responses,
2720 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2721 expected_type=str)
2722 if multifeed_metadata_list:
2723 entries = []
2724 feed_ids = []
2725 for feed in multifeed_metadata_list.split(','):
2726 # Unquote should take place before split on comma (,) since textual
2727 # fields may contain comma as well (see
2728 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2729 feed_data = compat_parse_qs(
2730 compat_urllib_parse_unquote_plus(feed))
2731
2732 def feed_entry(name):
2733 return try_get(
2734 feed_data, lambda x: x[name][0], compat_str)
2735
2736 feed_id = feed_entry('id')
2737 if not feed_id:
2738 continue
2739 feed_title = feed_entry('title')
2740 title = video_title
2741 if feed_title:
2742 title += ' (%s)' % feed_title
2743 entries.append({
2744 '_type': 'url_transparent',
2745 'ie_key': 'Youtube',
2746 'url': smuggle_url(
2747 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2748 {'force_singlefeed': True}),
2749 'title': title,
2750 })
2751 feed_ids.append(feed_id)
2752 self.to_screen(
2753 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2754 % (', '.join(feed_ids), video_id))
2755 return self.playlist_result(
2756 entries, video_id, video_title, video_description)
2757 else:
2758 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2759
2760 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2761 is_live = get_first(video_details, 'isLive')
2762 if is_live is None:
2763 is_live = get_first(live_broadcast_details, 'isLiveNow')
2764
2765 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2766 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2767
2768 if not formats:
2769 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2770 self.raise_no_formats(
2771 'This video is DRM protected.', expected=True)
2772 pemr = get_first(
2773 playability_statuses,
2774 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2775 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2776 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2777 if subreason:
2778 if subreason == 'The uploader has not made this video available in your country.':
2779 countries = get_first(microformats, 'availableCountries')
2780 if not countries:
2781 regions_allowed = search_meta('regionsAllowed')
2782 countries = regions_allowed.split(',') if regions_allowed else None
2783 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2784 reason += f'. {subreason}'
2785 if reason:
2786 self.raise_no_formats(reason, expected=True)
2787
2788 for f in formats:
2789 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
2790 f['source_preference'] = -10
2791 # TODO: this method is not reliable
2792 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
2793
2794 # Source is given priority since formats that throttle are given lower source_preference
2795 # When throttling issue is fully fixed, remove this
2796 self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
2797
2798 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2799 if not keywords and webpage:
2800 keywords = [
2801 unescapeHTML(m.group('content'))
2802 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2803 for keyword in keywords:
2804 if keyword.startswith('yt:stretch='):
2805 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2806 if mobj:
2807 # NB: float is intentional for forcing float division
2808 w, h = (float(v) for v in mobj.groups())
2809 if w > 0 and h > 0:
2810 ratio = w / h
2811 for f in formats:
2812 if f.get('vcodec') != 'none':
2813 f['stretched_ratio'] = ratio
2814 break
2815
2816 thumbnails = []
2817 thumbnail_dicts = traverse_obj(
2818 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2819 expected_type=dict, default=[])
2820 for thumbnail in thumbnail_dicts:
2821 thumbnail_url = thumbnail.get('url')
2822 if not thumbnail_url:
2823 continue
2824 # Sometimes youtube gives a wrong thumbnail URL. See:
2825 # https://github.com/yt-dlp/yt-dlp/issues/233
2826 # https://github.com/ytdl-org/youtube-dl/issues/28023
2827 if 'maxresdefault' in thumbnail_url:
2828 thumbnail_url = thumbnail_url.split('?')[0]
2829 thumbnails.append({
2830 'url': thumbnail_url,
2831 'height': int_or_none(thumbnail.get('height')),
2832 'width': int_or_none(thumbnail.get('width')),
2833 })
2834 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2835 if thumbnail_url:
2836 thumbnails.append({
2837 'url': thumbnail_url,
2838 })
2839 # The best resolution thumbnails sometimes does not appear in the webpage
2840 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2841 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2842 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2843 # TODO: Test them also? - For some videos, even these don't exist
2844 guaranteed_thumbnail_names = [
2845 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2846 'mqdefault', 'mq1', 'mq2', 'mq3',
2847 'default', '1', '2', '3'
2848 ]
2849 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2850 n_thumbnail_names = len(thumbnail_names)
2851
2852 thumbnails.extend({
2853 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2854 video_id=video_id, name=name, ext=ext,
2855 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2856 '_test_url': name in hq_thumbnail_names,
2857 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2858 for thumb in thumbnails:
2859 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2860 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2861 self._remove_duplicate_formats(thumbnails)
2862
2863 category = get_first(microformats, 'category') or search_meta('genre')
2864 channel_id = str_or_none(
2865 get_first(video_details, 'channelId')
2866 or get_first(microformats, 'externalChannelId')
2867 or search_meta('channelId'))
2868 duration = int_or_none(
2869 get_first(video_details, 'lengthSeconds')
2870 or get_first(microformats, 'lengthSeconds')
2871 or parse_duration(search_meta('duration'))) or None
2872 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2873
2874 live_content = get_first(video_details, 'isLiveContent')
2875 is_upcoming = get_first(video_details, 'isUpcoming')
2876 if is_live is None:
2877 if is_upcoming or live_content is False:
2878 is_live = False
2879 if is_upcoming is None and (live_content or is_live):
2880 is_upcoming = False
2881 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2882 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2883 if not duration and live_endtime and live_starttime:
2884 duration = live_endtime - live_starttime
2885
2886 info = {
2887 'id': video_id,
2888 'title': self._live_title(video_title) if is_live else video_title,
2889 'formats': formats,
2890 'thumbnails': thumbnails,
2891 'description': video_description,
2892 'upload_date': unified_strdate(
2893 get_first(microformats, 'uploadDate')
2894 or search_meta('uploadDate')),
2895 'uploader': get_first(video_details, 'author'),
2896 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2897 'uploader_url': owner_profile_url,
2898 'channel_id': channel_id,
2899 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2900 'duration': duration,
2901 'view_count': int_or_none(
2902 get_first((video_details, microformats), (..., 'viewCount'))
2903 or search_meta('interactionCount')),
2904 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2905 'age_limit': 18 if (
2906 get_first(microformats, 'isFamilySafe') is False
2907 or search_meta('isFamilyFriendly') == 'false'
2908 or search_meta('og:restrictions:age') == '18+') else 0,
2909 'webpage_url': webpage_url,
2910 'categories': [category] if category else None,
2911 'tags': keywords,
2912 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2913 'is_live': is_live,
2914 'was_live': (False if is_live or is_upcoming or live_content is False
2915 else None if is_live is None or is_upcoming is None
2916 else live_content),
2917 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2918 'release_timestamp': live_starttime,
2919 }
2920
2921 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2922 # Converted into dicts to remove duplicates
2923 captions = {
2924 sub.get('baseUrl'): sub
2925 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2926 translation_languages = {
2927 lang.get('languageCode'): lang.get('languageName')
2928 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2929 subtitles = {}
2930 if pctr:
2931 def process_language(container, base_url, lang_code, sub_name, query):
2932 lang_subs = container.setdefault(lang_code, [])
2933 for fmt in self._SUBTITLE_FORMATS:
2934 query.update({
2935 'fmt': fmt,
2936 })
2937 lang_subs.append({
2938 'ext': fmt,
2939 'url': update_url_query(base_url, query),
2940 'name': sub_name,
2941 })
2942
2943 for base_url, caption_track in captions.items():
2944 if not base_url:
2945 continue
2946 if caption_track.get('kind') != 'asr':
2947 lang_code = (
2948 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2949 or caption_track.get('languageCode'))
2950 if not lang_code:
2951 continue
2952 process_language(
2953 subtitles, base_url, lang_code,
2954 traverse_obj(caption_track, ('name', 'simpleText')),
2955 {})
2956 continue
2957 automatic_captions = {}
2958 for trans_code, trans_name in translation_languages.items():
2959 if not trans_code:
2960 continue
2961 process_language(
2962 automatic_captions, base_url, trans_code,
2963 self._get_text(trans_name, max_runs=1),
2964 {'tlang': trans_code})
2965 info['automatic_captions'] = automatic_captions
2966 info['subtitles'] = subtitles
2967
2968 parsed_url = compat_urllib_parse_urlparse(url)
2969 for component in [parsed_url.fragment, parsed_url.query]:
2970 query = compat_parse_qs(component)
2971 for k, v in query.items():
2972 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2973 d_k += '_time'
2974 if d_k not in info and k in s_ks:
2975 info[d_k] = parse_duration(query[k][0])
2976
2977 # Youtube Music Auto-generated description
2978 if video_description:
2979 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2980 if mobj:
2981 release_year = mobj.group('release_year')
2982 release_date = mobj.group('release_date')
2983 if release_date:
2984 release_date = release_date.replace('-', '')
2985 if not release_year:
2986 release_year = release_date[:4]
2987 info.update({
2988 'album': mobj.group('album'.strip()),
2989 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2990 'track': mobj.group('track').strip(),
2991 'release_date': release_date,
2992 'release_year': int_or_none(release_year),
2993 })
2994
2995 initial_data = None
2996 if webpage:
2997 initial_data = self._extract_yt_initial_variable(
2998 webpage, self._YT_INITIAL_DATA_RE, video_id,
2999 'yt initial data')
3000 if not initial_data:
3001 headers = self.generate_api_headers(
3002 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
3003 session_index=self._extract_session_index(master_ytcfg))
3004
3005 initial_data = self._extract_response(
3006 item_id=video_id, ep='next', fatal=False,
3007 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
3008 note='Downloading initial data API JSON')
3009
3010 try:
3011 # This will error if there is no livechat
3012 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3013 info['subtitles']['live_chat'] = [{
3014 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3015 'video_id': video_id,
3016 'ext': 'json',
3017 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3018 }]
3019 except (KeyError, IndexError, TypeError):
3020 pass
3021
3022 if initial_data:
3023 info['chapters'] = (
3024 self._extract_chapters_from_json(initial_data, duration)
3025 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3026 or None)
3027
3028 contents = try_get(
3029 initial_data,
3030 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3031 list) or []
3032 for content in contents:
3033 vpir = content.get('videoPrimaryInfoRenderer')
3034 if vpir:
3035 stl = vpir.get('superTitleLink')
3036 if stl:
3037 stl = self._get_text(stl)
3038 if try_get(
3039 vpir,
3040 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3041 info['location'] = stl
3042 else:
3043 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3044 if mobj:
3045 info.update({
3046 'series': mobj.group(1),
3047 'season_number': int(mobj.group(2)),
3048 'episode_number': int(mobj.group(3)),
3049 })
3050 for tlb in (try_get(
3051 vpir,
3052 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3053 list) or []):
3054 tbr = tlb.get('toggleButtonRenderer') or {}
3055 for getter, regex in [(
3056 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3057 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3058 lambda x: x['accessibility'],
3059 lambda x: x['accessibilityData']['accessibilityData'],
3060 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3061 label = (try_get(tbr, getter, dict) or {}).get('label')
3062 if label:
3063 mobj = re.match(regex, label)
3064 if mobj:
3065 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3066 break
3067 sbr_tooltip = try_get(
3068 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3069 if sbr_tooltip:
3070 like_count, dislike_count = sbr_tooltip.split(' / ')
3071 info.update({
3072 'like_count': str_to_int(like_count),
3073 'dislike_count': str_to_int(dislike_count),
3074 })
3075 vsir = content.get('videoSecondaryInfoRenderer')
3076 if vsir:
3077 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3078 rows = try_get(
3079 vsir,
3080 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3081 list) or []
3082 multiple_songs = False
3083 for row in rows:
3084 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3085 multiple_songs = True
3086 break
3087 for row in rows:
3088 mrr = row.get('metadataRowRenderer') or {}
3089 mrr_title = mrr.get('title')
3090 if not mrr_title:
3091 continue
3092 mrr_title = self._get_text(mrr, 'title')
3093 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3094 if mrr_title == 'License':
3095 info['license'] = mrr_contents_text
3096 elif not multiple_songs:
3097 if mrr_title == 'Album':
3098 info['album'] = mrr_contents_text
3099 elif mrr_title == 'Artist':
3100 info['artist'] = mrr_contents_text
3101 elif mrr_title == 'Song':
3102 info['track'] = mrr_contents_text
3103
3104 fallbacks = {
3105 'channel': 'uploader',
3106 'channel_id': 'uploader_id',
3107 'channel_url': 'uploader_url',
3108 }
3109 for to, frm in fallbacks.items():
3110 if not info.get(to):
3111 info[to] = info.get(frm)
3112
3113 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3114 v = info.get(s_k)
3115 if v:
3116 info[d_k] = v
3117
3118 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3119 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3120 is_membersonly = None
3121 is_premium = None
3122 if initial_data and is_private is not None:
3123 is_membersonly = False
3124 is_premium = False
3125 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3126 badge_labels = set()
3127 for content in contents:
3128 if not isinstance(content, dict):
3129 continue
3130 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3131 for badge_label in badge_labels:
3132 if badge_label.lower() == 'members only':
3133 is_membersonly = True
3134 elif badge_label.lower() == 'premium':
3135 is_premium = True
3136 elif badge_label.lower() == 'unlisted':
3137 is_unlisted = True
3138
3139 info['availability'] = self._availability(
3140 is_private=is_private,
3141 needs_premium=is_premium,
3142 needs_subscription=is_membersonly,
3143 needs_auth=info['age_limit'] >= 18,
3144 is_unlisted=None if is_private is None else is_unlisted)
3145
3146 # get xsrf for annotations or comments
3147 get_annotations = self.get_param('writeannotations', False)
3148 get_comments = self.get_param('getcomments', False)
3149 if get_annotations or get_comments:
3150 xsrf_token = None
3151 if master_ytcfg:
3152 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
3153 if not xsrf_token:
3154 xsrf_token = self._search_regex(
3155 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
3156 webpage, 'xsrf token', group='xsrf_token', fatal=False)
3157
3158 # annotations
3159 if get_annotations:
3160 invideo_url = get_first(
3161 player_responses,
3162 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3163 expected_type=str)
3164 if xsrf_token and invideo_url:
3165 xsrf_field_name = None
3166 if master_ytcfg:
3167 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3168 if not xsrf_field_name:
3169 xsrf_field_name = self._search_regex(
3170 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3171 webpage, 'xsrf field name',
3172 group='xsrf_field_name', default='session_token')
3173 info['annotations'] = self._download_webpage(
3174 self._proto_relative_url(invideo_url),
3175 video_id, note='Downloading annotations',
3176 errnote='Unable to download video annotations', fatal=False,
3177 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3178
3179 if get_comments:
3180 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3181
3182 self.mark_watched(video_id, player_responses)
3183
3184 return info
3185
3186
3187 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3188 IE_DESC = 'YouTube.com tab'
3189 _VALID_URL = r'''(?x)
3190 https?://
3191 (?:\w+\.)?
3192 (?:
3193 youtube(?:kids)?\.com|
3194 invidio\.us
3195 )/
3196 (?:
3197 (?P<channel_type>channel|c|user|browse)/|
3198 (?P<not_channel>
3199 feed/|hashtag/|
3200 (?:playlist|watch)\?.*?\blist=
3201 )|
3202 (?!(?:%s)\b) # Direct URLs
3203 )
3204 (?P<id>[^/?\#&]+)
3205 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3206 IE_NAME = 'youtube:tab'
3207
3208 _TESTS = [{
3209 'note': 'playlists, multipage',
3210 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3211 'playlist_mincount': 94,
3212 'info_dict': {
3213 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3214 'title': 'Игорь Клейнер - Playlists',
3215 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3216 'uploader': 'Игорь Клейнер',
3217 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3218 },
3219 }, {
3220 'note': 'playlists, multipage, different order',
3221 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3222 'playlist_mincount': 94,
3223 'info_dict': {
3224 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3225 'title': 'Игорь Клейнер - Playlists',
3226 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3227 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3228 'uploader': 'Игорь Клейнер',
3229 },
3230 }, {
3231 'note': 'playlists, series',
3232 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3233 'playlist_mincount': 5,
3234 'info_dict': {
3235 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3236 'title': '3Blue1Brown - Playlists',
3237 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3238 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3239 'uploader': '3Blue1Brown',
3240 },
3241 }, {
3242 'note': 'playlists, singlepage',
3243 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3244 'playlist_mincount': 4,
3245 'info_dict': {
3246 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3247 'title': 'ThirstForScience - Playlists',
3248 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3249 'uploader': 'ThirstForScience',
3250 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3251 }
3252 }, {
3253 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3254 'only_matching': True,
3255 }, {
3256 'note': 'basic, single video playlist',
3257 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3258 'info_dict': {
3259 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3260 'uploader': 'Sergey M.',
3261 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3262 'title': 'youtube-dl public playlist',
3263 },
3264 'playlist_count': 1,
3265 }, {
3266 'note': 'empty playlist',
3267 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3268 'info_dict': {
3269 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3270 'uploader': 'Sergey M.',
3271 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3272 'title': 'youtube-dl empty playlist',
3273 },
3274 'playlist_count': 0,
3275 }, {
3276 'note': 'Home tab',
3277 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3278 'info_dict': {
3279 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3280 'title': 'lex will - Home',
3281 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3282 'uploader': 'lex will',
3283 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3284 },
3285 'playlist_mincount': 2,
3286 }, {
3287 'note': 'Videos tab',
3288 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3289 'info_dict': {
3290 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3291 'title': 'lex will - Videos',
3292 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3293 'uploader': 'lex will',
3294 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3295 },
3296 'playlist_mincount': 975,
3297 }, {
3298 'note': 'Videos tab, sorted by popular',
3299 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3300 'info_dict': {
3301 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3302 'title': 'lex will - Videos',
3303 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3304 'uploader': 'lex will',
3305 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3306 },
3307 'playlist_mincount': 199,
3308 }, {
3309 'note': 'Playlists tab',
3310 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3311 'info_dict': {
3312 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3313 'title': 'lex will - Playlists',
3314 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3315 'uploader': 'lex will',
3316 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3317 },
3318 'playlist_mincount': 17,
3319 }, {
3320 'note': 'Community tab',
3321 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3322 'info_dict': {
3323 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3324 'title': 'lex will - Community',
3325 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3326 'uploader': 'lex will',
3327 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3328 },
3329 'playlist_mincount': 18,
3330 }, {
3331 'note': 'Channels tab',
3332 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3333 'info_dict': {
3334 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3335 'title': 'lex will - Channels',
3336 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3337 'uploader': 'lex will',
3338 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3339 },
3340 'playlist_mincount': 12,
3341 }, {
3342 'note': 'Search tab',
3343 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3344 'playlist_mincount': 40,
3345 'info_dict': {
3346 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3347 'title': '3Blue1Brown - Search - linear algebra',
3348 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3349 'uploader': '3Blue1Brown',
3350 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3351 },
3352 }, {
3353 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3354 'only_matching': True,
3355 }, {
3356 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3357 'only_matching': True,
3358 }, {
3359 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3360 'only_matching': True,
3361 }, {
3362 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3363 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3364 'info_dict': {
3365 'title': '29C3: Not my department',
3366 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3367 'uploader': 'Christiaan008',
3368 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3369 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3370 },
3371 'playlist_count': 96,
3372 }, {
3373 'note': 'Large playlist',
3374 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3375 'info_dict': {
3376 'title': 'Uploads from Cauchemar',
3377 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3378 'uploader': 'Cauchemar',
3379 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3380 },
3381 'playlist_mincount': 1123,
3382 }, {
3383 'note': 'even larger playlist, 8832 videos',
3384 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3385 'only_matching': True,
3386 }, {
3387 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3388 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3389 'info_dict': {
3390 'title': 'Uploads from Interstellar Movie',
3391 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3392 'uploader': 'Interstellar Movie',
3393 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3394 },
3395 'playlist_mincount': 21,
3396 }, {
3397 'note': 'Playlist with "show unavailable videos" button',
3398 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3399 'info_dict': {
3400 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3401 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3402 'uploader': 'Phim Siêu Nhân Nhật Bản',
3403 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3404 },
3405 'playlist_mincount': 200,
3406 }, {
3407 'note': 'Playlist with unavailable videos in page 7',
3408 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3409 'info_dict': {
3410 'title': 'Uploads from BlankTV',
3411 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3412 'uploader': 'BlankTV',
3413 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3414 },
3415 'playlist_mincount': 1000,
3416 }, {
3417 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3418 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3419 'info_dict': {
3420 'title': 'Data Analysis with Dr Mike Pound',
3421 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3422 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3423 'uploader': 'Computerphile',
3424 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3425 },
3426 'playlist_mincount': 11,
3427 }, {
3428 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3429 'only_matching': True,
3430 }, {
3431 'note': 'Playlist URL that does not actually serve a playlist',
3432 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3433 'info_dict': {
3434 'id': 'FqZTN594JQw',
3435 'ext': 'webm',
3436 'title': "Smiley's People 01 detective, Adventure Series, Action",
3437 'uploader': 'STREEM',
3438 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3439 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3440 'upload_date': '20150526',
3441 'license': 'Standard YouTube License',
3442 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3443 'categories': ['People & Blogs'],
3444 'tags': list,
3445 'view_count': int,
3446 'like_count': int,
3447 'dislike_count': int,
3448 },
3449 'params': {
3450 'skip_download': True,
3451 },
3452 'skip': 'This video is not available.',
3453 'add_ie': [YoutubeIE.ie_key()],
3454 }, {
3455 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3456 'only_matching': True,
3457 }, {
3458 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3459 'only_matching': True,
3460 }, {
3461 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3462 'info_dict': {
3463 'id': '3yImotZU3tw', # This will keep changing
3464 'ext': 'mp4',
3465 'title': compat_str,
3466 'uploader': 'Sky News',
3467 'uploader_id': 'skynews',
3468 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3469 'upload_date': r're:\d{8}',
3470 'description': compat_str,
3471 'categories': ['News & Politics'],
3472 'tags': list,
3473 'like_count': int,
3474 'dislike_count': int,
3475 },
3476 'params': {
3477 'skip_download': True,
3478 },
3479 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3480 }, {
3481 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3482 'info_dict': {
3483 'id': 'a48o2S1cPoo',
3484 'ext': 'mp4',
3485 'title': 'The Young Turks - Live Main Show',
3486 'uploader': 'The Young Turks',
3487 'uploader_id': 'TheYoungTurks',
3488 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3489 'upload_date': '20150715',
3490 'license': 'Standard YouTube License',
3491 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3492 'categories': ['News & Politics'],
3493 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3494 'like_count': int,
3495 'dislike_count': int,
3496 },
3497 'params': {
3498 'skip_download': True,
3499 },
3500 'only_matching': True,
3501 }, {
3502 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3503 'only_matching': True,
3504 }, {
3505 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3506 'only_matching': True,
3507 }, {
3508 'note': 'A channel that is not live. Should raise error',
3509 'url': 'https://www.youtube.com/user/numberphile/live',
3510 'only_matching': True,
3511 }, {
3512 'url': 'https://www.youtube.com/feed/trending',
3513 'only_matching': True,
3514 }, {
3515 'url': 'https://www.youtube.com/feed/library',
3516 'only_matching': True,
3517 }, {
3518 'url': 'https://www.youtube.com/feed/history',
3519 'only_matching': True,
3520 }, {
3521 'url': 'https://www.youtube.com/feed/subscriptions',
3522 'only_matching': True,
3523 }, {
3524 'url': 'https://www.youtube.com/feed/watch_later',
3525 'only_matching': True,
3526 }, {
3527 'note': 'Recommended - redirects to home page',
3528 'url': 'https://www.youtube.com/feed/recommended',
3529 'only_matching': True,
3530 }, {
3531 'note': 'inline playlist with not always working continuations',
3532 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3533 'only_matching': True,
3534 }, {
3535 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3536 'only_matching': True,
3537 }, {
3538 'url': 'https://www.youtube.com/course',
3539 'only_matching': True,
3540 }, {
3541 'url': 'https://www.youtube.com/zsecurity',
3542 'only_matching': True,
3543 }, {
3544 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3545 'only_matching': True,
3546 }, {
3547 'url': 'https://www.youtube.com/TheYoungTurks/live',
3548 'only_matching': True,
3549 }, {
3550 'url': 'https://www.youtube.com/hashtag/cctv9',
3551 'info_dict': {
3552 'id': 'cctv9',
3553 'title': '#cctv9',
3554 },
3555 'playlist_mincount': 350,
3556 }, {
3557 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3558 'only_matching': True,
3559 }, {
3560 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3561 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3562 'only_matching': True
3563 }, {
3564 'note': '/browse/ should redirect to /channel/',
3565 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3566 'only_matching': True
3567 }, {
3568 'note': 'VLPL, should redirect to playlist?list=PL...',
3569 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3570 'info_dict': {
3571 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3572 'uploader': 'NoCopyrightSounds',
3573 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3574 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3575 'title': 'NCS Releases',
3576 },
3577 'playlist_mincount': 166,
3578 }, {
3579 'note': 'Topic, should redirect to playlist?list=UU...',
3580 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3581 'info_dict': {
3582 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3583 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3584 'title': 'Uploads from Royalty Free Music - Topic',
3585 'uploader': 'Royalty Free Music - Topic',
3586 },
3587 'expected_warnings': [
3588 'A channel/user page was given',
3589 'The URL does not have a videos tab',
3590 ],
3591 'playlist_mincount': 101,
3592 }, {
3593 'note': 'Topic without a UU playlist',
3594 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3595 'info_dict': {
3596 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3597 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3598 },
3599 'expected_warnings': [
3600 'A channel/user page was given',
3601 'The URL does not have a videos tab',
3602 'Falling back to channel URL',
3603 ],
3604 'playlist_mincount': 9,
3605 }, {
3606 'note': 'Youtube music Album',
3607 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3608 'info_dict': {
3609 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3610 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3611 },
3612 'playlist_count': 50,
3613 }, {
3614 'note': 'unlisted single video playlist',
3615 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3616 'info_dict': {
3617 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3618 'uploader': 'colethedj',
3619 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3620 'title': 'yt-dlp unlisted playlist test',
3621 'availability': 'unlisted'
3622 },
3623 'playlist_count': 1,
3624 }]
3625
3626 @classmethod
3627 def suitable(cls, url):
3628 return False if YoutubeIE.suitable(url) else super(
3629 YoutubeTabIE, cls).suitable(url)
3630
3631 def _extract_channel_id(self, webpage):
3632 channel_id = self._html_search_meta(
3633 'channelId', webpage, 'channel id', default=None)
3634 if channel_id:
3635 return channel_id
3636 channel_url = self._html_search_meta(
3637 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3638 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3639 'twitter:app:url:googleplay'), webpage, 'channel url')
3640 return self._search_regex(
3641 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3642 channel_url, 'channel id')
3643
3644 @staticmethod
3645 def _extract_basic_item_renderer(item):
3646 # Modified from _extract_grid_item_renderer
3647 known_basic_renderers = (
3648 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3649 )
3650 for key, renderer in item.items():
3651 if not isinstance(renderer, dict):
3652 continue
3653 elif key in known_basic_renderers:
3654 return renderer
3655 elif key.startswith('grid') and key.endswith('Renderer'):
3656 return renderer
3657
3658 def _grid_entries(self, grid_renderer):
3659 for item in grid_renderer['items']:
3660 if not isinstance(item, dict):
3661 continue
3662 renderer = self._extract_basic_item_renderer(item)
3663 if not isinstance(renderer, dict):
3664 continue
3665 title = self._get_text(renderer, 'title')
3666
3667 # playlist
3668 playlist_id = renderer.get('playlistId')
3669 if playlist_id:
3670 yield self.url_result(
3671 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3672 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3673 video_title=title)
3674 continue
3675 # video
3676 video_id = renderer.get('videoId')
3677 if video_id:
3678 yield self._extract_video(renderer)
3679 continue
3680 # channel
3681 channel_id = renderer.get('channelId')
3682 if channel_id:
3683 yield self.url_result(
3684 'https://www.youtube.com/channel/%s' % channel_id,
3685 ie=YoutubeTabIE.ie_key(), video_title=title)
3686 continue
3687 # generic endpoint URL support
3688 ep_url = urljoin('https://www.youtube.com/', try_get(
3689 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3690 compat_str))
3691 if ep_url:
3692 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3693 if ie.suitable(ep_url):
3694 yield self.url_result(
3695 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3696 break
3697
3698 def _shelf_entries_from_content(self, shelf_renderer):
3699 content = shelf_renderer.get('content')
3700 if not isinstance(content, dict):
3701 return
3702 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3703 if renderer:
3704 # TODO: add support for nested playlists so each shelf is processed
3705 # as separate playlist
3706 # TODO: this includes only first N items
3707 for entry in self._grid_entries(renderer):
3708 yield entry
3709 renderer = content.get('horizontalListRenderer')
3710 if renderer:
3711 # TODO
3712 pass
3713
3714 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3715 ep = try_get(
3716 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3717 compat_str)
3718 shelf_url = urljoin('https://www.youtube.com', ep)
3719 if shelf_url:
3720 # Skipping links to another channels, note that checking for
3721 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3722 # will not work
3723 if skip_channels and '/channels?' in shelf_url:
3724 return
3725 title = self._get_text(shelf_renderer, 'title')
3726 yield self.url_result(shelf_url, video_title=title)
3727 # Shelf may not contain shelf URL, fallback to extraction from content
3728 for entry in self._shelf_entries_from_content(shelf_renderer):
3729 yield entry
3730
3731 def _playlist_entries(self, video_list_renderer):
3732 for content in video_list_renderer['contents']:
3733 if not isinstance(content, dict):
3734 continue
3735 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3736 if not isinstance(renderer, dict):
3737 continue
3738 video_id = renderer.get('videoId')
3739 if not video_id:
3740 continue
3741 yield self._extract_video(renderer)
3742
3743 def _rich_entries(self, rich_grid_renderer):
3744 renderer = try_get(
3745 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3746 video_id = renderer.get('videoId')
3747 if not video_id:
3748 return
3749 yield self._extract_video(renderer)
3750
3751 def _video_entry(self, video_renderer):
3752 video_id = video_renderer.get('videoId')
3753 if video_id:
3754 return self._extract_video(video_renderer)
3755
3756 def _post_thread_entries(self, post_thread_renderer):
3757 post_renderer = try_get(
3758 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3759 if not post_renderer:
3760 return
3761 # video attachment
3762 video_renderer = try_get(
3763 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3764 video_id = video_renderer.get('videoId')
3765 if video_id:
3766 entry = self._extract_video(video_renderer)
3767 if entry:
3768 yield entry
3769 # playlist attachment
3770 playlist_id = try_get(
3771 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3772 if playlist_id:
3773 yield self.url_result(
3774 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3775 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3776 # inline video links
3777 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3778 for run in runs:
3779 if not isinstance(run, dict):
3780 continue
3781 ep_url = try_get(
3782 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3783 if not ep_url:
3784 continue
3785 if not YoutubeIE.suitable(ep_url):
3786 continue
3787 ep_video_id = YoutubeIE._match_id(ep_url)
3788 if video_id == ep_video_id:
3789 continue
3790 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3791
3792 def _post_thread_continuation_entries(self, post_thread_continuation):
3793 contents = post_thread_continuation.get('contents')
3794 if not isinstance(contents, list):
3795 return
3796 for content in contents:
3797 renderer = content.get('backstagePostThreadRenderer')
3798 if not isinstance(renderer, dict):
3799 continue
3800 for entry in self._post_thread_entries(renderer):
3801 yield entry
3802
3803 r''' # unused
3804 def _rich_grid_entries(self, contents):
3805 for content in contents:
3806 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3807 if video_renderer:
3808 entry = self._video_entry(video_renderer)
3809 if entry:
3810 yield entry
3811 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of the selected tab, following continuations.

        @param tab              selected tab renderer (see _extract_selected_tab)
        @param item_id          id used when logging/reporting API requests
        @param identity_token   passed through to generate_api_headers
        @param account_syncid   passed through to generate_api_headers
        @param ytcfg            parsed ytcfg, used for API requests
        """

        def extract_entries(parent_renderer):  # needs to be called again for continuations to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Not an item section; a rich grid item holds a video directly
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # renderer key -> handler yielding entries for that renderer
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                # fall back to the item section's own continuation
                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            # fall back to the parent renderer's continuation
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # single-element list used as a poor man's nonlocal (Python 2 compatible)
        continuation_list = [None]
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # carry visitorData forward so subsequent requests reuse it
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Old-style continuations under response['continuationContents']
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # New-style continuations under onResponseReceived(Actions|Endpoints);
            # the second tuple element is the key the handler expects items under
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # re-wrap items so the regular renderer handlers can parse them
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3927
3928 @staticmethod
3929 def _extract_selected_tab(tabs):
3930 for tab in tabs:
3931 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3932 if renderer.get('selected') is True:
3933 return renderer
3934 else:
3935 raise ExtractorError('Unable to find selected tab')
3936
3937 @classmethod
3938 def _extract_uploader(cls, data):
3939 uploader = {}
3940 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3941 owner = try_get(
3942 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3943 if owner:
3944 uploader['uploader'] = owner.get('text')
3945 uploader['uploader_id'] = try_get(
3946 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3947 uploader['uploader_url'] = urljoin(
3948 'https://www.youtube.com/',
3949 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3950 return {k: v for k, v in uploader.items() if v is not None}
3951
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build a playlist result for a tabbed page (channel/playlist/hashtag).

        Collects title/description/uploader/thumbnail metadata from the
        channel or playlist metadata renderers, then delegates entry
        extraction for the selected tab to _entries().
        """
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        # NOTE: both names alias the same list here, but each is only ever
        # reassigned (never mutated in place), so the sharing is harmless
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        # channel pages carry channelMetadataRenderer; playlist pages carry
        # playlistMetadataRenderer instead
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            # channel_id is None for playlist pages; defaulted to item_id below
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            # prefer the avatar; fall back to the sidebar playlist thumbnail
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # hashtag pages have no metadata renderer; use the hashtag header
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        # append the tab name(s), e.g. "uploader - Videos"
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # playlist pages: fill in uploader info from the sidebar instead
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        ytcfg = self.extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
4026
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Yield videos of a Mix playlist by repeatedly requesting the 'next'
        API endpoint, stopping when no new videos appear or when the first
        video comes around again (Mixes loop endlessly).
        """
        first_id = last_id = None
        ytcfg = self.extract_ytcfg(playlist_id, webpage)
        headers = self.generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # resume right after the last video yielded from the previous page
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            # NOTE(review): watch_endpoint may be None when the last item has no
            # watchEndpoint; the .get() calls below would then raise — confirm
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query, ep='next', headers=headers, ytcfg=ytcfg,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4062
4063 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
4064 title = playlist.get('title') or try_get(
4065 data, lambda x: x['titleText']['simpleText'], compat_str)
4066 playlist_id = playlist.get('playlistId') or item_id
4067
4068 # Delegating everything except mix playlists to regular tab-based playlist URL
4069 playlist_url = urljoin(url, try_get(
4070 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4071 compat_str))
4072 if playlist_url and playlist_url != url:
4073 return self.url_result(
4074 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4075 video_title=title)
4076
4077 return self.playlist_result(
4078 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4079 playlist_id=playlist_id, playlist_title=title)
4080
4081 def _extract_availability(self, data):
4082 """
4083 Gets the availability of a given playlist/tab.
4084 Note: Unless YouTube tells us explicitly, we do not assume it is public
4085 @param data: response
4086 """
4087 is_private = is_unlisted = None
4088 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4089 badge_labels = self._extract_badges(renderer)
4090
4091 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4092 privacy_dropdown_entries = try_get(
4093 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4094 for renderer_dict in privacy_dropdown_entries:
4095 is_selected = try_get(
4096 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4097 if not is_selected:
4098 continue
4099 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4100 if label:
4101 badge_labels.add(label.lower())
4102 break
4103
4104 for badge_label in badge_labels:
4105 if badge_label == 'unlisted':
4106 is_unlisted = True
4107 elif badge_label == 'private':
4108 is_private = True
4109 elif badge_label == 'public':
4110 is_unlisted = is_private = False
4111 return self._availability(is_private, False, False, False, is_unlisted)
4112
4113 @staticmethod
4114 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4115 sidebar_renderer = try_get(
4116 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4117 for item in sidebar_renderer:
4118 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4119 if renderer:
4120 return renderer
4121
    def _reload_with_unavailable_videos(self, item_id, data, webpage):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.

        Scans the playlist sidebar menu for a 'show unavailable videos'
        navigation item; if found, re-requests the playlist through the
        browse API using that item's endpoint. Returns the new API response,
        or None when there is no sidebar renderer (fatal=False is passed for
        the request itself).
        """
        browse_id = params = None
        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
        if not renderer:
            return
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

        ytcfg = self.extract_ytcfg(item_id, webpage)
        headers = self.generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=item_id),
            visitor_data=try_get(
                self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        # Defaults when the button endpoint was not found: 'VL<id>' is the
        # browse id of the playlist itself; 'wgYCCAA=' is presumably the
        # protobuf params blob for 'show unavailable videos' -- TODO confirm
        query = {
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False, ytcfg=ytcfg,
            note='Downloading API JSON with unavailable videos')
4160
4161 def _extract_webpage(self, url, item_id):
4162 retries = self.get_param('extractor_retries', 3)
4163 count = -1
4164 last_error = 'Incomplete yt initial data recieved'
4165 while count < retries:
4166 count += 1
4167 # Sometimes youtube returns a webpage with incomplete ytInitialData
4168 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4169 if count:
4170 self.report_warning('%s. Retrying ...' % last_error)
4171 webpage = self._download_webpage(
4172 url, item_id,
4173 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4174 data = self.extract_yt_initial_data(item_id, webpage)
4175 if data.get('contents') or data.get('currentVideoEndpoint'):
4176 break
4177 # Extract alerts here only when there is error
4178 self._extract_and_report_alerts(data)
4179 if count >= retries:
4180 raise ExtractorError(last_error)
4181 return webpage, data
4182
4183 @staticmethod
4184 def _smuggle_data(entries, data):
4185 for entry in entries:
4186 if data:
4187 entry['url'] = smuggle_url(entry['url'], data)
4188 yield entry
4189
4190 def _real_extract(self, url):
4191 url, smuggled_data = unsmuggle_url(url, {})
4192 if self.is_music_url(url):
4193 smuggled_data['is_music_url'] = True
4194 info_dict = self.__real_extract(url, smuggled_data)
4195 if info_dict.get('entries'):
4196 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4197 return info_dict
4198
    # Splits a URL into the part before the tab ('pre'), an optional tab path
    # ('tab', e.g. '/videos' -- only allowed when the 'channel_type' group of
    # _VALID_URL matched) and the remainder ('post').
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4200
    def __real_extract(self, url, smuggled_data):
        """
        Core of _real_extract: canonicalise the URL (music.youtube rewrites,
        channel-home redirection, watch?list= fix-ups), download the page and
        dispatch to tab, playlist or single-video extraction.
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Re-parse against _url_re; groups that did not match become ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Rebuild the URL from the (possibly rewritten) components and re-parse
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4315
4316
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to YoutubeTabIE for URLs it handles; this extractor only
        # takes bare playlist ids and non-tab playlist URLs.
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        # Normalise to a canonical /playlist URL and hand off to YoutubeTabIE,
        # preserving the music-URL hint via smuggling.
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4401
4402
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Expand the youtu.be short link (with its ?list= part) into a full
        # watch URL and let YoutubeTabIE handle the video/playlist dispatch.
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4441
4442
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Delegate to the tab extractor using the canonical /user/ URL
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4456
4457
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos are exposed as the auto-generated 'LL' playlist
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4475
4476
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional search filter sent as the 'params' field of every search
    # request (set by subclasses, e.g. YoutubeSearchDateIE)
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* extracted video results for *query*, following
        search-API continuations page by page."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page nests results differently from continuation pages
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token found anywhere on this page: we're done
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
4544
4545
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Search filter passed by YoutubeSearchIE._entries as the 'params' field;
    # presumably the URL-encoded protobuf for 'Sort by upload date' -- confirm
    # against the sp= value youtube's search filter UI produces
    _SEARCH_PARAMS = 'CAI%3D'
4551
4552
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run a search for the query embedded in a /results page URL."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # _VALID_URL guarantees at least one of 'search_query' / 'q' is present
        query = (params.get('search_query') or params.get('q'))[0]
        # Pass any 'sp' filter straight through to the search requests
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4579
4580
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors.

    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Every feed lives at /feed/<name>; let YoutubeTabIE do the work
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4597
4598
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch-later is exposed as the auto-generated 'WL' playlist
        wl_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(wl_url, ie=YoutubeTabIE.ie_key())
4611
4612
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com home page
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Overrides the base class's _LOGIN_REQUIRED = True
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4628
4629
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # All behaviour comes from YoutubeFeedsInfoExtractor; only the feed name
    # and the ':ytsubs' pseudo-URL are defined here
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4641
4642
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # All behaviour comes from YoutubeFeedsInfoExtractor; only the feed name
    # and the ':ythis' pseudo-URL are defined here
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4651
4652
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch URLs that lost their v= parameter (usually to an
    unquoted '&' in the shell) and raises a helpful error."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The message previously suggested running 'youtube-dl'; this project
        # is yt-dlp, so recommend the correct executable name
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
4700
4701
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Matches IDs of 1-10 characters, i.e. shorter than the canonical 11
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # A truncated ID can never resolve; fail early with a clear message
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (self._match_id(url), url),
            expected=True)