# Scrape residue from the jfr.im gitweb mirror of yt-dlp.git
# (blob view of yt_dlp/extractor/youtube.py, commit "[cleanup] Misc")
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 is_html,
42 mimetype2ext,
43 network_exceptions,
44 orderedSet,
45 parse_codecs,
46 parse_count,
47 parse_duration,
48 parse_iso8601,
49 parse_qs,
50 qualities,
51 remove_end,
52 remove_start,
53 smuggle_url,
54 str_or_none,
55 str_to_int,
56 traverse_obj,
57 try_get,
58 unescapeHTML,
59 unified_strdate,
60 unsmuggle_url,
61 update_url_query,
62 url_or_none,
63 urljoin,
64 variadic,
65 )
66
67
# any clients starting with _ cannot be explicity requested by the user
# Known innertube API clients, keyed by the name a user may request.
# Entries missing INNERTUBE_API_KEY / INNERTUBE_HOST are filled in with
# defaults by build_innertube_clients() below.
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        # Numeric id sent in the X-YouTube-Client-Name header
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
    },
    'android_creator': {
        # No API key here: the shared default is applied in build_innertube_clients()
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26
    },
    'ios_creator': {
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
206
207
def build_innertube_clients():
    """Normalise INNERTUBE_CLIENTS in place.

    Fills in the shared API-key/host defaults, assigns a selection priority
    per client family, and derives a ``<client>_agegate`` variant for every
    base client (used for age-restricted videos).
    """
    embed_source = {
        'embedUrl': 'https://google.com',  # any syntactically valid URL works
    }
    core_clients = ('android', 'web', 'ios', 'mweb')
    base_rank = qualities(core_clients[::-1])

    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        # Shared defaults, only applied where the table above omits them
        cfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        cfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        cfg['priority'] = 10 * base_rank(name.split('_', 1)[0])

        if name in core_clients:
            # Derive an EMBED-screen clone used to bypass age-gating
            agegate_cfg = copy.deepcopy(cfg)
            agegate_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_source
            agegate_cfg['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_cfg
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_source
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3


build_innertube_clients()
234
235
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # URL path components that can never be a channel/user name
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Prefixed playlist ids plus the special lists (WL, LL, LM, RDMM)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    r''' # Unused since login is broken
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
    '''
260
261 def _login(self):
262 """
263 Attempt to log in to YouTube.
264 True is returned if successful or skipped.
265 False is returned if login failed.
266
267 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
268 """
269
270 def warn(message):
271 self.report_warning(message)
272
273 # username+password login is broken
274 if (self._LOGIN_REQUIRED
275 and self.get_param('cookiefile') is None
276 and self.get_param('cookiesfrombrowser') is None):
277 self.raise_login_required(
278 'Login details are needed to download this content', method='cookies')
279 username, password = self._get_login_info()
280 if username:
281 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
282 return
283
284 # Everything below this is broken!
285 r'''
286 # No authentication to be performed
287 if username is None:
288 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
289 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
290 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
291 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
292 return True
293
294 login_page = self._download_webpage(
295 self._LOGIN_URL, None,
296 note='Downloading login page',
297 errnote='unable to fetch login page', fatal=False)
298 if login_page is False:
299 return
300
301 login_form = self._hidden_inputs(login_page)
302
303 def req(url, f_req, note, errnote):
304 data = login_form.copy()
305 data.update({
306 'pstMsg': 1,
307 'checkConnection': 'youtube',
308 'checkedDomains': 'youtube',
309 'hl': 'en',
310 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
311 'f.req': json.dumps(f_req),
312 'flowName': 'GlifWebSignIn',
313 'flowEntry': 'ServiceLogin',
314 # TODO: reverse actual botguard identifier generation algo
315 'bgRequest': '["identifier",""]',
316 })
317 return self._download_json(
318 url, None, note=note, errnote=errnote,
319 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
320 fatal=False,
321 data=urlencode_postdata(data), headers={
322 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
323 'Google-Accounts-XSRF': 1,
324 })
325
326 lookup_req = [
327 username,
328 None, [], None, 'US', None, None, 2, False, True,
329 [
330 None, None,
331 [2, 1, None, 1,
332 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
333 None, [], 4],
334 1, [None, None, []], None, None, None, True
335 ],
336 username,
337 ]
338
339 lookup_results = req(
340 self._LOOKUP_URL, lookup_req,
341 'Looking up account info', 'Unable to look up account info')
342
343 if lookup_results is False:
344 return False
345
346 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
347 if not user_hash:
348 warn('Unable to extract user hash')
349 return False
350
351 challenge_req = [
352 user_hash,
353 None, 1, None, [1, None, None, None, [password, None, True]],
354 [
355 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
356 1, [None, None, []], None, None, None, True
357 ]]
358
359 challenge_results = req(
360 self._CHALLENGE_URL, challenge_req,
361 'Logging in', 'Unable to log in')
362
363 if challenge_results is False:
364 return
365
366 login_res = try_get(challenge_results, lambda x: x[0][5], list)
367 if login_res:
368 login_msg = try_get(login_res, lambda x: x[5], compat_str)
369 warn(
370 'Unable to login: %s' % 'Invalid password'
371 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
372 return False
373
374 res = try_get(challenge_results, lambda x: x[0][-1], list)
375 if not res:
376 warn('Unable to extract result entry')
377 return False
378
379 login_challenge = try_get(res, lambda x: x[0][0], list)
380 if login_challenge:
381 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
382 if challenge_str == 'TWO_STEP_VERIFICATION':
383 # SEND_SUCCESS - TFA code has been successfully sent to phone
384 # QUOTA_EXCEEDED - reached the limit of TFA codes
385 status = try_get(login_challenge, lambda x: x[5], compat_str)
386 if status == 'QUOTA_EXCEEDED':
387 warn('Exceeded the limit of TFA codes, try later')
388 return False
389
390 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
391 if not tl:
392 warn('Unable to extract TL')
393 return False
394
395 tfa_code = self._get_tfa_info('2-step verification code')
396
397 if not tfa_code:
398 warn(
399 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
400 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
401 return False
402
403 tfa_code = remove_start(tfa_code, 'G-')
404
405 tfa_req = [
406 user_hash, None, 2, None,
407 [
408 9, None, None, None, None, None, None, None,
409 [None, tfa_code, True, 2]
410 ]]
411
412 tfa_results = req(
413 self._TFA_URL.format(tl), tfa_req,
414 'Submitting TFA code', 'Unable to submit TFA code')
415
416 if tfa_results is False:
417 return False
418
419 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
420 if tfa_res:
421 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
422 warn(
423 'Unable to finish TFA: %s' % 'Invalid TFA code'
424 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
425 return False
426
427 check_cookie_url = try_get(
428 tfa_results, lambda x: x[0][-1][2], compat_str)
429 else:
430 CHALLENGES = {
431 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
432 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
433 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
434 }
435 challenge = CHALLENGES.get(
436 challenge_str,
437 '%s returned error %s.' % (self.IE_NAME, challenge_str))
438 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
439 return False
440 else:
441 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
442
443 if not check_cookie_url:
444 warn('Unable to extract CheckCookie URL')
445 return False
446
447 check_cookie_results = self._download_webpage(
448 check_cookie_url, None, 'Checking cookie', fatal=False)
449
450 if check_cookie_results is False:
451 return False
452
453 if 'https://myaccount.google.com/' not in check_cookie_results:
454 warn('Unable to log in')
455 return False
456
457 return True
458 '''
459
460 def _initialize_consent(self):
461 cookies = self._get_cookies('https://www.youtube.com/')
462 if cookies.get('__Secure-3PSID'):
463 return
464 consent_id = None
465 consent = cookies.get('CONSENT')
466 if consent:
467 if 'YES' in consent.value:
468 return
469 consent_id = self._search_regex(
470 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
471 if not consent_id:
472 consent_id = random.randint(100, 999)
473 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
474
475 def _real_initialize(self):
476 self._initialize_consent()
477 if self._downloader is None:
478 return
479 if not self._login():
480 return
481
    # Matches the JSON blob assigned to ytInitialData in watch/browse pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Matches the JSON blob assigned to ytInitialPlayerResponse
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Delimits the end of the above JSON objects within the page source
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
485
486 def _get_default_ytcfg(self, client='web'):
487 return copy.deepcopy(INNERTUBE_CLIENTS[client])
488
489 def _get_innertube_host(self, client='web'):
490 return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
491
492 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
493 # try_get but with fallback to default ytcfg client values when present
494 _func = lambda y: try_get(y, getter, expected_type)
495 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
496
497 def _extract_client_name(self, ytcfg, default_client='web'):
498 return self._ytcfg_get_safe(
499 ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
500 lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
501
502 @staticmethod
503 def _extract_session_index(*data):
504 for ytcfg in data:
505 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
506 if session_index is not None:
507 return session_index
508
509 def _extract_client_version(self, ytcfg, default_client='web'):
510 return self._ytcfg_get_safe(
511 ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
512 lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
513
514 def _extract_api_key(self, ytcfg=None, default_client='web'):
515 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
516
517 def _extract_context(self, ytcfg=None, default_client='web'):
518 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
519 context = _get_context(ytcfg)
520 if context:
521 return context
522
523 context = _get_context(self._get_default_ytcfg(default_client))
524 if not ytcfg:
525 return context
526
527 # Recreate the client context (required)
528 context['client'].update({
529 'clientVersion': self._extract_client_version(ytcfg, default_client),
530 'clientName': self._extract_client_name(ytcfg, default_client),
531 })
532 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
533 if visitor_data:
534 context['client']['visitorData'] = visitor_data
535 return context
536
    # Cached SAPISID cookie value; False once we have determined it is absent
    _SAPISID = None

    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Build a 'SAPISIDHASH <ts>_<sha1>' Authorization header value from
        the SAPISID/__Secure-3PAPISID cookie, or None when unavailable."""
        time_now = round(time.time())
        if self._SAPISID is None:
            yt_cookies = self._get_cookies('https://www.youtube.com')
            # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
            # See: https://github.com/yt-dlp/yt-dlp/issues/393
            sapisid_cookie = dict_get(
                yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
            if sapisid_cookie and sapisid_cookie.value:
                self._SAPISID = sapisid_cookie.value
                self.write_debug('Extracted SAPISID cookie')
                # SAPISID cookie is required if not already present
                if not yt_cookies.get('SAPISID'):
                    self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
                    self._set_cookie(
                        '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
            else:
                # Remember the absence so we don't rescan cookies every call
                self._SAPISID = False
        if not self._SAPISID:
            return None
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'
563
564 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
565 note='Downloading API JSON', errnote='Unable to download API page',
566 context=None, api_key=None, api_hostname=None, default_client='web'):
567
568 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
569 data.update(query)
570 real_headers = self.generate_api_headers(default_client=default_client)
571 real_headers.update({'content-type': 'application/json'})
572 if headers:
573 real_headers.update(headers)
574 return self._download_json(
575 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
576 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
577 data=json.dumps(data).encode('utf8'), headers=real_headers,
578 query={'key': api_key or self._extract_api_key()})
579
580 def extract_yt_initial_data(self, video_id, webpage):
581 return self._parse_json(
582 self._search_regex(
583 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
584 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
585 video_id)
586
587 def _extract_identity_token(self, webpage, item_id):
588 if not webpage:
589 return None
590 ytcfg = self.extract_ytcfg(item_id, webpage)
591 if ytcfg:
592 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
593 if token:
594 return token
595 return self._search_regex(
596 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
597 'identity token', default=None)
598
599 @staticmethod
600 def _extract_account_syncid(*args):
601 """
602 Extract syncId required to download private playlists of secondary channels
603 @params response and/or ytcfg
604 """
605 for data in args:
606 # ytcfg includes channel_syncid if on secondary channel
607 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
608 if delegated_sid:
609 return delegated_sid
610 sync_ids = (try_get(
611 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
612 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
613 if len(sync_ids) >= 2 and sync_ids[1]:
614 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
615 # and just "user_syncid||" for primary channel. We only want the channel_syncid
616 return sync_ids[0]
617
618 def extract_ytcfg(self, video_id, webpage):
619 if not webpage:
620 return {}
621 return self._parse_json(
622 self._search_regex(
623 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
624 default='{}'), video_id, fatal=False) or {}
625
    def generate_api_headers(
            self, ytcfg=None, identity_token=None, account_syncid=None,
            visitor_data=None, api_hostname=None, default_client='web', session_index=None):
        """Build the HTTP headers for an innertube API request.

        Client name/version come from *ytcfg* with fallback to the built-in
        defaults for *default_client*; the auth-related headers are only added
        when the corresponding token/cookie data is available.
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin
        }
        # Recover visitorData from the ytcfg context when not given explicitly
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
        if session_index is None and ytcfg:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        # SAPISIDHASH auth is only possible when the SAPISID cookie exists
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers
654
655 @staticmethod
656 def _build_api_continuation_query(continuation, ctp=None):
657 query = {
658 'continuation': continuation
659 }
660 # TODO: Inconsistency with clickTrackingParams.
661 # Currently we have a fixed ctp contained within context (from ytcfg)
662 # and a ctp in root query for continuation.
663 if ctp:
664 query['clickTracking'] = {'clickTrackingParams': ctp}
665 return query
666
667 @classmethod
668 def _extract_next_continuation_data(cls, renderer):
669 next_continuation = try_get(
670 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
671 lambda x: x['continuation']['reloadContinuationData']), dict)
672 if not next_continuation:
673 return
674 continuation = next_continuation.get('continuation')
675 if not continuation:
676 return
677 ctp = next_continuation.get('clickTrackingParams')
678 return cls._build_api_continuation_query(continuation, ctp)
679
680 @classmethod
681 def _extract_continuation_ep_data(cls, continuation_ep: dict):
682 if isinstance(continuation_ep, dict):
683 continuation = try_get(
684 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
685 if not continuation:
686 return
687 ctp = continuation_ep.get('clickTrackingParams')
688 return cls._build_api_continuation_query(continuation, ctp)
689
690 @classmethod
691 def _extract_continuation(cls, renderer):
692 next_continuation = cls._extract_next_continuation_data(renderer)
693 if next_continuation:
694 return next_continuation
695
696 contents = []
697 for key in ('contents', 'items'):
698 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
699
700 for content in contents:
701 if not isinstance(content, dict):
702 continue
703 continuation_ep = try_get(
704 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
705 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
706 dict)
707 continuation = cls._extract_continuation_ep_data(continuation_ep)
708 if continuation:
709 return continuation
710
711 @classmethod
712 def _extract_alerts(cls, data):
713 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
714 if not isinstance(alert_dict, dict):
715 continue
716 for alert in alert_dict.values():
717 alert_type = alert.get('type')
718 if not alert_type:
719 continue
720 message = cls._get_text(alert, 'text')
721 if message:
722 yield alert_type, message
723
724 def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
725 errors = []
726 warnings = []
727 for alert_type, alert_message in alerts:
728 if alert_type.lower() == 'error' and fatal:
729 errors.append([alert_type, alert_message])
730 else:
731 warnings.append([alert_type, alert_message])
732
733 for alert_type, alert_message in (warnings + errors[:-1]):
734 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
735 if errors:
736 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
737
738 def _extract_and_report_alerts(self, data, *args, **kwargs):
739 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
740
741 def _extract_badges(self, renderer: dict):
742 badges = set()
743 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
744 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
745 if label:
746 badges.add(label.lower())
747 return badges
748
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """Extract display text from a renderer object.

        Each entry of *path_list* is tried in turn (the whole of *data* when
        no paths are given). For every candidate object, a 'simpleText' value
        wins; otherwise the 'text' values of its 'runs' are concatenated,
        optionally truncated to the first *max_runs* runs. Returns None when
        nothing yields text.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # A non-branching path returns a single object; wrap it so the
                # loop below can treat both cases uniformly
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    # The item itself may already be a list of runs
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
770
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the innertube API endpoint *ep* with retries.

        Retries (up to the 'extractor_retries' param, default 3) on retryable
        network errors, on 'unknown error' alerts inside a 200 response, and
        when none of *check_get_keys* is present in the response (incomplete
        data). Returns the parsed response dict, or None when non-fatal and
        all attempts failed.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Try to surface the API's own error message from a
                    # non-HTML error body
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False, only_once=True)
                except ExtractorError as e:
                    # YouTube servers may return errors we want to retry on in a 200 OK response
                    # See: https://github.com/yt-dlp/yt-dlp/issues/839
                    if 'unknown error' in e.msg.lower():
                        last_error = e.msg
                        continue
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
841
842 @staticmethod
843 def is_music_url(url):
844 return re.match(r'https?://music\.youtube\.com/', url) is not None
845
846 def _extract_video(self, renderer):
847 video_id = renderer.get('videoId')
848 title = self._get_text(renderer, 'title')
849 description = self._get_text(renderer, 'descriptionSnippet')
850 duration = parse_duration(self._get_text(
851 renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
852 view_count_text = self._get_text(renderer, 'viewCountText') or ''
853 view_count = str_to_int(self._search_regex(
854 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
855 'view count', default=None))
856
857 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
858
859 return {
860 '_type': 'url',
861 'ie_key': YoutubeIE.ie_key(),
862 'id': video_id,
863 'url': video_id,
864 'title': title,
865 'description': description,
866 'duration': duration,
867 'view_count': view_count,
868 'uploader': uploader,
869 }
870
871
872 class YoutubeIE(YoutubeBaseInfoExtractor):
873 IE_DESC = 'YouTube.com'
    # Hostname regex fragments for Invidious (and similar alternative-frontend)
    # mirrors.  These are '|'-joined and interpolated into _VALID_URL below, so
    # every entry must be a bare-hostname pattern (no scheme, no path).
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        # Tor onion services and I2P addresses of the above
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    )
    # Verbose ((?x)) regex matching every supported video URL shape: full
    # youtube.com watch/embed/shorts URLs (host matched case-insensitively),
    # youtu.be short links, the Invidious mirrors above, a few proxy sites,
    # or a naked 11-character video id.  Group 1 tracks whether a host part
    # was present; the video id is captured as the named group 'id'.
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                            (?:www\.)?deturl\.com/www\.youtube\.com|
                            (?:www\.)?pwnyoutube\.com|
                            (?:www\.)?hooktube\.com|
                            (?:www\.)?yourepeat\.com|
                            tube\.majestyc\.net|
                            %(invidious)s|
                            youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e|shorts)/(?!videoseries))         # v/ or embed/ or e/ or shorts/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                                 v=
                             )
                         ))
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                            %(invidious)s
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     (?:\#|$)""" % {
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    # Candidate regexes for extracting the player version id (named group 'id')
    # from the URL of the player JavaScript (e.g. .../player_ias.vflset/base.js).
    # NOTE(review): the consuming code is outside this chunk; presumably the
    # patterns are tried in this order until one matches.
    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
    )
    # Hard-coded per-itag format attributes (container, resolution, codecs,
    # audio bitrate).  Keys are itag strings as YouTube reports them; '_rtmp'
    # is a special key for RTMP streams.  Negative 'preference' values down-rank
    # the 3D and HLS variants relative to the regular formats.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
        '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
        '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
        '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
        '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
        '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
        '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
        '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
    }
    # Subtitle serialization formats this extractor can request/handle.
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Opt out of the base InfoExtractor's generic geo-restriction bypass
    # (NOTE(review): flag is consumed in the base class, outside this view).
    _GEO_BYPASS = False

    # Short extractor name (used e.g. in logging and extractor selection)
    IE_NAME = 'youtube'
1091 _TESTS = [
1092 {
1093 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1094 'info_dict': {
1095 'id': 'BaW_jenozKc',
1096 'ext': 'mp4',
1097 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1098 'uploader': 'Philipp Hagemeister',
1099 'uploader_id': 'phihag',
1100 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1101 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1102 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1103 'upload_date': '20121002',
1104 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1105 'categories': ['Science & Technology'],
1106 'tags': ['youtube-dl'],
1107 'duration': 10,
1108 'view_count': int,
1109 'like_count': int,
1110 'dislike_count': int,
1111 'start_time': 1,
1112 'end_time': 9,
1113 }
1114 },
1115 {
1116 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1117 'note': 'Embed-only video (#1746)',
1118 'info_dict': {
1119 'id': 'yZIXLfi8CZQ',
1120 'ext': 'mp4',
1121 'upload_date': '20120608',
1122 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1123 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1124 'uploader': 'SET India',
1125 'uploader_id': 'setindia',
1126 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1127 'age_limit': 18,
1128 },
1129 'skip': 'Private video',
1130 },
1131 {
1132 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1133 'note': 'Use the first video ID in the URL',
1134 'info_dict': {
1135 'id': 'BaW_jenozKc',
1136 'ext': 'mp4',
1137 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1138 'uploader': 'Philipp Hagemeister',
1139 'uploader_id': 'phihag',
1140 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1141 'upload_date': '20121002',
1142 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1143 'categories': ['Science & Technology'],
1144 'tags': ['youtube-dl'],
1145 'duration': 10,
1146 'view_count': int,
1147 'like_count': int,
1148 'dislike_count': int,
1149 },
1150 'params': {
1151 'skip_download': True,
1152 },
1153 },
1154 {
1155 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1156 'note': '256k DASH audio (format 141) via DASH manifest',
1157 'info_dict': {
1158 'id': 'a9LDPn-MO4I',
1159 'ext': 'm4a',
1160 'upload_date': '20121002',
1161 'uploader_id': '8KVIDEO',
1162 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1163 'description': '',
1164 'uploader': '8KVIDEO',
1165 'title': 'UHDTV TEST 8K VIDEO.mp4'
1166 },
1167 'params': {
1168 'youtube_include_dash_manifest': True,
1169 'format': '141',
1170 },
1171 'skip': 'format 141 not served anymore',
1172 },
1173 # DASH manifest with encrypted signature
1174 {
1175 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1176 'info_dict': {
1177 'id': 'IB3lcPjvWLA',
1178 'ext': 'm4a',
1179 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1180 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1181 'duration': 244,
1182 'uploader': 'AfrojackVEVO',
1183 'uploader_id': 'AfrojackVEVO',
1184 'upload_date': '20131011',
1185 'abr': 129.495,
1186 },
1187 'params': {
1188 'youtube_include_dash_manifest': True,
1189 'format': '141/bestaudio[ext=m4a]',
1190 },
1191 },
1192 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1193 {
1194 'note': 'Embed allowed age-gate video',
1195 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1196 'info_dict': {
1197 'id': 'HtVdAasjOgU',
1198 'ext': 'mp4',
1199 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1200 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1201 'duration': 142,
1202 'uploader': 'The Witcher',
1203 'uploader_id': 'WitcherGame',
1204 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1205 'upload_date': '20140605',
1206 'age_limit': 18,
1207 },
1208 },
1209 {
1210 'note': 'Age-gate video with embed allowed in public site',
1211 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1212 'info_dict': {
1213 'id': 'HsUATh_Nc2U',
1214 'ext': 'mp4',
1215 'title': 'Godzilla 2 (Official Video)',
1216 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1217 'upload_date': '20200408',
1218 'uploader_id': 'FlyingKitty900',
1219 'uploader': 'FlyingKitty',
1220 'age_limit': 18,
1221 },
1222 },
1223 {
1224 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1225 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1226 'info_dict': {
1227 'id': 'Tq92D6wQ1mg',
1228 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1229 'ext': 'mp4',
1230 'upload_date': '20191227',
1231 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1232 'uploader': 'Projekt Melody',
1233 'description': 'md5:17eccca93a786d51bc67646756894066',
1234 'age_limit': 18,
1235 },
1236 },
1237 {
1238 'note': 'Non-Agegated non-embeddable video',
1239 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1240 'info_dict': {
1241 'id': 'MeJVWBSsPAY',
1242 'ext': 'mp4',
1243 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1244 'uploader': 'Herr Lurik',
1245 'uploader_id': 'st3in234',
1246 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1247 'upload_date': '20130730',
1248 },
1249 },
1250 {
1251 'note': 'Non-bypassable age-gated video',
1252 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1253 'only_matching': True,
1254 },
1255 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1256 # YouTube Red ad is not captured for creator
1257 {
1258 'url': '__2ABJjxzNo',
1259 'info_dict': {
1260 'id': '__2ABJjxzNo',
1261 'ext': 'mp4',
1262 'duration': 266,
1263 'upload_date': '20100430',
1264 'uploader_id': 'deadmau5',
1265 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1266 'creator': 'deadmau5',
1267 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1268 'uploader': 'deadmau5',
1269 'title': 'Deadmau5 - Some Chords (HD)',
1270 'alt_title': 'Some Chords',
1271 },
1272 'expected_warnings': [
1273 'DASH manifest missing',
1274 ]
1275 },
1276 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1277 {
1278 'url': 'lqQg6PlCWgI',
1279 'info_dict': {
1280 'id': 'lqQg6PlCWgI',
1281 'ext': 'mp4',
1282 'duration': 6085,
1283 'upload_date': '20150827',
1284 'uploader_id': 'olympic',
1285 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1286 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1287 'uploader': 'Olympics',
1288 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1289 },
1290 'params': {
1291 'skip_download': 'requires avconv',
1292 }
1293 },
1294 # Non-square pixels
1295 {
1296 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1297 'info_dict': {
1298 'id': '_b-2C3KPAM0',
1299 'ext': 'mp4',
1300 'stretched_ratio': 16 / 9.,
1301 'duration': 85,
1302 'upload_date': '20110310',
1303 'uploader_id': 'AllenMeow',
1304 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1305 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1306 'uploader': '孫ᄋᄅ',
1307 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1308 },
1309 },
1310 # url_encoded_fmt_stream_map is empty string
1311 {
1312 'url': 'qEJwOuvDf7I',
1313 'info_dict': {
1314 'id': 'qEJwOuvDf7I',
1315 'ext': 'webm',
1316 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1317 'description': '',
1318 'upload_date': '20150404',
1319 'uploader_id': 'spbelect',
1320 'uploader': 'Наблюдатели Петербурга',
1321 },
1322 'params': {
1323 'skip_download': 'requires avconv',
1324 },
1325 'skip': 'This live event has ended.',
1326 },
1327 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1328 {
1329 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1330 'info_dict': {
1331 'id': 'FIl7x6_3R5Y',
1332 'ext': 'webm',
1333 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1334 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1335 'duration': 220,
1336 'upload_date': '20150625',
1337 'uploader_id': 'dorappi2000',
1338 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1339 'uploader': 'dorappi2000',
1340 'formats': 'mincount:31',
1341 },
1342 'skip': 'not actual anymore',
1343 },
1344 # DASH manifest with segment_list
1345 {
1346 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1347 'md5': '8ce563a1d667b599d21064e982ab9e31',
1348 'info_dict': {
1349 'id': 'CsmdDsKjzN8',
1350 'ext': 'mp4',
1351 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1352 'uploader': 'Airtek',
1353 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1354 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1355 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1356 },
1357 'params': {
1358 'youtube_include_dash_manifest': True,
1359 'format': '135', # bestvideo
1360 },
1361 'skip': 'This live event has ended.',
1362 },
1363 {
1364 # Multifeed videos (multiple cameras), URL is for Main Camera
1365 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1366 'info_dict': {
1367 'id': 'jvGDaLqkpTg',
1368 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1369 'description': 'md5:e03b909557865076822aa169218d6a5d',
1370 },
1371 'playlist': [{
1372 'info_dict': {
1373 'id': 'jvGDaLqkpTg',
1374 'ext': 'mp4',
1375 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1376 'description': 'md5:e03b909557865076822aa169218d6a5d',
1377 'duration': 10643,
1378 'upload_date': '20161111',
1379 'uploader': 'Team PGP',
1380 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1381 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1382 },
1383 }, {
1384 'info_dict': {
1385 'id': '3AKt1R1aDnw',
1386 'ext': 'mp4',
1387 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1388 'description': 'md5:e03b909557865076822aa169218d6a5d',
1389 'duration': 10991,
1390 'upload_date': '20161111',
1391 'uploader': 'Team PGP',
1392 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1393 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1394 },
1395 }, {
1396 'info_dict': {
1397 'id': 'RtAMM00gpVc',
1398 'ext': 'mp4',
1399 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1400 'description': 'md5:e03b909557865076822aa169218d6a5d',
1401 'duration': 10995,
1402 'upload_date': '20161111',
1403 'uploader': 'Team PGP',
1404 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1405 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1406 },
1407 }, {
1408 'info_dict': {
1409 'id': '6N2fdlP3C5U',
1410 'ext': 'mp4',
1411 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1412 'description': 'md5:e03b909557865076822aa169218d6a5d',
1413 'duration': 10990,
1414 'upload_date': '20161111',
1415 'uploader': 'Team PGP',
1416 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1417 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1418 },
1419 }],
1420 'params': {
1421 'skip_download': True,
1422 },
1423 'skip': 'Not multifeed anymore',
1424 },
1425 {
1426 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1427 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1428 'info_dict': {
1429 'id': 'gVfLd0zydlo',
1430 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1431 },
1432 'playlist_count': 2,
1433 'skip': 'Not multifeed anymore',
1434 },
1435 {
1436 'url': 'https://vid.plus/FlRa-iH7PGw',
1437 'only_matching': True,
1438 },
1439 {
1440 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1441 'only_matching': True,
1442 },
1443 {
1444 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1445 # Also tests cut-off URL expansion in video description (see
1446 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1447 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1448 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1449 'info_dict': {
1450 'id': 'lsguqyKfVQg',
1451 'ext': 'mp4',
1452 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1453 'alt_title': 'Dark Walk',
1454 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1455 'duration': 133,
1456 'upload_date': '20151119',
1457 'uploader_id': 'IronSoulElf',
1458 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1459 'uploader': 'IronSoulElf',
1460 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1461 'track': 'Dark Walk',
1462 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1463 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1464 },
1465 'params': {
1466 'skip_download': True,
1467 },
1468 },
1469 {
1470 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1471 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1472 'only_matching': True,
1473 },
1474 {
1475 # Video with yt:stretch=17:0
1476 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1477 'info_dict': {
1478 'id': 'Q39EVAstoRM',
1479 'ext': 'mp4',
1480 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1481 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1482 'upload_date': '20151107',
1483 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1484 'uploader': 'CH GAMER DROID',
1485 },
1486 'params': {
1487 'skip_download': True,
1488 },
1489 'skip': 'This video does not exist.',
1490 },
1491 {
1492 # Video with incomplete 'yt:stretch=16:'
1493 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1494 'only_matching': True,
1495 },
1496 {
1497 # Video licensed under Creative Commons
1498 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1499 'info_dict': {
1500 'id': 'M4gD1WSo5mA',
1501 'ext': 'mp4',
1502 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1503 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1504 'duration': 721,
1505 'upload_date': '20150127',
1506 'uploader_id': 'BerkmanCenter',
1507 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1508 'uploader': 'The Berkman Klein Center for Internet & Society',
1509 'license': 'Creative Commons Attribution license (reuse allowed)',
1510 },
1511 'params': {
1512 'skip_download': True,
1513 },
1514 },
1515 {
1516 # Channel-like uploader_url
1517 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1518 'info_dict': {
1519 'id': 'eQcmzGIKrzg',
1520 'ext': 'mp4',
1521 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1522 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1523 'duration': 4060,
1524 'upload_date': '20151119',
1525 'uploader': 'Bernie Sanders',
1526 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1527 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1528 'license': 'Creative Commons Attribution license (reuse allowed)',
1529 },
1530 'params': {
1531 'skip_download': True,
1532 },
1533 },
1534 {
1535 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1536 'only_matching': True,
1537 },
1538 {
1539 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1540 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1541 'only_matching': True,
1542 },
1543 {
1544 # Rental video preview
1545 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1546 'info_dict': {
1547 'id': 'uGpuVWrhIzE',
1548 'ext': 'mp4',
1549 'title': 'Piku - Trailer',
1550 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1551 'upload_date': '20150811',
1552 'uploader': 'FlixMatrix',
1553 'uploader_id': 'FlixMatrixKaravan',
1554 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1555 'license': 'Standard YouTube License',
1556 },
1557 'params': {
1558 'skip_download': True,
1559 },
1560 'skip': 'This video is not available.',
1561 },
1562 {
1563 # YouTube Red video with episode data
1564 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1565 'info_dict': {
1566 'id': 'iqKdEhx-dD4',
1567 'ext': 'mp4',
1568 'title': 'Isolation - Mind Field (Ep 1)',
1569 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1570 'duration': 2085,
1571 'upload_date': '20170118',
1572 'uploader': 'Vsauce',
1573 'uploader_id': 'Vsauce',
1574 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1575 'series': 'Mind Field',
1576 'season_number': 1,
1577 'episode_number': 1,
1578 },
1579 'params': {
1580 'skip_download': True,
1581 },
1582 'expected_warnings': [
1583 'Skipping DASH manifest',
1584 ],
1585 },
1586 {
1587 # The following content has been identified by the YouTube community
1588 # as inappropriate or offensive to some audiences.
1589 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1590 'info_dict': {
1591 'id': '6SJNVb0GnPI',
1592 'ext': 'mp4',
1593 'title': 'Race Differences in Intelligence',
1594 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1595 'duration': 965,
1596 'upload_date': '20140124',
1597 'uploader': 'New Century Foundation',
1598 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1599 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1600 },
1601 'params': {
1602 'skip_download': True,
1603 },
1604 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1605 },
1606 {
1607 # itag 212
1608 'url': '1t24XAntNCY',
1609 'only_matching': True,
1610 },
1611 {
1612 # geo restricted to JP
1613 'url': 'sJL6WA-aGkQ',
1614 'only_matching': True,
1615 },
1616 {
1617 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1618 'only_matching': True,
1619 },
1620 {
1621 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1622 'only_matching': True,
1623 },
1624 {
1625 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1626 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1627 'only_matching': True,
1628 },
1629 {
1630 # DRM protected
1631 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1632 'only_matching': True,
1633 },
1634 {
1635 # Video with unsupported adaptive stream type formats
1636 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1637 'info_dict': {
1638 'id': 'Z4Vy8R84T1U',
1639 'ext': 'mp4',
1640 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1641 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1642 'duration': 433,
1643 'upload_date': '20130923',
1644 'uploader': 'Amelia Putri Harwita',
1645 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1646 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1647 'formats': 'maxcount:10',
1648 },
1649 'params': {
1650 'skip_download': True,
1651 'youtube_include_dash_manifest': False,
1652 },
1653 'skip': 'not actual anymore',
1654 },
1655 {
1656 # Youtube Music Auto-generated description
1657 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1658 'info_dict': {
1659 'id': 'MgNrAu2pzNs',
1660 'ext': 'mp4',
1661 'title': 'Voyeur Girl',
1662 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1663 'upload_date': '20190312',
1664 'uploader': 'Stephen - Topic',
1665 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1666 'artist': 'Stephen',
1667 'track': 'Voyeur Girl',
1668 'album': 'it\'s too much love to know my dear',
1669 'release_date': '20190313',
1670 'release_year': 2019,
1671 },
1672 'params': {
1673 'skip_download': True,
1674 },
1675 },
1676 {
1677 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1678 'only_matching': True,
1679 },
1680 {
1681 # invalid -> valid video id redirection
1682 'url': 'DJztXj2GPfl',
1683 'info_dict': {
1684 'id': 'DJztXj2GPfk',
1685 'ext': 'mp4',
1686 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1687 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1688 'upload_date': '20090125',
1689 'uploader': 'Prochorowka',
1690 'uploader_id': 'Prochorowka',
1691 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1692 'artist': 'Panjabi MC',
1693 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1694 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1695 },
1696 'params': {
1697 'skip_download': True,
1698 },
1699 'skip': 'Video unavailable',
1700 },
1701 {
1702 # empty description results in an empty string
1703 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1704 'info_dict': {
1705 'id': 'x41yOUIvK2k',
1706 'ext': 'mp4',
1707 'title': 'IMG 3456',
1708 'description': '',
1709 'upload_date': '20170613',
1710 'uploader_id': 'ElevageOrVert',
1711 'uploader': 'ElevageOrVert',
1712 },
1713 'params': {
1714 'skip_download': True,
1715 },
1716 },
1717 {
1718 # with '};' inside yt initial data (see [1])
1719 # see [2] for an example with '};' inside ytInitialPlayerResponse
1720 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1721 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1722 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1723 'info_dict': {
1724 'id': 'CHqg6qOn4no',
1725 'ext': 'mp4',
1726 'title': 'Part 77 Sort a list of simple types in c#',
1727 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1728 'upload_date': '20130831',
1729 'uploader_id': 'kudvenkat',
1730 'uploader': 'kudvenkat',
1731 },
1732 'params': {
1733 'skip_download': True,
1734 },
1735 },
1736 {
1737 # another example of '};' in ytInitialData
1738 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1739 'only_matching': True,
1740 },
1741 {
1742 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1743 'only_matching': True,
1744 },
1745 {
1746 # https://github.com/ytdl-org/youtube-dl/pull/28094
1747 'url': 'OtqTfy26tG0',
1748 'info_dict': {
1749 'id': 'OtqTfy26tG0',
1750 'ext': 'mp4',
1751 'title': 'Burn Out',
1752 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1753 'upload_date': '20141120',
1754 'uploader': 'The Cinematic Orchestra - Topic',
1755 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1756 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1757 'artist': 'The Cinematic Orchestra',
1758 'track': 'Burn Out',
1759 'album': 'Every Day',
1760 'release_data': None,
1761 'release_year': None,
1762 },
1763 'params': {
1764 'skip_download': True,
1765 },
1766 },
1767 {
1768 # controversial video, only works with bpctr when authenticated with cookies
1769 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1770 'only_matching': True,
1771 },
1772 {
1773 # controversial video, requires bpctr/contentCheckOk
1774 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1775 'info_dict': {
1776 'id': 'SZJvDhaSDnc',
1777 'ext': 'mp4',
1778 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1779 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1780 'uploader': 'CBS This Morning',
1781 'uploader_id': 'CBSThisMorning',
1782 'upload_date': '20140716',
1783 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1784 }
1785 },
1786 {
1787 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1788 'url': 'cBvYw8_A0vQ',
1789 'info_dict': {
1790 'id': 'cBvYw8_A0vQ',
1791 'ext': 'mp4',
1792 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1793 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1794 'upload_date': '20201120',
1795 'uploader': 'Walk around Japan',
1796 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1797 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1798 },
1799 'params': {
1800 'skip_download': True,
1801 },
1802 }, {
1803 # Has multiple audio streams
1804 'url': 'WaOKSUlf4TM',
1805 'only_matching': True
1806 }, {
1807 # Requires Premium: has format 141 when requested using YTM url
1808 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1809 'only_matching': True
1810 }, {
1811 # multiple subtitles with same lang_code
1812 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1813 'only_matching': True,
1814 }, {
1815 # Force use android client fallback
1816 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1817 'info_dict': {
1818 'id': 'YOelRv7fMxY',
1819 'title': 'DIGGING A SECRET TUNNEL Part 1',
1820 'ext': '3gp',
1821 'upload_date': '20210624',
1822 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1823 'uploader': 'colinfurze',
1824 'uploader_id': 'colinfurze',
1825 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1826 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1827 },
1828 'params': {
1829 'format': '17', # 3gp format available on android
1830 'extractor_args': {'youtube': {'player_client': ['android']}},
1831 },
1832 },
1833 {
1834 # Skip download of additional client configs (remix client config in this case)
1835 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1836 'only_matching': True,
1837 'params': {
1838 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1839 },
1840 }, {
1841 # shorts
1842 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1843 'only_matching': True,
1844 },
1845 ]
1846
1847 @classmethod
1848 def suitable(cls, url):
1849 from ..utils import parse_qs
1850
1851 qs = parse_qs(url)
1852 if qs.get('list', [None])[0]:
1853 return False
1854 return super(YoutubeIE, cls).suitable(url)
1855
1856 def __init__(self, *args, **kwargs):
1857 super(YoutubeIE, self).__init__(*args, **kwargs)
1858 self._code_cache = {}
1859 self._player_cache = {}
1860
1861 def _extract_player_url(self, ytcfg=None, webpage=None):
1862 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1863 if not player_url and webpage:
1864 player_url = self._search_regex(
1865 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1866 webpage, 'player URL', fatal=False)
1867 if not player_url:
1868 return None
1869 if player_url.startswith('//'):
1870 player_url = 'https:' + player_url
1871 elif not re.match(r'https?://', player_url):
1872 player_url = compat_urlparse.urljoin(
1873 'https://www.youtube.com', player_url)
1874 return player_url
1875
1876 def _signature_cache_id(self, example_sig):
1877 """ Return a string representation of a signature """
1878 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1879
1880 @classmethod
1881 def _extract_player_info(cls, player_url):
1882 for player_re in cls._PLAYER_INFO_RE:
1883 id_m = re.search(player_re, player_url)
1884 if id_m:
1885 break
1886 else:
1887 raise ExtractorError('Cannot identify player %r' % player_url)
1888 return id_m.group('id')
1889
1890 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1891 player_id = self._extract_player_info(player_url)
1892 if player_id not in self._code_cache:
1893 self._code_cache[player_id] = self._download_webpage(
1894 player_url, video_id, fatal=fatal,
1895 note='Downloading player ' + player_id,
1896 errnote='Download of %s failed' % player_url)
1897 return player_id in self._code_cache
1898
1899 def _extract_signature_function(self, video_id, player_url, example_sig):
1900 player_id = self._extract_player_info(player_url)
1901
1902 # Read from filesystem cache
1903 func_id = 'js_%s_%s' % (
1904 player_id, self._signature_cache_id(example_sig))
1905 assert os.path.basename(func_id) == func_id
1906
1907 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1908 if cache_spec is not None:
1909 return lambda s: ''.join(s[i] for i in cache_spec)
1910
1911 if self._load_player(video_id, player_url):
1912 code = self._code_cache[player_id]
1913 res = self._parse_sig_js(code)
1914
1915 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1916 cache_res = res(test_string)
1917 cache_spec = [ord(c) for c in cache_res]
1918
1919 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1920 return res
1921
    def _print_sig_code(self, func, example_sig):
        """
        Pretty-print the extracted signature function as equivalent Python
        slicing code (used with the youtube_print_sig_code option).
        """
        def gen_sig_code(idxs):
            # Compress a list of source-character indices into python
            # index/slice expressions over the input string `s`.
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # A slice run is open: extend it while the stride holds,
                    # otherwise emit it and close the run.
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Two adjacent indices start a new +-1 stride run.
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush whatever the loop left pending (single index or open run).
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe func with a string of distinct characters to recover the
        # permutation of indices that it applies.
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1960
    def _parse_sig_js(self, jscode):
        """
        Locate the signature-decryption function in the player JS and return
        a Python callable that invokes it through JSInterpreter.
        """
        # The function name is found by matching known call-site patterns;
        # patterns are tried in order, with obsolete ones kept as fallbacks.
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes the signature as its single argument.
        return lambda s: initial_function([s])
1984
1985 def _decrypt_signature(self, s, video_id, player_url):
1986 """Turn the encrypted s field into a working signature"""
1987
1988 if player_url is None:
1989 raise ExtractorError('Cannot decrypt signature without player_url')
1990
1991 try:
1992 player_id = (player_url, self._signature_cache_id(s))
1993 if player_id not in self._player_cache:
1994 func = self._extract_signature_function(
1995 video_id, player_url, s
1996 )
1997 self._player_cache[player_id] = func
1998 func = self._player_cache[player_id]
1999 if self.get_param('youtube_print_sig_code'):
2000 self._print_sig_code(func, s)
2001 return func(s)
2002 except Exception as e:
2003 tb = traceback.format_exc()
2004 raise ExtractorError(
2005 'Signature extraction failed: ' + tb, cause=e)
2006
2007 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
2008 """
2009 Extract signatureTimestamp (sts)
2010 Required to tell API what sig/player version is in use.
2011 """
2012 sts = None
2013 if isinstance(ytcfg, dict):
2014 sts = int_or_none(ytcfg.get('STS'))
2015
2016 if not sts:
2017 # Attempt to extract from player
2018 if player_url is None:
2019 error_msg = 'Cannot extract signature timestamp without player_url.'
2020 if fatal:
2021 raise ExtractorError(error_msg)
2022 self.report_warning(error_msg)
2023 return
2024 if self._load_player(video_id, player_url, fatal=fatal):
2025 player_id = self._extract_player_info(player_url)
2026 code = self._code_cache[player_id]
2027 sts = int_or_none(self._search_regex(
2028 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2029 'JS player signature timestamp', group='sts', fatal=fatal))
2030 return sts
2031
2032 def _mark_watched(self, video_id, player_responses):
2033 playback_url = traverse_obj(
2034 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2035 expected_type=url_or_none, get_all=False)
2036 if not playback_url:
2037 self.report_warning('Unable to mark watched')
2038 return
2039 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2040 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2041
2042 # cpn generation algorithm is reverse engineered from base.js.
2043 # In fact it works even with dummy cpn.
2044 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2045 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2046
2047 qs.update({
2048 'ver': ['2'],
2049 'cpn': [cpn],
2050 })
2051 playback_url = compat_urlparse.urlunparse(
2052 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2053
2054 self._download_webpage(
2055 playback_url, video_id, 'Marking watched',
2056 'Unable to mark watched', fatal=False)
2057
2058 @staticmethod
2059 def _extract_urls(webpage):
2060 # Embedded YouTube player
2061 entries = [
2062 unescapeHTML(mobj.group('url'))
2063 for mobj in re.finditer(r'''(?x)
2064 (?:
2065 <iframe[^>]+?src=|
2066 data-video-url=|
2067 <embed[^>]+?src=|
2068 embedSWF\(?:\s*|
2069 <object[^>]+data=|
2070 new\s+SWFObject\(
2071 )
2072 (["\'])
2073 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2074 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2075 \1''', webpage)]
2076
2077 # lazyYT YouTube embed
2078 entries.extend(list(map(
2079 unescapeHTML,
2080 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2081
2082 # Wordpress "YouTube Video Importer" plugin
2083 matches = re.findall(r'''(?x)<div[^>]+
2084 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2085 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2086 entries.extend(m[-1] for m in matches)
2087
2088 return entries
2089
2090 @staticmethod
2091 def _extract_url(webpage):
2092 urls = YoutubeIE._extract_urls(webpage)
2093 return urls[0] if urls else None
2094
2095 @classmethod
2096 def extract_id(cls, url):
2097 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2098 if mobj is None:
2099 raise ExtractorError('Invalid URL: %s' % url)
2100 return mobj.group('id')
2101
2102 def _extract_chapters_from_json(self, data, duration):
2103 chapter_list = traverse_obj(
2104 data, (
2105 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2106 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2107 ), expected_type=list)
2108
2109 return self._extract_chapters(
2110 chapter_list,
2111 chapter_time=lambda chapter: float_or_none(
2112 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2113 chapter_title=lambda chapter: traverse_obj(
2114 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2115 duration=duration)
2116
2117 def _extract_chapters_from_engagement_panel(self, data, duration):
2118 content_list = traverse_obj(
2119 data,
2120 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2121 expected_type=list, default=[])
2122 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2123 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2124
2125 return next((
2126 filter(None, (
2127 self._extract_chapters(
2128 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2129 chapter_time, chapter_title, duration)
2130 for contents in content_list
2131 ))), [])
2132
2133 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2134 chapters = []
2135 last_chapter = {'start_time': 0}
2136 for idx, chapter in enumerate(chapter_list or []):
2137 title = chapter_title(chapter)
2138 start_time = chapter_time(chapter)
2139 if start_time is None:
2140 continue
2141 last_chapter['end_time'] = start_time
2142 if start_time < last_chapter['start_time']:
2143 if idx == 1:
2144 chapters.pop()
2145 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2146 else:
2147 self.report_warning(f'Invalid start time for chapter "{title}"')
2148 continue
2149 last_chapter = {'start_time': start_time, 'title': title}
2150 chapters.append(last_chapter)
2151 last_chapter['end_time'] = duration
2152 return chapters
2153
2154 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2155 return self._parse_json(self._search_regex(
2156 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2157 regex), webpage, name, default='{}'), video_id, fatal=False)
2158
2159 @staticmethod
2160 def parse_time_text(time_text):
2161 """
2162 Parse the comment time text
2163 time_text is in the format 'X units ago (edited)'
2164 """
2165 time_text_split = time_text.split(' ')
2166 if len(time_text_split) >= 3:
2167 try:
2168 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2169 except ValueError:
2170 return None
2171
2172 def _extract_comment(self, comment_renderer, parent=None):
2173 comment_id = comment_renderer.get('commentId')
2174 if not comment_id:
2175 return
2176
2177 text = self._get_text(comment_renderer, 'contentText')
2178
2179 # note: timestamp is an estimate calculated from the current time and time_text
2180 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2181 time_text_dt = self.parse_time_text(time_text)
2182 if isinstance(time_text_dt, datetime.datetime):
2183 timestamp = calendar.timegm(time_text_dt.timetuple())
2184 author = self._get_text(comment_renderer, 'authorText')
2185 author_id = try_get(comment_renderer,
2186 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2187
2188 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2189 lambda x: x['likeCount']), compat_str)) or 0
2190 author_thumbnail = try_get(comment_renderer,
2191 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2192
2193 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2194 is_favorited = 'creatorHeart' in (try_get(
2195 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2196 return {
2197 'id': comment_id,
2198 'text': text,
2199 'timestamp': timestamp,
2200 'time_text': time_text,
2201 'like_count': votes,
2202 'is_favorited': is_favorited,
2203 'author': author,
2204 'author_id': author_id,
2205 'author_thumbnail': author_thumbnail,
2206 'author_is_uploader': author_is_uploader,
2207 'parent': parent or 'root'
2208 }
2209
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """
        Generator of comment info dicts for a comment section (or reply thread).

        May yield a plain int first: the estimated total comment count parsed
        from the section header. comment_counts is a shared mutable list of
        [comments so far, estimated total, current reply-thread index] that is
        threaded through recursive calls for reply threads (parent is then the
        parent comment id).
        """
        def extract_header(contents):
            # Parse the comments-section header: emits the estimated total and
            # the continuation for the selected sort order (top/new).
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in a thread, recursing into its replies.
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        # Tokens shorter than 27 chars come from the pre-innertube API and must
        # be regenerated in the new format.
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the comment continuations until exhausted.
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    ' ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry the visitorData forward so subsequent pages share a session.
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2380
2381 @staticmethod
2382 def _generate_comment_continuation(video_id):
2383 """
2384 Generates initial comment section continuation token from given video id
2385 """
2386 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2387 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2388 new_continuation_intlist = list(itertools.chain.from_iterable(
2389 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2390 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2391
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            # Find the first known comment renderer and delegate to the
            # _comment_entries generator (which may yield an int estimate first).
            if isinstance(contents, list):
                for entry in contents:
                    for key, renderer in entry.items():
                        if key not in known_entry_comment_renderers:
                            continue
                        yield from self._comment_entries(
                            renderer, video_id=video_id, ytcfg=ytcfg,
                            identity_token=self._extract_identity_token(webpage, item_id=video_id),
                            account_syncid=self._extract_account_syncid(ytcfg))
                        break
        comments = []
        known_entry_comment_renderers = ('itemSectionRenderer',)
        estimated_total = 0
        # max_comments caps how many comments are collected (inf = no limit)
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
        # Force English regardless of account setting to prevent parsing issues
        # See: https://github.com/yt-dlp/yt-dlp/issues/532
        ytcfg = copy.deepcopy(ytcfg)
        # NOTE(review): if the INNERTUBE_CONTEXT.client path is missing, 'hl'
        # is set on the throwaway default dict - presumably intended as
        # best-effort; confirm traverse_obj default semantics.
        traverse_obj(
            ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
        try:
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                # An int item is the estimated total, not a comment dict
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            # Allow the user to abort comment extraction and keep what we have
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2429
2430 @staticmethod
2431 def _generate_player_context(sts=None):
2432 context = {
2433 'html5Preference': 'HTML5_PREF_WANTS',
2434 }
2435 if sts is not None:
2436 context['signatureTimestamp'] = sts
2437 return {
2438 'playbackContext': {
2439 'contentPlaybackContext': context
2440 },
2441 'contentCheckOk': True,
2442 'racyCheckOk': True
2443 }
2444
2445 @staticmethod
2446 def _is_agegated(player_response):
2447 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2448 return True
2449
2450 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2451 AGE_GATE_REASONS = (
2452 'confirm your age', 'age-restricted', 'inappropriate', # reason
2453 'age_verification_required', 'age_check_required', # status
2454 )
2455 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2456
2457 @staticmethod
2458 def _is_unplayable(player_response):
2459 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2460
2461 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
2462
2463 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2464 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2465 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
2466 headers = self.generate_api_headers(
2467 player_ytcfg, identity_token, syncid,
2468 default_client=client, session_index=session_index)
2469
2470 yt_query = {'videoId': video_id}
2471 yt_query.update(self._generate_player_context(sts))
2472 return self._extract_response(
2473 item_id=video_id, ep='player', query=yt_query,
2474 ytcfg=player_ytcfg, headers=headers, fatal=True,
2475 default_client=client,
2476 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2477 ) or None
2478
2479 def _get_requested_clients(self, url, smuggled_data):
2480 requested_clients = []
2481 allowed_clients = sorted(
2482 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2483 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2484 for client in self._configuration_arg('player_client'):
2485 if client in allowed_clients:
2486 requested_clients.append(client)
2487 elif client == 'all':
2488 requested_clients.extend(allowed_clients)
2489 else:
2490 self.report_warning(f'Skipping unsupported client {client}')
2491 if not requested_clients:
2492 requested_clients = ['android', 'web']
2493
2494 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2495 requested_clients.extend(
2496 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2497
2498 return orderedSet(requested_clients)
2499
2500 def _extract_player_ytcfg(self, client, video_id):
2501 url = {
2502 'web_music': 'https://music.youtube.com',
2503 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2504 }.get(client)
2505 if not url:
2506 return {}
2507 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2508 return self.extract_ytcfg(video_id, webpage) or {}
2509
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
        """Yield player response dicts for each requested client.

        Clients are processed in priority order; agegated/unplayable responses
        may enqueue additional fallback clients (*_agegate / *_creator).  If
        every client errored and nothing was yielded, the last error is raised.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        # Treat `clients` as a stack: reverse so pop() returns them in the
        # originally requested order. `original_clients` is kept so fallback
        # clients are only added when the user did not explicitly request them.
        original_clients = clients
        clients = clients[::-1]

        def append_client(client_name):
            # Enqueue a fallback client unless unknown or already requested
            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
                clients.append(client_name)

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        yielded_pr = False
        if initial_pr:
            pr = dict(initial_pr)
            pr['streamingData'] = None
            yielded_pr = True
            yield pr

        last_error = None
        while clients:
            client = clients.pop()
            # The webpage's ytcfg only applies to the plain 'web' client
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            try:
                # Reuse the webpage's player response for 'web' instead of re-requesting
                pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
            except ExtractorError as e:
                # Defer: only warn about a previous error; the newest one may
                # still be raised below if no client succeeds
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

            if pr:
                yielded_pr = True
                yield pr

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header():
                append_client(client.replace('_agegate', '_creator'))
            elif self._is_agegated(pr):
                append_client(f'{client}_agegate')

        if last_error:
            if not yielded_pr:
                raise last_error
            self.report_warning(last_error)
2565
2566 def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2567 itags, stream_ids = [], []
2568 itag_qualities, res_qualities = {}, {}
2569 q = qualities([
2570 # Normally tiny is the smallest video-only formats. But
2571 # audio-only formats with unknown quality may get tagged as tiny
2572 'tiny',
2573 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2574 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2575 ])
2576 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2577
2578 for fmt in streaming_formats:
2579 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2580 continue
2581
2582 itag = str_or_none(fmt.get('itag'))
2583 audio_track = fmt.get('audioTrack') or {}
2584 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2585 if stream_id in stream_ids:
2586 continue
2587
2588 quality = fmt.get('quality')
2589 height = int_or_none(fmt.get('height'))
2590 if quality == 'tiny' or not quality:
2591 quality = fmt.get('audioQuality', '').lower() or quality
2592 # The 3gp format (17) in android client has a quality of "small",
2593 # but is actually worse than other formats
2594 if itag == '17':
2595 quality = 'tiny'
2596 if quality:
2597 if itag:
2598 itag_qualities[itag] = quality
2599 if height:
2600 res_qualities[height] = quality
2601 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2602 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2603 # number of fragment that would subsequently requested with (`&sq=N`)
2604 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2605 continue
2606
2607 fmt_url = fmt.get('url')
2608 if not fmt_url:
2609 sc = compat_parse_qs(fmt.get('signatureCipher'))
2610 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2611 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2612 if not (sc and fmt_url and encrypted_sig):
2613 continue
2614 if not player_url:
2615 continue
2616 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2617 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2618 fmt_url += '&' + sp + '=' + signature
2619
2620 if itag:
2621 itags.append(itag)
2622 stream_ids.append(stream_id)
2623
2624 tbr = float_or_none(
2625 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2626 dct = {
2627 'asr': int_or_none(fmt.get('audioSampleRate')),
2628 'filesize': int_or_none(fmt.get('contentLength')),
2629 'format_id': itag,
2630 'format_note': ', '.join(filter(None, (
2631 '%s%s' % (audio_track.get('displayName') or '',
2632 ' (default)' if audio_track.get('audioIsDefault') else ''),
2633 fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
2634 'fps': int_or_none(fmt.get('fps')),
2635 'height': height,
2636 'quality': q(quality),
2637 'tbr': tbr,
2638 'url': fmt_url,
2639 'width': int_or_none(fmt.get('width')),
2640 'language': audio_track.get('id', '').split('.')[0],
2641 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2642 }
2643 mime_mobj = re.match(
2644 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2645 if mime_mobj:
2646 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2647 dct.update(parse_codecs(mime_mobj.group(2)))
2648 no_audio = dct.get('acodec') == 'none'
2649 no_video = dct.get('vcodec') == 'none'
2650 if no_audio:
2651 dct['vbr'] = tbr
2652 if no_video:
2653 dct['abr'] = tbr
2654 if no_audio or no_video:
2655 dct['downloader_options'] = {
2656 # Youtube throttles chunks >~10M
2657 'http_chunk_size': 10485760,
2658 }
2659 if dct.get('ext'):
2660 dct['container'] = dct['ext'] + '_dash'
2661 yield dct
2662
2663 skip_manifests = self._configuration_arg('skip')
2664 get_dash = (
2665 (not is_live or self._configuration_arg('include_live_dash'))
2666 and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
2667 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2668
2669 def guess_quality(f):
2670 for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
2671 if val in qdict:
2672 return q(qdict[val])
2673 return -1
2674
2675 for sd in streaming_data:
2676 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2677 if hls_manifest_url:
2678 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2679 itag = self._search_regex(
2680 r'/itag/(\d+)', f['url'], 'itag', default=None)
2681 if itag in itags:
2682 continue
2683 if itag:
2684 f['format_id'] = itag
2685 itags.append(itag)
2686 f['quality'] = guess_quality(f)
2687 yield f
2688
2689 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2690 if dash_manifest_url:
2691 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2692 itag = f['format_id']
2693 if itag in itags:
2694 continue
2695 if itag:
2696 itags.append(itag)
2697 f['quality'] = guess_quality(f)
2698 filesize = int_or_none(self._search_regex(
2699 r'/clen/(\d+)', f.get('fragment_base_url')
2700 or f['url'], 'file size', default=None))
2701 if filesize:
2702 f['filesize'] = filesize
2703 yield f
2704
2705 def _real_extract(self, url):
2706 url, smuggled_data = unsmuggle_url(url, {})
2707 video_id = self._match_id(url)
2708
2709 base_url = self.http_scheme() + '//www.youtube.com/'
2710 webpage_url = base_url + 'watch?v=' + video_id
2711 webpage = self._download_webpage(
2712 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2713
2714 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2715 player_url = self._extract_player_url(master_ytcfg, webpage)
2716 identity_token = self._extract_identity_token(webpage, video_id)
2717
2718 player_responses = list(self._extract_player_responses(
2719 self._get_requested_clients(url, smuggled_data),
2720 video_id, webpage, master_ytcfg, player_url, identity_token))
2721
2722 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2723
2724 playability_statuses = traverse_obj(
2725 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2726
2727 trailer_video_id = get_first(
2728 playability_statuses,
2729 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2730 expected_type=str)
2731 if trailer_video_id:
2732 return self.url_result(
2733 trailer_video_id, self.ie_key(), trailer_video_id)
2734
2735 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2736 if webpage else (lambda x: None))
2737
2738 video_details = traverse_obj(
2739 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2740 microformats = traverse_obj(
2741 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2742 expected_type=dict, default=[])
2743 video_title = (
2744 get_first(video_details, 'title')
2745 or self._get_text(microformats, (..., 'title'))
2746 or search_meta(['og:title', 'twitter:title', 'title']))
2747 video_description = get_first(video_details, 'shortDescription')
2748
2749 if not smuggled_data.get('force_singlefeed', False):
2750 if not self.get_param('noplaylist'):
2751 multifeed_metadata_list = get_first(
2752 player_responses,
2753 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2754 expected_type=str)
2755 if multifeed_metadata_list:
2756 entries = []
2757 feed_ids = []
2758 for feed in multifeed_metadata_list.split(','):
2759 # Unquote should take place before split on comma (,) since textual
2760 # fields may contain comma as well (see
2761 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2762 feed_data = compat_parse_qs(
2763 compat_urllib_parse_unquote_plus(feed))
2764
2765 def feed_entry(name):
2766 return try_get(
2767 feed_data, lambda x: x[name][0], compat_str)
2768
2769 feed_id = feed_entry('id')
2770 if not feed_id:
2771 continue
2772 feed_title = feed_entry('title')
2773 title = video_title
2774 if feed_title:
2775 title += ' (%s)' % feed_title
2776 entries.append({
2777 '_type': 'url_transparent',
2778 'ie_key': 'Youtube',
2779 'url': smuggle_url(
2780 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2781 {'force_singlefeed': True}),
2782 'title': title,
2783 })
2784 feed_ids.append(feed_id)
2785 self.to_screen(
2786 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2787 % (', '.join(feed_ids), video_id))
2788 return self.playlist_result(
2789 entries, video_id, video_title, video_description)
2790 else:
2791 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2792
2793 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2794 is_live = get_first(video_details, 'isLive')
2795 if is_live is None:
2796 is_live = get_first(live_broadcast_details, 'isLiveNow')
2797
2798 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2799 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2800
2801 if not formats:
2802 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2803 self.report_drm(video_id)
2804 pemr = get_first(
2805 playability_statuses,
2806 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2807 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2808 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2809 if subreason:
2810 if subreason == 'The uploader has not made this video available in your country.':
2811 countries = get_first(microformats, 'availableCountries')
2812 if not countries:
2813 regions_allowed = search_meta('regionsAllowed')
2814 countries = regions_allowed.split(',') if regions_allowed else None
2815 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2816 reason += f'. {subreason}'
2817 if reason:
2818 self.raise_no_formats(reason, expected=True)
2819
2820 for f in formats:
2821 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
2822 f['source_preference'] = -10
2823 # TODO: this method is not reliable
2824 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
2825
2826 # Source is given priority since formats that throttle are given lower source_preference
2827 # When throttling issue is fully fixed, remove this
2828 self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang'))
2829
2830 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2831 if not keywords and webpage:
2832 keywords = [
2833 unescapeHTML(m.group('content'))
2834 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2835 for keyword in keywords:
2836 if keyword.startswith('yt:stretch='):
2837 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2838 if mobj:
2839 # NB: float is intentional for forcing float division
2840 w, h = (float(v) for v in mobj.groups())
2841 if w > 0 and h > 0:
2842 ratio = w / h
2843 for f in formats:
2844 if f.get('vcodec') != 'none':
2845 f['stretched_ratio'] = ratio
2846 break
2847
2848 thumbnails = []
2849 thumbnail_dicts = traverse_obj(
2850 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2851 expected_type=dict, default=[])
2852 for thumbnail in thumbnail_dicts:
2853 thumbnail_url = thumbnail.get('url')
2854 if not thumbnail_url:
2855 continue
2856 # Sometimes youtube gives a wrong thumbnail URL. See:
2857 # https://github.com/yt-dlp/yt-dlp/issues/233
2858 # https://github.com/ytdl-org/youtube-dl/issues/28023
2859 if 'maxresdefault' in thumbnail_url:
2860 thumbnail_url = thumbnail_url.split('?')[0]
2861 thumbnails.append({
2862 'url': thumbnail_url,
2863 'height': int_or_none(thumbnail.get('height')),
2864 'width': int_or_none(thumbnail.get('width')),
2865 })
2866 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2867 if thumbnail_url:
2868 thumbnails.append({
2869 'url': thumbnail_url,
2870 })
2871 # The best resolution thumbnails sometimes does not appear in the webpage
2872 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2873 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2874 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2875 # TODO: Test them also? - For some videos, even these don't exist
2876 guaranteed_thumbnail_names = [
2877 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2878 'mqdefault', 'mq1', 'mq2', 'mq3',
2879 'default', '1', '2', '3'
2880 ]
2881 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2882 n_thumbnail_names = len(thumbnail_names)
2883
2884 thumbnails.extend({
2885 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2886 video_id=video_id, name=name, ext=ext,
2887 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2888 '_test_url': name in hq_thumbnail_names,
2889 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2890 for thumb in thumbnails:
2891 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2892 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2893 self._remove_duplicate_formats(thumbnails)
2894
2895 category = get_first(microformats, 'category') or search_meta('genre')
2896 channel_id = str_or_none(
2897 get_first(video_details, 'channelId')
2898 or get_first(microformats, 'externalChannelId')
2899 or search_meta('channelId'))
2900 duration = int_or_none(
2901 get_first(video_details, 'lengthSeconds')
2902 or get_first(microformats, 'lengthSeconds')
2903 or parse_duration(search_meta('duration'))) or None
2904 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2905
2906 live_content = get_first(video_details, 'isLiveContent')
2907 is_upcoming = get_first(video_details, 'isUpcoming')
2908 if is_live is None:
2909 if is_upcoming or live_content is False:
2910 is_live = False
2911 if is_upcoming is None and (live_content or is_live):
2912 is_upcoming = False
2913 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2914 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2915 if not duration and live_endtime and live_starttime:
2916 duration = live_endtime - live_starttime
2917
2918 info = {
2919 'id': video_id,
2920 'title': self._live_title(video_title) if is_live else video_title,
2921 'formats': formats,
2922 'thumbnails': thumbnails,
2923 'description': video_description,
2924 'upload_date': unified_strdate(
2925 get_first(microformats, 'uploadDate')
2926 or search_meta('uploadDate')),
2927 'uploader': get_first(video_details, 'author'),
2928 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2929 'uploader_url': owner_profile_url,
2930 'channel_id': channel_id,
2931 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2932 'duration': duration,
2933 'view_count': int_or_none(
2934 get_first((video_details, microformats), (..., 'viewCount'))
2935 or search_meta('interactionCount')),
2936 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2937 'age_limit': 18 if (
2938 get_first(microformats, 'isFamilySafe') is False
2939 or search_meta('isFamilyFriendly') == 'false'
2940 or search_meta('og:restrictions:age') == '18+') else 0,
2941 'webpage_url': webpage_url,
2942 'categories': [category] if category else None,
2943 'tags': keywords,
2944 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2945 'is_live': is_live,
2946 'was_live': (False if is_live or is_upcoming or live_content is False
2947 else None if is_live is None or is_upcoming is None
2948 else live_content),
2949 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2950 'release_timestamp': live_starttime,
2951 }
2952
2953 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2954 # Converted into dicts to remove duplicates
2955 captions = {
2956 sub.get('baseUrl'): sub
2957 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2958 translation_languages = {
2959 lang.get('languageCode'): lang.get('languageName')
2960 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2961 subtitles = {}
2962 if pctr:
2963 def process_language(container, base_url, lang_code, sub_name, query):
2964 lang_subs = container.setdefault(lang_code, [])
2965 for fmt in self._SUBTITLE_FORMATS:
2966 query.update({
2967 'fmt': fmt,
2968 })
2969 lang_subs.append({
2970 'ext': fmt,
2971 'url': update_url_query(base_url, query),
2972 'name': sub_name,
2973 })
2974
2975 for base_url, caption_track in captions.items():
2976 if not base_url:
2977 continue
2978 if caption_track.get('kind') != 'asr':
2979 lang_code = (
2980 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2981 or caption_track.get('languageCode'))
2982 if not lang_code:
2983 continue
2984 process_language(
2985 subtitles, base_url, lang_code,
2986 traverse_obj(caption_track, ('name', 'simpleText'), ('name', 'runs', ..., 'text'), get_all=False),
2987 {})
2988 continue
2989 automatic_captions = {}
2990 for trans_code, trans_name in translation_languages.items():
2991 if not trans_code:
2992 continue
2993 process_language(
2994 automatic_captions, base_url, trans_code,
2995 self._get_text(trans_name, max_runs=1),
2996 {'tlang': trans_code})
2997 info['automatic_captions'] = automatic_captions
2998 info['subtitles'] = subtitles
2999
3000 parsed_url = compat_urllib_parse_urlparse(url)
3001 for component in [parsed_url.fragment, parsed_url.query]:
3002 query = compat_parse_qs(component)
3003 for k, v in query.items():
3004 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
3005 d_k += '_time'
3006 if d_k not in info and k in s_ks:
3007 info[d_k] = parse_duration(query[k][0])
3008
3009 # Youtube Music Auto-generated description
3010 if video_description:
3011 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
3012 if mobj:
3013 release_year = mobj.group('release_year')
3014 release_date = mobj.group('release_date')
3015 if release_date:
3016 release_date = release_date.replace('-', '')
3017 if not release_year:
3018 release_year = release_date[:4]
3019 info.update({
3020 'album': mobj.group('album'.strip()),
3021 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
3022 'track': mobj.group('track').strip(),
3023 'release_date': release_date,
3024 'release_year': int_or_none(release_year),
3025 })
3026
3027 initial_data = None
3028 if webpage:
3029 initial_data = self._extract_yt_initial_variable(
3030 webpage, self._YT_INITIAL_DATA_RE, video_id,
3031 'yt initial data')
3032 if not initial_data:
3033 headers = self.generate_api_headers(
3034 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
3035 session_index=self._extract_session_index(master_ytcfg))
3036
3037 initial_data = self._extract_response(
3038 item_id=video_id, ep='next', fatal=False,
3039 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
3040 note='Downloading initial data API JSON')
3041
3042 try:
3043 # This will error if there is no livechat
3044 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3045 info['subtitles']['live_chat'] = [{
3046 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3047 'video_id': video_id,
3048 'ext': 'json',
3049 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3050 }]
3051 except (KeyError, IndexError, TypeError):
3052 pass
3053
3054 if initial_data:
3055 info['chapters'] = (
3056 self._extract_chapters_from_json(initial_data, duration)
3057 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3058 or None)
3059
3060 contents = try_get(
3061 initial_data,
3062 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3063 list) or []
3064 for content in contents:
3065 vpir = content.get('videoPrimaryInfoRenderer')
3066 if vpir:
3067 stl = vpir.get('superTitleLink')
3068 if stl:
3069 stl = self._get_text(stl)
3070 if try_get(
3071 vpir,
3072 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3073 info['location'] = stl
3074 else:
3075 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3076 if mobj:
3077 info.update({
3078 'series': mobj.group(1),
3079 'season_number': int(mobj.group(2)),
3080 'episode_number': int(mobj.group(3)),
3081 })
3082 for tlb in (try_get(
3083 vpir,
3084 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3085 list) or []):
3086 tbr = tlb.get('toggleButtonRenderer') or {}
3087 for getter, regex in [(
3088 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3089 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3090 lambda x: x['accessibility'],
3091 lambda x: x['accessibilityData']['accessibilityData'],
3092 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3093 label = (try_get(tbr, getter, dict) or {}).get('label')
3094 if label:
3095 mobj = re.match(regex, label)
3096 if mobj:
3097 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3098 break
3099 sbr_tooltip = try_get(
3100 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3101 if sbr_tooltip:
3102 like_count, dislike_count = sbr_tooltip.split(' / ')
3103 info.update({
3104 'like_count': str_to_int(like_count),
3105 'dislike_count': str_to_int(dislike_count),
3106 })
3107 vsir = content.get('videoSecondaryInfoRenderer')
3108 if vsir:
3109 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3110 rows = try_get(
3111 vsir,
3112 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3113 list) or []
3114 multiple_songs = False
3115 for row in rows:
3116 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3117 multiple_songs = True
3118 break
3119 for row in rows:
3120 mrr = row.get('metadataRowRenderer') or {}
3121 mrr_title = mrr.get('title')
3122 if not mrr_title:
3123 continue
3124 mrr_title = self._get_text(mrr, 'title')
3125 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3126 if mrr_title == 'License':
3127 info['license'] = mrr_contents_text
3128 elif not multiple_songs:
3129 if mrr_title == 'Album':
3130 info['album'] = mrr_contents_text
3131 elif mrr_title == 'Artist':
3132 info['artist'] = mrr_contents_text
3133 elif mrr_title == 'Song':
3134 info['track'] = mrr_contents_text
3135
3136 fallbacks = {
3137 'channel': 'uploader',
3138 'channel_id': 'uploader_id',
3139 'channel_url': 'uploader_url',
3140 }
3141 for to, frm in fallbacks.items():
3142 if not info.get(to):
3143 info[to] = info.get(frm)
3144
3145 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3146 v = info.get(s_k)
3147 if v:
3148 info[d_k] = v
3149
3150 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3151 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3152 is_membersonly = None
3153 is_premium = None
3154 if initial_data and is_private is not None:
3155 is_membersonly = False
3156 is_premium = False
3157 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3158 badge_labels = set()
3159 for content in contents:
3160 if not isinstance(content, dict):
3161 continue
3162 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3163 for badge_label in badge_labels:
3164 if badge_label.lower() == 'members only':
3165 is_membersonly = True
3166 elif badge_label.lower() == 'premium':
3167 is_premium = True
3168 elif badge_label.lower() == 'unlisted':
3169 is_unlisted = True
3170
3171 info['availability'] = self._availability(
3172 is_private=is_private,
3173 needs_premium=is_premium,
3174 needs_subscription=is_membersonly,
3175 needs_auth=info['age_limit'] >= 18,
3176 is_unlisted=None if is_private is None else is_unlisted)
3177
3178 if self.get_param('getcomments', False):
3179 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3180
3181 self.mark_watched(video_id, player_responses)
3182
3183 return info
3184
3185
3186 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3187 IE_DESC = 'YouTube.com tab'
3188 _VALID_URL = r'''(?x)
3189 https?://
3190 (?:\w+\.)?
3191 (?:
3192 youtube(?:kids)?\.com|
3193 invidio\.us
3194 )/
3195 (?:
3196 (?P<channel_type>channel|c|user|browse)/|
3197 (?P<not_channel>
3198 feed/|hashtag/|
3199 (?:playlist|watch)\?.*?\blist=
3200 )|
3201 (?!(?:%s)\b) # Direct URLs
3202 )
3203 (?P<id>[^/?\#&]+)
3204 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3205 IE_NAME = 'youtube:tab'
3206
3207 _TESTS = [{
3208 'note': 'playlists, multipage',
3209 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3210 'playlist_mincount': 94,
3211 'info_dict': {
3212 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3213 'title': 'Игорь Клейнер - Playlists',
3214 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3215 'uploader': 'Игорь Клейнер',
3216 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3217 },
3218 }, {
3219 'note': 'playlists, multipage, different order',
3220 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3221 'playlist_mincount': 94,
3222 'info_dict': {
3223 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3224 'title': 'Игорь Клейнер - Playlists',
3225 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3226 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3227 'uploader': 'Игорь Клейнер',
3228 },
3229 }, {
3230 'note': 'playlists, series',
3231 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3232 'playlist_mincount': 5,
3233 'info_dict': {
3234 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3235 'title': '3Blue1Brown - Playlists',
3236 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3237 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3238 'uploader': '3Blue1Brown',
3239 },
3240 }, {
3241 'note': 'playlists, singlepage',
3242 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3243 'playlist_mincount': 4,
3244 'info_dict': {
3245 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3246 'title': 'ThirstForScience - Playlists',
3247 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3248 'uploader': 'ThirstForScience',
3249 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3250 }
3251 }, {
3252 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3253 'only_matching': True,
3254 }, {
3255 'note': 'basic, single video playlist',
3256 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3257 'info_dict': {
3258 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3259 'uploader': 'Sergey M.',
3260 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3261 'title': 'youtube-dl public playlist',
3262 },
3263 'playlist_count': 1,
3264 }, {
3265 'note': 'empty playlist',
3266 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3267 'info_dict': {
3268 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3269 'uploader': 'Sergey M.',
3270 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3271 'title': 'youtube-dl empty playlist',
3272 },
3273 'playlist_count': 0,
3274 }, {
3275 'note': 'Home tab',
3276 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3277 'info_dict': {
3278 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3279 'title': 'lex will - Home',
3280 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3281 'uploader': 'lex will',
3282 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3283 },
3284 'playlist_mincount': 2,
3285 }, {
3286 'note': 'Videos tab',
3287 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3288 'info_dict': {
3289 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3290 'title': 'lex will - Videos',
3291 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3292 'uploader': 'lex will',
3293 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3294 },
3295 'playlist_mincount': 975,
3296 }, {
3297 'note': 'Videos tab, sorted by popular',
3298 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3299 'info_dict': {
3300 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3301 'title': 'lex will - Videos',
3302 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3303 'uploader': 'lex will',
3304 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3305 },
3306 'playlist_mincount': 199,
3307 }, {
3308 'note': 'Playlists tab',
3309 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3310 'info_dict': {
3311 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3312 'title': 'lex will - Playlists',
3313 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3314 'uploader': 'lex will',
3315 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3316 },
3317 'playlist_mincount': 17,
3318 }, {
3319 'note': 'Community tab',
3320 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3321 'info_dict': {
3322 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3323 'title': 'lex will - Community',
3324 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3325 'uploader': 'lex will',
3326 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3327 },
3328 'playlist_mincount': 18,
3329 }, {
3330 'note': 'Channels tab',
3331 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3332 'info_dict': {
3333 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3334 'title': 'lex will - Channels',
3335 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3336 'uploader': 'lex will',
3337 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3338 },
3339 'playlist_mincount': 12,
3340 }, {
3341 'note': 'Search tab',
3342 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3343 'playlist_mincount': 40,
3344 'info_dict': {
3345 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3346 'title': '3Blue1Brown - Search - linear algebra',
3347 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3348 'uploader': '3Blue1Brown',
3349 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3350 },
3351 }, {
3352 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3353 'only_matching': True,
3354 }, {
3355 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3356 'only_matching': True,
3357 }, {
3358 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3359 'only_matching': True,
3360 }, {
3361 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3362 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3363 'info_dict': {
3364 'title': '29C3: Not my department',
3365 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3366 'uploader': 'Christiaan008',
3367 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3368 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3369 },
3370 'playlist_count': 96,
3371 }, {
3372 'note': 'Large playlist',
3373 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3374 'info_dict': {
3375 'title': 'Uploads from Cauchemar',
3376 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3377 'uploader': 'Cauchemar',
3378 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3379 },
3380 'playlist_mincount': 1123,
3381 }, {
3382 'note': 'even larger playlist, 8832 videos',
3383 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3384 'only_matching': True,
3385 }, {
3386 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3387 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3388 'info_dict': {
3389 'title': 'Uploads from Interstellar Movie',
3390 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3391 'uploader': 'Interstellar Movie',
3392 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3393 },
3394 'playlist_mincount': 21,
3395 }, {
3396 'note': 'Playlist with "show unavailable videos" button',
3397 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3398 'info_dict': {
3399 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3400 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3401 'uploader': 'Phim Siêu Nhân Nhật Bản',
3402 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3403 },
3404 'playlist_mincount': 200,
3405 }, {
3406 'note': 'Playlist with unavailable videos in page 7',
3407 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3408 'info_dict': {
3409 'title': 'Uploads from BlankTV',
3410 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3411 'uploader': 'BlankTV',
3412 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3413 },
3414 'playlist_mincount': 1000,
3415 }, {
3416 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3417 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3418 'info_dict': {
3419 'title': 'Data Analysis with Dr Mike Pound',
3420 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3421 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3422 'uploader': 'Computerphile',
3423 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3424 },
3425 'playlist_mincount': 11,
3426 }, {
3427 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3428 'only_matching': True,
3429 }, {
3430 'note': 'Playlist URL that does not actually serve a playlist',
3431 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3432 'info_dict': {
3433 'id': 'FqZTN594JQw',
3434 'ext': 'webm',
3435 'title': "Smiley's People 01 detective, Adventure Series, Action",
3436 'uploader': 'STREEM',
3437 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3438 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3439 'upload_date': '20150526',
3440 'license': 'Standard YouTube License',
3441 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3442 'categories': ['People & Blogs'],
3443 'tags': list,
3444 'view_count': int,
3445 'like_count': int,
3446 'dislike_count': int,
3447 },
3448 'params': {
3449 'skip_download': True,
3450 },
3451 'skip': 'This video is not available.',
3452 'add_ie': [YoutubeIE.ie_key()],
3453 }, {
3454 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3455 'only_matching': True,
3456 }, {
3457 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3458 'only_matching': True,
3459 }, {
3460 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3461 'info_dict': {
3462 'id': '3yImotZU3tw', # This will keep changing
3463 'ext': 'mp4',
3464 'title': compat_str,
3465 'uploader': 'Sky News',
3466 'uploader_id': 'skynews',
3467 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3468 'upload_date': r're:\d{8}',
3469 'description': compat_str,
3470 'categories': ['News & Politics'],
3471 'tags': list,
3472 'like_count': int,
3473 'dislike_count': int,
3474 },
3475 'params': {
3476 'skip_download': True,
3477 },
3478 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3479 }, {
3480 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3481 'info_dict': {
3482 'id': 'a48o2S1cPoo',
3483 'ext': 'mp4',
3484 'title': 'The Young Turks - Live Main Show',
3485 'uploader': 'The Young Turks',
3486 'uploader_id': 'TheYoungTurks',
3487 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3488 'upload_date': '20150715',
3489 'license': 'Standard YouTube License',
3490 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3491 'categories': ['News & Politics'],
3492 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3493 'like_count': int,
3494 'dislike_count': int,
3495 },
3496 'params': {
3497 'skip_download': True,
3498 },
3499 'only_matching': True,
3500 }, {
3501 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3502 'only_matching': True,
3503 }, {
3504 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3505 'only_matching': True,
3506 }, {
3507 'note': 'A channel that is not live. Should raise error',
3508 'url': 'https://www.youtube.com/user/numberphile/live',
3509 'only_matching': True,
3510 }, {
3511 'url': 'https://www.youtube.com/feed/trending',
3512 'only_matching': True,
3513 }, {
3514 'url': 'https://www.youtube.com/feed/library',
3515 'only_matching': True,
3516 }, {
3517 'url': 'https://www.youtube.com/feed/history',
3518 'only_matching': True,
3519 }, {
3520 'url': 'https://www.youtube.com/feed/subscriptions',
3521 'only_matching': True,
3522 }, {
3523 'url': 'https://www.youtube.com/feed/watch_later',
3524 'only_matching': True,
3525 }, {
3526 'note': 'Recommended - redirects to home page',
3527 'url': 'https://www.youtube.com/feed/recommended',
3528 'only_matching': True,
3529 }, {
3530 'note': 'inline playlist with not always working continuations',
3531 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3532 'only_matching': True,
3533 }, {
3534 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3535 'only_matching': True,
3536 }, {
3537 'url': 'https://www.youtube.com/course',
3538 'only_matching': True,
3539 }, {
3540 'url': 'https://www.youtube.com/zsecurity',
3541 'only_matching': True,
3542 }, {
3543 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3544 'only_matching': True,
3545 }, {
3546 'url': 'https://www.youtube.com/TheYoungTurks/live',
3547 'only_matching': True,
3548 }, {
3549 'url': 'https://www.youtube.com/hashtag/cctv9',
3550 'info_dict': {
3551 'id': 'cctv9',
3552 'title': '#cctv9',
3553 },
3554 'playlist_mincount': 350,
3555 }, {
3556 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3557 'only_matching': True,
3558 }, {
3559 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3560 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3561 'only_matching': True
3562 }, {
3563 'note': '/browse/ should redirect to /channel/',
3564 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3565 'only_matching': True
3566 }, {
3567 'note': 'VLPL, should redirect to playlist?list=PL...',
3568 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3569 'info_dict': {
3570 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3571 'uploader': 'NoCopyrightSounds',
3572 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3573 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3574 'title': 'NCS Releases',
3575 },
3576 'playlist_mincount': 166,
3577 }, {
3578 'note': 'Topic, should redirect to playlist?list=UU...',
3579 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3580 'info_dict': {
3581 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3582 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3583 'title': 'Uploads from Royalty Free Music - Topic',
3584 'uploader': 'Royalty Free Music - Topic',
3585 },
3586 'expected_warnings': [
3587 'A channel/user page was given',
3588 'The URL does not have a videos tab',
3589 ],
3590 'playlist_mincount': 101,
3591 }, {
3592 'note': 'Topic without a UU playlist',
3593 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3594 'info_dict': {
3595 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3596 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3597 },
3598 'expected_warnings': [
3599 'A channel/user page was given',
3600 'The URL does not have a videos tab',
3601 'Falling back to channel URL',
3602 ],
3603 'playlist_mincount': 9,
3604 }, {
3605 'note': 'Youtube music Album',
3606 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3607 'info_dict': {
3608 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3609 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3610 },
3611 'playlist_count': 50,
3612 }, {
3613 'note': 'unlisted single video playlist',
3614 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3615 'info_dict': {
3616 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3617 'uploader': 'colethedj',
3618 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3619 'title': 'yt-dlp unlisted playlist test',
3620 'availability': 'unlisted'
3621 },
3622 'playlist_count': 1,
3623 }]
3624
@classmethod
def suitable(cls, url):
    """Refuse URLs that belong to YoutubeIE (single videos); otherwise defer to the base class check."""
    if YoutubeIE.suitable(url):
        return False
    return super(YoutubeTabIE, cls).suitable(url)
3629
def _extract_channel_id(self, webpage):
    """Extract the channel id (UC...) from a channel webpage.

    Tries the ``channelId`` meta tag first; otherwise falls back to the
    canonical/social-meta channel URL and pulls the id out of its
    ``/channel/<id>`` path component.
    """
    channel_id = self._html_search_meta(
        'channelId', webpage, 'channel id', default=None)
    if channel_id:
        return channel_id
    channel_url = self._html_search_meta(
        ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
         'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
         'twitter:app:url:googleplay'), webpage, 'channel url')
    # Bug fix: the quantifier must be inside the capture group. The old
    # pattern ([^/?#&])+ repeated the group itself, so group(1) held only
    # the LAST character of the channel id instead of the whole id.
    return self._search_regex(
        r'https?://(?:www\.)?youtube\.com/channel/([^/?#&]+)',
        channel_url, 'channel id')
3642
3643 @staticmethod
3644 def _extract_basic_item_renderer(item):
3645 # Modified from _extract_grid_item_renderer
3646 known_basic_renderers = (
3647 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3648 )
3649 for key, renderer in item.items():
3650 if not isinstance(renderer, dict):
3651 continue
3652 elif key in known_basic_renderers:
3653 return renderer
3654 elif key.startswith('grid') and key.endswith('Renderer'):
3655 return renderer
3656
def _grid_entries(self, grid_renderer):
    """Yield entries for each item of a gridRenderer.

    Items are resolved in priority order: playlist, then video, then
    channel, then a generic navigation-endpoint URL handed to whichever
    sibling extractor claims it.
    """
    for item in grid_renderer['items']:
        if not isinstance(item, dict):
            continue
        renderer = self._extract_basic_item_renderer(item)
        if not isinstance(renderer, dict):
            continue
        title = self._get_text(renderer, 'title')

        # playlist
        playlist_id = renderer.get('playlistId')
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                video_title=title)
            continue
        # video
        video_id = renderer.get('videoId')
        if video_id:
            yield self._extract_video(renderer)
            continue
        # channel
        channel_id = renderer.get('channelId')
        if channel_id:
            yield self.url_result(
                'https://www.youtube.com/channel/%s' % channel_id,
                ie=YoutubeTabIE.ie_key(), video_title=title)
            continue
        # generic endpoint URL support
        ep_url = urljoin('https://www.youtube.com/', try_get(
            renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
            compat_str))
        if ep_url:
            # first suitable extractor wins; order prefers tab/playlist over video
            for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                if ie.suitable(ep_url):
                    yield self.url_result(
                        ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                    break
3696
def _shelf_entries_from_content(self, shelf_renderer):
    """Yield entries from a shelf's inner content (grid-like renderers only)."""
    content = shelf_renderer.get('content')
    if not isinstance(content, dict):
        return
    grid_like = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
    if grid_like:
        # TODO: add support for nested playlists so each shelf is processed
        # as separate playlist
        # TODO: this includes only first N items
        for entry in self._grid_entries(grid_like):
            yield entry
    if content.get('horizontalListRenderer'):
        # TODO: horizontal lists are not handled yet
        pass
3712
def _shelf_entries(self, shelf_renderer, skip_channels=False):
    """Yield a shelf's own URL (if any) and then entries from its content."""
    ep = try_get(
        shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str)
    shelf_url = urljoin('https://www.youtube.com', ep)
    if shelf_url:
        # Skipping links to another channels, note that checking for
        # endpoint.commandMetadata.webCommandMetadata.webPageType == WEB_PAGE_TYPE_CHANNEL
        # will not work
        if skip_channels and '/channels?' in shelf_url:
            return
        title = self._get_text(shelf_renderer, 'title')
        yield self.url_result(shelf_url, video_title=title)
    # Shelf may not contain shelf URL, fallback to extraction from content
    for entry in self._shelf_entries_from_content(shelf_renderer):
        yield entry
3729
def _playlist_entries(self, video_list_renderer):
    """Yield extracted videos from a playlist (or playlist-panel) video list renderer."""
    for cell in video_list_renderer['contents']:
        if not isinstance(cell, dict):
            continue
        video = cell.get('playlistVideoRenderer') or cell.get('playlistPanelVideoRenderer')
        if not isinstance(video, dict):
            continue
        if video.get('videoId'):
            yield self._extract_video(video)
3741
def _rich_entries(self, rich_grid_renderer):
    """Yield the video inside a rich-grid cell, if it carries a videoId."""
    video = try_get(
        rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
    if video.get('videoId'):
        yield self._extract_video(video)
3749
def _video_entry(self, video_renderer):
    """Return an extracted video dict, or None when the renderer has no videoId."""
    if video_renderer.get('videoId'):
        return self._extract_video(video_renderer)
3754
def _post_thread_entries(self, post_thread_renderer):
    """Yield entries found in a community (backstage) post.

    Covers, in order: an attached video, an attached playlist, and any
    YouTube video links embedded in the post text.
    """
    post_renderer = try_get(
        post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
    if not post_renderer:
        return
    # video attachment
    video_renderer = try_get(
        post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
    video_id = video_renderer.get('videoId')
    if video_id:
        entry = self._extract_video(video_renderer)
        if entry:
            yield entry
    # playlist attachment
    playlist_id = try_get(
        post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
    if playlist_id:
        yield self.url_result(
            'https://www.youtube.com/playlist?list=%s' % playlist_id,
            ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
    # inline video links
    runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
    for run in runs:
        if not isinstance(run, dict):
            continue
        ep_url = try_get(
            run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
        if not ep_url:
            continue
        if not YoutubeIE.suitable(ep_url):
            continue
        ep_video_id = YoutubeIE._match_id(ep_url)
        # skip a link that merely repeats the attached video
        if video_id == ep_video_id:
            continue
        yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3790
def _post_thread_continuation_entries(self, post_thread_continuation):
    """Yield entries from every backstage post thread in a continuation page."""
    contents = post_thread_continuation.get('contents')
    if not isinstance(contents, list):
        return
    for content in contents:
        thread = content.get('backstagePostThreadRenderer')
        if not isinstance(thread, dict):
            continue
        for entry in self._post_thread_entries(thread):
            yield entry
3801
r''' # unused
def _rich_grid_entries(self, contents):
    for content in contents:
        video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
        if video_renderer:
            entry = self._video_entry(video_renderer)
            if entry:
                yield entry
'''
def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
    """Yield every entry of *tab*, transparently following API continuations.

    First extracts entries from the renderers already present in the tab
    content, then keeps requesting continuation pages until no further
    continuation token is found in a response.
    """

    def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # maps renderer key -> generator of entries for it
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    break

            # fall back to a continuation found on the section renderer itself
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)

    # single-element list used as a mutable cell (Python 2 does not support nonlocal)
    continuation_list = [None]
    tab_content = try_get(tab, lambda x: x['content'], dict)
    if not tab_content:
        return
    parent_renderer = (
        try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
        or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
    for entry in extract_entries(parent_renderer):
        yield entry
    continuation = continuation_list[0]
    visitor_data = None

    for page_num in itertools.count(1):
        if not continuation:
            break
        headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
        response = self._extract_response(
            item_id='%s page %s' % (item_id, page_num),
            query=continuation, headers=headers, ytcfg=ytcfg,
            check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

        if not response:
            break
        # carry visitorData over to subsequent requests (reused in the headers above)
        visitor_data = try_get(
            response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

        # Legacy response shape: continuation data under 'continuationContents'
        known_continuation_renderers = {
            'playlistVideoListContinuation': self._playlist_entries,
            'gridContinuation': self._grid_entries,
            'itemSectionContinuation': self._post_thread_continuation_entries,
            'sectionListContinuation': extract_entries,  # for feeds
        }
        continuation_contents = try_get(
            response, lambda x: x['continuationContents'], dict) or {}
        continuation_renderer = None
        for key, value in continuation_contents.items():
            if key not in known_continuation_renderers:
                continue
            continuation_renderer = value
            continuation_list = [None]
            for entry in known_continuation_renderers[key](continuation_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
            break
        if continuation_renderer:
            continue

        # Newer response shape: items under onResponseReceived* actions.
        # Maps the first item's renderer key -> (handler, key to wrap items under)
        known_renderers = {
            'gridPlaylistRenderer': (self._grid_entries, 'items'),
            'gridVideoRenderer': (self._grid_entries, 'items'),
            'gridChannelRenderer': (self._grid_entries, 'items'),
            'playlistVideoRenderer': (self._playlist_entries, 'contents'),
            'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
            'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
            'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
        }
        on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
        continuation_items = try_get(
            on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
        continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
        video_items_renderer = None
        for key, value in continuation_item.items():
            if key not in known_renderers:
                continue
            # wrap the raw item list so the existing handlers can consume it
            video_items_renderer = {known_renderers[key][1]: continuation_items}
            continuation_list = [None]
            for entry in known_renderers[key][0](video_items_renderer):
                yield entry
            continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
            break
        if video_items_renderer:
            continue
        break
3926
@staticmethod
def _extract_selected_tab(tabs):
    """Return the renderer of the tab marked as selected; raise if none is."""
    for tab in tabs:
        renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
        if renderer.get('selected') is True:
            return renderer
    raise ExtractorError('Unable to find selected tab')
3935
@classmethod
def _extract_uploader(cls, data):
    """Extract uploader name/id/url from the playlist sidebar owner renderer.

    Keys whose value could not be determined are omitted from the result.
    """
    uploader = {}
    renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
    owner = try_get(
        renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
    if owner:
        uploader['uploader'] = owner.get('text')
        uploader['uploader_id'] = try_get(
            owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
        uploader['uploader_url'] = urljoin(
            'https://www.youtube.com/',
            try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
    # drop None values so they don't clobber fields elsewhere
    return {k: v for k, v in uploader.items() if v is not None}
3950
def _extract_from_tabs(self, item_id, webpage, data, tabs):
    """Build the playlist result for a tab page (channel / playlist / hashtag).

    Pulls title, description, uploader and thumbnails from the channel or
    playlist metadata renderers, then yields entries from the selected tab.
    """
    playlist_id = title = description = channel_url = channel_name = channel_id = None
    thumbnails_list = tags = []

    selected_tab = self._extract_selected_tab(tabs)
    # channel pages carry channelMetadataRenderer; playlist pages carry
    # playlistMetadataRenderer instead
    renderer = try_get(
        data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
    if renderer:
        channel_name = renderer.get('title')
        channel_url = renderer.get('channelUrl')
        channel_id = renderer.get('externalId')
    else:
        renderer = try_get(
            data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

    if renderer:
        title = renderer.get('title')
        description = renderer.get('description', '')
        playlist_id = channel_id
        tags = renderer.get('keywords', '').split()
        # prefer the avatar; fall back to the sidebar playlist thumbnail
        thumbnails_list = (
            try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
            or try_get(
                self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                list)
            or [])

    thumbnails = []
    for t in thumbnails_list:
        if not isinstance(t, dict):
            continue
        thumbnail_url = url_or_none(t.get('url'))
        if not thumbnail_url:
            continue
        thumbnails.append({
            'url': thumbnail_url,
            'width': int_or_none(t.get('width')),
            'height': int_or_none(t.get('height')),
        })
    if playlist_id is None:
        playlist_id = item_id
    if title is None:
        # hashtag pages carry the title in the hashtag header renderer
        title = (
            try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
            or playlist_id)
    title += format_field(selected_tab, 'title', ' - %s')
    title += format_field(selected_tab, 'expandedText', ' - %s')
    metadata = {
        'playlist_id': playlist_id,
        'playlist_title': title,
        'playlist_description': description,
        'uploader': channel_name,
        'uploader_id': channel_id,
        'uploader_url': channel_url,
        'thumbnails': thumbnails,
        'tags': tags,
    }
    availability = self._extract_availability(data)
    if availability:
        metadata['availability'] = availability
    if not channel_id:
        # playlist pages: take uploader info from the sidebar instead
        metadata.update(self._extract_uploader(data))
    metadata.update({
        'channel': metadata['uploader'],
        'channel_id': metadata['uploader_id'],
        'channel_url': metadata['uploader_url']})
    ytcfg = self.extract_ytcfg(item_id, webpage)
    return self.playlist_result(
        self._entries(
            selected_tab, playlist_id,
            self._extract_identity_token(webpage, item_id),
            self._extract_account_syncid(ytcfg, data), ytcfg),
        **metadata)
4025
def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
    """Yield videos of an auto-generated Mix playlist, paging via the 'next' endpoint.

    Stops when a page yields no videos, when no new videos appear after the
    last one already seen, or when the first video comes around again
    (the mix has wrapped).
    """
    first_id = last_id = None
    ytcfg = self.extract_ytcfg(playlist_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
    for page_num in itertools.count(1):
        videos = list(self._playlist_entries(playlist))
        if not videos:
            return
        # resume right after the last video yielded from the previous page;
        # -1 + 1 == 0 when last_id is not found (first page)
        start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
        if start >= len(videos):
            return
        for video in videos[start:]:
            if video['id'] == first_id:
                self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                return
            yield video
        first_id = first_id or videos[0]['id']
        last_id = videos[-1]['id']
        watch_endpoint = try_get(
            playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
        query = {
            'playlistId': playlist_id,
            'videoId': watch_endpoint.get('videoId') or last_id,
            'index': watch_endpoint.get('index') or len(videos),
            'params': watch_endpoint.get('params') or 'OAE%3D'
        }
        response = self._extract_response(
            item_id='%s page %d' % (playlist_id, page_num),
            query=query, ep='next', headers=headers, ytcfg=ytcfg,
            check_get_keys='contents'
        )
        playlist = try_get(
            response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4061
def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
    """Extract a playlist given its watch-page renderer.

    Everything except mix playlists is delegated to the regular tab-based
    playlist URL; mixes are resolved in place via _extract_mix_playlist.
    """
    title = playlist.get('title') or try_get(
        data, lambda x: x['titleText']['simpleText'], compat_str)
    playlist_id = playlist.get('playlistId') or item_id

    # Delegating everything except mix playlists to regular tab-based playlist URL
    playlist_url = urljoin(url, try_get(
        playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
        compat_str))
    if playlist_url and playlist_url != url:
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
            video_title=title)

    return self.playlist_result(
        self._extract_mix_playlist(playlist, playlist_id, data, webpage),
        playlist_id=playlist_id, playlist_title=title)
4079
def _extract_availability(self, data):
    """
    Gets the availability of a given playlist/tab.
    Note: Unless YouTube tells us explicitly, we do not assume it is public
    @param data: response
    """
    is_private = is_unlisted = None
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
    badge_labels = self._extract_badges(renderer)

    # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
    privacy_dropdown_entries = try_get(
        renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
    for renderer_dict in privacy_dropdown_entries:
        is_selected = try_get(
            renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
        if not is_selected:
            continue
        # treat the selected dropdown label like a badge label
        label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
        if label:
            badge_labels.add(label.lower())
        break

    for badge_label in badge_labels:
        if badge_label == 'unlisted':
            is_unlisted = True
        elif badge_label == 'private':
            is_private = True
        elif badge_label == 'public':
            is_unlisted = is_private = False
    return self._availability(is_private, False, False, False, is_unlisted)
4111
@staticmethod
def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
    """Return the first playlist-sidebar item keyed by *info_renderer*, or None."""
    items = try_get(
        data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
    for item in items:
        found = try_get(item, lambda x: x[info_renderer], expected_type)
        if found:
            return found
4120
def _reload_with_unavailable_videos(self, item_id, data, webpage):
    """
    Get playlist with unavailable videos if the 'show unavailable videos' button exists.
    """
    browse_id = params = None
    renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
    if not renderer:
        return
    # look for the menu item labelled 'show unavailable videos'
    menu_renderer = try_get(
        renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
    for menu_item in menu_renderer:
        if not isinstance(menu_item, dict):
            continue
        nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
        text = try_get(
            nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
        if not text or text.lower() != 'show unavailable videos':
            continue
        browse_endpoint = try_get(
            nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
        browse_id = browse_endpoint.get('browseId')
        params = browse_endpoint.get('params')
        break

    ytcfg = self.extract_ytcfg(item_id, webpage)
    headers = self.generate_api_headers(
        ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
        identity_token=self._extract_identity_token(webpage, item_id=item_id),
        visitor_data=try_get(
            self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
    # fall back to the generic 'include unavailable' params and the VL-prefixed
    # browse id when the menu did not supply them
    query = {
        'params': params or 'wgYCCAA=',
        'browseId': browse_id or 'VL%s' % item_id
    }
    return self._extract_response(
        item_id=item_id, headers=headers, query=query,
        check_get_keys='contents', fatal=False, ytcfg=ytcfg,
        note='Downloading API JSON with unavailable videos')
4159
4160 def _extract_webpage(self, url, item_id):
4161 retries = self.get_param('extractor_retries', 3)
4162 count = -1
4163 last_error = 'Incomplete yt initial data recieved'
4164 while count < retries:
4165 count += 1
4166 # Sometimes youtube returns a webpage with incomplete ytInitialData
4167 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4168 if count:
4169 self.report_warning('%s. Retrying ...' % last_error)
4170 webpage = self._download_webpage(
4171 url, item_id,
4172 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4173 data = self.extract_yt_initial_data(item_id, webpage)
4174 if data.get('contents') or data.get('currentVideoEndpoint'):
4175 break
4176 # Extract alerts here only when there is error
4177 self._extract_and_report_alerts(data)
4178 if count >= retries:
4179 raise ExtractorError(last_error)
4180 return webpage, data
4181
4182 @staticmethod
4183 def _smuggle_data(entries, data):
4184 for entry in entries:
4185 if data:
4186 entry['url'] = smuggle_url(entry['url'], data)
4187 yield entry
4188
4189 def _real_extract(self, url):
4190 url, smuggled_data = unsmuggle_url(url, {})
4191 if self.is_music_url(url):
4192 smuggled_data['is_music_url'] = True
4193 info_dict = self.__real_extract(url, smuggled_data)
4194 if info_dict.get('entries'):
4195 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4196 return info_dict
4197
    # Splits a matching URL into the part before the tab, the '/tab' component
    # itself (only when the conditional 'channel_type' group matched) and the rest
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4199
    def __real_extract(self, url, smuggled_data):
        """Resolve a tab/playlist/watch URL and dispatch to the matching extractor path.

        Normalizes the host to www.youtube.com, rewrites music channel/browse
        variants to their playlist equivalents, downloads the page, then tries
        (in order): channel tabs, an embedded watch-next playlist, a bare video.
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # groupdict with None values replaced by '' so lookups never need guards
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data, only_once=True)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4314
4315
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Defer to YoutubeTabIE for tab URLs and bail out when a video id is present."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Normalize to a canonical /playlist URL and hand off to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            # Smuggled flag tells YoutubeTabIE this came from a music.youtube.com URL
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4400
4401
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    # Matches youtu.be short links that also carry a playlist id in 'list='
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Rebuild a canonical watch URL carrying both ids and let YoutubeTabIE
        # choose between the single video and the playlist
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4440
4441
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Delegate to the tab extractor via the canonical /user/ URL
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4455
4456
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos live in the special 'LL' playlist; hand off to the tab extractor
        liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_playlist_url, ie=YoutubeTabIE.ie_key())
4474
4475
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra protobuf search parameter; subclasses may override (e.g. date ordering)
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* video results for *query*, paging via API continuations."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page nests results differently from continuation responses
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token found on this page: end of results
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
4543
4544
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Search parameter selecting upload-date ordering (see IE_DESC above)
    _SEARCH_PARAMS = 'CAI%3D'
4550
4551
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Unlike the keyword-based parent, this variant matches real result URLs
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search taken from a /results URL's query string."""
        params = parse_qs(url)
        query = (params.get('search_query') or params.get('q'))[0]
        # 'sp' carries the search filters/ordering, same slot as _SEARCH_PARAMS
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4578
4579
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derived per feed, e.g. 'youtube:recommended'
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Every feed lives under /feed/<name>; the tab extractor does the real work
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4596
4597
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # 'WL' is the user's watch-later playlist; hand off to the tab extractor
        return self.url_result(
            'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4610
4611
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com homepage
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # overrides the base class's _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4627
4628
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    # Resolved by the base class to https://www.youtube.com/feed/subscriptions
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4640
4641
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    # Resolved by the base class to https://www.youtube.com/feed/history
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4650
4651
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches watch/attribution URLs that carry query parameters but no video id -
    # the typical result of an unquoted '&' eating the rest of the URL in a shell
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Always fails - the whole point is to give an actionable hint
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .',
            expected=True)
4699
4700
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Matches watch URLs whose video id is shorter than the standard 11 characters
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Fail with an explanation instead of querying YouTube with a bad id
        video_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
            expected=True)