# Source: yt-dlp — yt_dlp/extractor/youtube.py (commit: Add field `live_status`)
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 mimetype2ext,
42 orderedSet,
43 parse_codecs,
44 parse_count,
45 parse_duration,
46 qualities,
47 remove_start,
48 smuggle_url,
49 str_or_none,
50 str_to_int,
51 traverse_obj,
52 try_get,
53 unescapeHTML,
54 unified_strdate,
55 unsmuggle_url,
56 update_url_query,
57 url_or_none,
58 urlencode_postdata,
59 urljoin,
60 variadic,
61 )
62
63
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    parsed = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed.query)
66
67
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account login/challenge endpoints (legacy; form login is broken)
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # URL path components that can never be a channel/user name
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches playlist IDs (prefixed forms plus the special RDMM/WL/LL/LM lists)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
87
88 def _login(self):
89 """
90 Attempt to log in to YouTube.
91 True is returned if successful or skipped.
92 False is returned if login failed.
93
94 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
95 """
96
97 def warn(message):
98 self.report_warning(message)
99
100 # username+password login is broken
101 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
102 self.raise_login_required(
103 'Login details are needed to download this content', method='cookies')
104 username, password = self._get_login_info()
105 if username:
106 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
107 return
108
109 # Everything below this is broken!
110 r'''
111 # No authentication to be performed
112 if username is None:
113 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
114 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
115 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
116 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
117 return True
118
119 login_page = self._download_webpage(
120 self._LOGIN_URL, None,
121 note='Downloading login page',
122 errnote='unable to fetch login page', fatal=False)
123 if login_page is False:
124 return
125
126 login_form = self._hidden_inputs(login_page)
127
128 def req(url, f_req, note, errnote):
129 data = login_form.copy()
130 data.update({
131 'pstMsg': 1,
132 'checkConnection': 'youtube',
133 'checkedDomains': 'youtube',
134 'hl': 'en',
135 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
136 'f.req': json.dumps(f_req),
137 'flowName': 'GlifWebSignIn',
138 'flowEntry': 'ServiceLogin',
139 # TODO: reverse actual botguard identifier generation algo
140 'bgRequest': '["identifier",""]',
141 })
142 return self._download_json(
143 url, None, note=note, errnote=errnote,
144 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
145 fatal=False,
146 data=urlencode_postdata(data), headers={
147 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
148 'Google-Accounts-XSRF': 1,
149 })
150
151 lookup_req = [
152 username,
153 None, [], None, 'US', None, None, 2, False, True,
154 [
155 None, None,
156 [2, 1, None, 1,
157 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
158 None, [], 4],
159 1, [None, None, []], None, None, None, True
160 ],
161 username,
162 ]
163
164 lookup_results = req(
165 self._LOOKUP_URL, lookup_req,
166 'Looking up account info', 'Unable to look up account info')
167
168 if lookup_results is False:
169 return False
170
171 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
172 if not user_hash:
173 warn('Unable to extract user hash')
174 return False
175
176 challenge_req = [
177 user_hash,
178 None, 1, None, [1, None, None, None, [password, None, True]],
179 [
180 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
181 1, [None, None, []], None, None, None, True
182 ]]
183
184 challenge_results = req(
185 self._CHALLENGE_URL, challenge_req,
186 'Logging in', 'Unable to log in')
187
188 if challenge_results is False:
189 return
190
191 login_res = try_get(challenge_results, lambda x: x[0][5], list)
192 if login_res:
193 login_msg = try_get(login_res, lambda x: x[5], compat_str)
194 warn(
195 'Unable to login: %s' % 'Invalid password'
196 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
197 return False
198
199 res = try_get(challenge_results, lambda x: x[0][-1], list)
200 if not res:
201 warn('Unable to extract result entry')
202 return False
203
204 login_challenge = try_get(res, lambda x: x[0][0], list)
205 if login_challenge:
206 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
207 if challenge_str == 'TWO_STEP_VERIFICATION':
208 # SEND_SUCCESS - TFA code has been successfully sent to phone
209 # QUOTA_EXCEEDED - reached the limit of TFA codes
210 status = try_get(login_challenge, lambda x: x[5], compat_str)
211 if status == 'QUOTA_EXCEEDED':
212 warn('Exceeded the limit of TFA codes, try later')
213 return False
214
215 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
216 if not tl:
217 warn('Unable to extract TL')
218 return False
219
220 tfa_code = self._get_tfa_info('2-step verification code')
221
222 if not tfa_code:
223 warn(
224 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
225 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
226 return False
227
228 tfa_code = remove_start(tfa_code, 'G-')
229
230 tfa_req = [
231 user_hash, None, 2, None,
232 [
233 9, None, None, None, None, None, None, None,
234 [None, tfa_code, True, 2]
235 ]]
236
237 tfa_results = req(
238 self._TFA_URL.format(tl), tfa_req,
239 'Submitting TFA code', 'Unable to submit TFA code')
240
241 if tfa_results is False:
242 return False
243
244 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
245 if tfa_res:
246 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
247 warn(
248 'Unable to finish TFA: %s' % 'Invalid TFA code'
249 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
250 return False
251
252 check_cookie_url = try_get(
253 tfa_results, lambda x: x[0][-1][2], compat_str)
254 else:
255 CHALLENGES = {
256 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
257 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
258 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
259 }
260 challenge = CHALLENGES.get(
261 challenge_str,
262 '%s returned error %s.' % (self.IE_NAME, challenge_str))
263 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
264 return False
265 else:
266 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
267
268 if not check_cookie_url:
269 warn('Unable to extract CheckCookie URL')
270 return False
271
272 check_cookie_results = self._download_webpage(
273 check_cookie_url, None, 'Checking cookie', fatal=False)
274
275 if check_cookie_results is False:
276 return False
277
278 if 'https://myaccount.google.com/' not in check_cookie_results:
279 warn('Unable to log in')
280 return False
281
282 return True
283 '''
284
285 def _initialize_consent(self):
286 cookies = self._get_cookies('https://www.youtube.com/')
287 if cookies.get('__Secure-3PSID'):
288 return
289 consent_id = None
290 consent = cookies.get('CONSENT')
291 if consent:
292 if 'YES' in consent.value:
293 return
294 consent_id = self._search_regex(
295 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
296 if not consent_id:
297 consent_id = random.randint(100, 999)
298 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
299
300 def _real_initialize(self):
301 self._initialize_consent()
302 if self._downloader is None:
303 return
304 if not self._login():
305 return
306
    # Matches the JSON blob assigned to ytInitialData in watch/browse pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Matches the JSON blob assigned to ytInitialPlayerResponse
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Token that reliably follows the above blobs; used to anchor the non-greedy match
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
310
    # Built-in fallback ytcfg values, keyed by innertube client name. Used when
    # a client's configuration cannot be scraped from the page.
    # NOTE(review): the API keys and client versions here go stale over time;
    # a page-extracted ytcfg takes precedence when available.
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 3
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 55
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 21
        },
        'IOS': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 5

        },
        'IOS_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 26
        },
        'IOS_MESSAGES_EXTENSION': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS_MESSAGES_EXTENSION',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 66
        }
    }
440
    # Hostname used for innertube API requests, per client; clients not listed
    # here fall back to the WEB host (see _get_innertube_host)
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }
447
    # Maps user-facing client names to innertube client names.
    # Clients starting with _ cannot be explicitly requested by the user.
    _YT_CLIENTS = {
        'web': 'WEB',
        'web_music': 'WEB_REMIX',
        '_web_embedded': 'WEB_EMBEDDED_PLAYER',
        '_web_agegate': 'TVHTML5',
        'android': 'ANDROID',
        'android_music': 'ANDROID_MUSIC',
        '_android_embedded': 'ANDROID_EMBEDDED_PLAYER',
        '_android_agegate': 'ANDROID',
        'ios': 'IOS',
        'ios_music': 'IOS_MUSIC',
        '_ios_embedded': 'IOS_MESSAGES_EXTENSION',
        '_ios_agegate': 'IOS'
    }
463
464 def _get_default_ytcfg(self, client='WEB'):
465 if client in self._YT_DEFAULT_YTCFGS:
466 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
467 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
468 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
469
470 def _get_innertube_host(self, client='WEB'):
471 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
472
473 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
474 # try_get but with fallback to default ytcfg client values when present
475 _func = lambda y: try_get(y, getter, expected_type)
476 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
477
478 def _extract_client_name(self, ytcfg, default_client='WEB'):
479 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
480
481 @staticmethod
482 def _extract_session_index(*data):
483 for ytcfg in data:
484 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
485 if session_index is not None:
486 return session_index
487
488 def _extract_client_version(self, ytcfg, default_client='WEB'):
489 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
490
491 def _extract_api_key(self, ytcfg=None, default_client='WEB'):
492 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
493
494 def _extract_context(self, ytcfg=None, default_client='WEB'):
495 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
496 context = _get_context(ytcfg)
497 if context:
498 return context
499
500 context = _get_context(self._get_default_ytcfg(default_client))
501 if not ytcfg:
502 return context
503
504 # Recreate the client context (required)
505 context['client'].update({
506 'clientVersion': self._extract_client_version(ytcfg, default_client),
507 'clientName': self._extract_client_name(ytcfg, default_client),
508 })
509 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
510 if visitor_data:
511 context['client']['visitorData'] = visitor_data
512 return context
513
514 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
515 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
516 # See: https://github.com/yt-dlp/yt-dlp/issues/393
517 yt_cookies = self._get_cookies('https://www.youtube.com')
518 sapisid_cookie = dict_get(
519 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
520 if sapisid_cookie is None:
521 return
522 time_now = round(time.time())
523 # SAPISID cookie is required if not already present
524 if not yt_cookies.get('SAPISID'):
525 self._set_cookie(
526 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
527 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
528 sapisidhash = hashlib.sha1(
529 f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
530 return f'SAPISIDHASH {time_now}_{sapisidhash}'
531
532 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
533 note='Downloading API JSON', errnote='Unable to download API page',
534 context=None, api_key=None, api_hostname=None, default_client='WEB'):
535
536 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
537 data.update(query)
538 real_headers = self.generate_api_headers(default_client=default_client)
539 real_headers.update({'content-type': 'application/json'})
540 if headers:
541 real_headers.update(headers)
542 return self._download_json(
543 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
544 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
545 data=json.dumps(data).encode('utf8'), headers=real_headers,
546 query={'key': api_key or self._extract_api_key()})
547
548 def extract_yt_initial_data(self, video_id, webpage):
549 return self._parse_json(
550 self._search_regex(
551 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
552 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
553 video_id)
554
555 def _extract_identity_token(self, webpage, item_id):
556 if not webpage:
557 return None
558 ytcfg = self.extract_ytcfg(item_id, webpage)
559 if ytcfg:
560 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
561 if token:
562 return token
563 return self._search_regex(
564 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
565 'identity token', default=None)
566
567 @staticmethod
568 def _extract_account_syncid(*args):
569 """
570 Extract syncId required to download private playlists of secondary channels
571 @params response and/or ytcfg
572 """
573 for data in args:
574 # ytcfg includes channel_syncid if on secondary channel
575 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
576 if delegated_sid:
577 return delegated_sid
578 sync_ids = (try_get(
579 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
580 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
581 if len(sync_ids) >= 2 and sync_ids[1]:
582 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
583 # and just "user_syncid||" for primary channel. We only want the channel_syncid
584 return sync_ids[0]
585
586 def extract_ytcfg(self, video_id, webpage):
587 if not webpage:
588 return {}
589 return self._parse_json(
590 self._search_regex(
591 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
592 default='{}'), video_id, fatal=False) or {}
593
594 def generate_api_headers(
595 self, ytcfg=None, identity_token=None, account_syncid=None,
596 visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
597 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
598 headers = {
599 'X-YouTube-Client-Name': compat_str(
600 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
601 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
602 'Origin': origin
603 }
604 if not visitor_data and ytcfg:
605 visitor_data = try_get(
606 self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
607 if identity_token:
608 headers['X-Youtube-Identity-Token'] = identity_token
609 if account_syncid:
610 headers['X-Goog-PageId'] = account_syncid
611 if session_index is None and ytcfg:
612 session_index = self._extract_session_index(ytcfg)
613 if account_syncid or session_index is not None:
614 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
615 if visitor_data:
616 headers['X-Goog-Visitor-Id'] = visitor_data
617 auth = self._generate_sapisidhash_header(origin)
618 if auth is not None:
619 headers['Authorization'] = auth
620 headers['X-Origin'] = origin
621 return headers
622
623 @staticmethod
624 def _build_api_continuation_query(continuation, ctp=None):
625 query = {
626 'continuation': continuation
627 }
628 # TODO: Inconsistency with clickTrackingParams.
629 # Currently we have a fixed ctp contained within context (from ytcfg)
630 # and a ctp in root query for continuation.
631 if ctp:
632 query['clickTracking'] = {'clickTrackingParams': ctp}
633 return query
634
635 @classmethod
636 def _extract_next_continuation_data(cls, renderer):
637 next_continuation = try_get(
638 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
639 lambda x: x['continuation']['reloadContinuationData']), dict)
640 if not next_continuation:
641 return
642 continuation = next_continuation.get('continuation')
643 if not continuation:
644 return
645 ctp = next_continuation.get('clickTrackingParams')
646 return cls._build_api_continuation_query(continuation, ctp)
647
648 @classmethod
649 def _extract_continuation_ep_data(cls, continuation_ep: dict):
650 if isinstance(continuation_ep, dict):
651 continuation = try_get(
652 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
653 if not continuation:
654 return
655 ctp = continuation_ep.get('clickTrackingParams')
656 return cls._build_api_continuation_query(continuation, ctp)
657
658 @classmethod
659 def _extract_continuation(cls, renderer):
660 next_continuation = cls._extract_next_continuation_data(renderer)
661 if next_continuation:
662 return next_continuation
663
664 contents = []
665 for key in ('contents', 'items'):
666 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
667
668 for content in contents:
669 if not isinstance(content, dict):
670 continue
671 continuation_ep = try_get(
672 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
673 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
674 dict)
675 continuation = cls._extract_continuation_ep_data(continuation_ep)
676 if continuation:
677 return continuation
678
679 @classmethod
680 def _extract_alerts(cls, data):
681 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
682 if not isinstance(alert_dict, dict):
683 continue
684 for alert in alert_dict.values():
685 alert_type = alert.get('type')
686 if not alert_type:
687 continue
688 message = cls._get_text(alert.get('text'))
689 if message:
690 yield alert_type, message
691
692 def _report_alerts(self, alerts, expected=True):
693 errors = []
694 warnings = []
695 for alert_type, alert_message in alerts:
696 if alert_type.lower() == 'error':
697 errors.append([alert_type, alert_message])
698 else:
699 warnings.append([alert_type, alert_message])
700
701 for alert_type, alert_message in (warnings + errors[:-1]):
702 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
703 if errors:
704 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
705
706 def _extract_and_report_alerts(self, data, *args, **kwargs):
707 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
708
709 def _extract_badges(self, renderer: dict):
710 badges = set()
711 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
712 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
713 if label:
714 badges.add(label.lower())
715 return badges
716
    @staticmethod
    def _get_text(data, getter=None, max_runs=None):
        """Extract plain text from a YouTube "text object" ('simpleText' or 'runs').

        @param data     the renderer (or bare list of runs) to read text from
        @param getter   optional callable(s) applied to *data* first (try_get style);
                        multiple getters are tried in order until one yields text
        @param max_runs if given (and non-zero), only the first *max_runs* runs
                        are joined; 0 behaves like None (all runs)
        """
        for get in variadic(getter):
            # getter=None means "use data as-is" (variadic(None) yields one None)
            d = try_get(data, get) if get is not None else data
            # Simple case: {'simpleText': '...'}
            text = try_get(d, lambda x: x['simpleText'], compat_str)
            if text:
                return text
            runs = try_get(d, lambda x: x['runs'], list) or []
            # A bare list is treated as the runs themselves
            if not runs and isinstance(d, list):
                runs = d

            def get_runs(runs):
                for run in runs[:min(len(runs), max_runs or len(runs))]:
                    yield try_get(run, lambda x: x['text'], compat_str) or ''

            text = ''.join(get_runs(runs))
            if text:
                return text
735
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """Call the innertube API with retries.

        Retries on intermittent HTTP 500/503/404 errors and on responses that
        lack all of *check_get_keys* (YouTube sometimes sends incomplete data).
        Returns the parsed JSON response, or None when non-fatal and all
        attempts failed.
        """
        response = None
        last_error = None
        count = -1  # incremented before first attempt, so attempt 0 is not a retry
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                # fatal=True here so HTTP errors surface as ExtractorError and
                # can be classified below; outer `fatal` decides final behaviour
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
791
792 @staticmethod
793 def is_music_url(url):
794 return re.match(r'https?://music\.youtube\.com/', url) is not None
795
796 def _extract_video(self, renderer):
797 video_id = renderer.get('videoId')
798 title = self._get_text(renderer.get('title'))
799 description = self._get_text(renderer.get('descriptionSnippet'))
800 duration = parse_duration(self._get_text(renderer.get('lengthText')))
801 view_count_text = self._get_text(renderer.get('viewCountText')) or ''
802 view_count = str_to_int(self._search_regex(
803 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
804 'view count', default=None))
805
806 uploader = self._get_text(renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText']))
807
808 return {
809 '_type': 'url',
810 'ie_key': YoutubeIE.ie_key(),
811 'id': video_id,
812 'url': video_id,
813 'title': title,
814 'description': description,
815 'duration': duration,
816 'view_count': view_count,
817 'uploader': uploader,
818 }
819
820
821 class YoutubeIE(YoutubeBaseInfoExtractor):
822 IE_DESC = 'YouTube.com'
823 _INVIDIOUS_SITES = (
824 # invidious-redirect websites
825 r'(?:www\.)?redirect\.invidious\.io',
826 r'(?:(?:www|dev)\.)?invidio\.us',
827 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
828 r'(?:www\.)?invidious\.pussthecat\.org',
829 r'(?:www\.)?invidious\.zee\.li',
830 r'(?:www\.)?invidious\.ethibox\.fr',
831 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
832 # youtube-dl invidious instances list
833 r'(?:(?:www|no)\.)?invidiou\.sh',
834 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
835 r'(?:www\.)?invidious\.kabi\.tk',
836 r'(?:www\.)?invidious\.mastodon\.host',
837 r'(?:www\.)?invidious\.zapashcanon\.fr',
838 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
839 r'(?:www\.)?invidious\.tinfoil-hat\.net',
840 r'(?:www\.)?invidious\.himiko\.cloud',
841 r'(?:www\.)?invidious\.reallyancient\.tech',
842 r'(?:www\.)?invidious\.tube',
843 r'(?:www\.)?invidiou\.site',
844 r'(?:www\.)?invidious\.site',
845 r'(?:www\.)?invidious\.xyz',
846 r'(?:www\.)?invidious\.nixnet\.xyz',
847 r'(?:www\.)?invidious\.048596\.xyz',
848 r'(?:www\.)?invidious\.drycat\.fr',
849 r'(?:www\.)?inv\.skyn3t\.in',
850 r'(?:www\.)?tube\.poal\.co',
851 r'(?:www\.)?tube\.connect\.cafe',
852 r'(?:www\.)?vid\.wxzm\.sx',
853 r'(?:www\.)?vid\.mint\.lgbt',
854 r'(?:www\.)?vid\.puffyan\.us',
855 r'(?:www\.)?yewtu\.be',
856 r'(?:www\.)?yt\.elukerio\.org',
857 r'(?:www\.)?yt\.lelux\.fi',
858 r'(?:www\.)?invidious\.ggc-project\.de',
859 r'(?:www\.)?yt\.maisputain\.ovh',
860 r'(?:www\.)?ytprivate\.com',
861 r'(?:www\.)?invidious\.13ad\.de',
862 r'(?:www\.)?invidious\.toot\.koeln',
863 r'(?:www\.)?invidious\.fdn\.fr',
864 r'(?:www\.)?watch\.nettohikari\.com',
865 r'(?:www\.)?invidious\.namazso\.eu',
866 r'(?:www\.)?invidious\.silkky\.cloud',
867 r'(?:www\.)?invidious\.exonip\.de',
868 r'(?:www\.)?invidious\.riverside\.rocks',
869 r'(?:www\.)?invidious\.blamefran\.net',
870 r'(?:www\.)?invidious\.moomoo\.de',
871 r'(?:www\.)?ytb\.trom\.tf',
872 r'(?:www\.)?yt\.cyberhost\.uk',
873 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
874 r'(?:www\.)?qklhadlycap4cnod\.onion',
875 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
876 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
877 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
878 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
879 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
880 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
881 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
882 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
883 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
884 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
885 )
    # Matches canonical youtube.com watch/embed/e URLs, youtu.be short links,
    # several proxy/mirror front-ends, the Invidious instances interpolated
    # below from _INVIDIOUS_SITES, or a bare 11-character video ID.  Verbose
    # (?x) mode: whitespace and trailing # comments inside the pattern are
    # not part of the regex.
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                            (?:www\.)?deturl\.com/www\.youtube\.com|
                            (?:www\.)?pwnyoutube\.com|
                            (?:www\.)?hooktube\.com|
                            (?:www\.)?yourepeat\.com|
                            tube\.majestyc\.net|
                            %(invidious)s|
                            youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                                 v=
                             )
                         ))
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                            %(invidious)s
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     (?:\#|$)""" % {
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    # Patterns matching player JS URLs; each captures the player build as the
    # named group `id` (tried in order until one matches).
    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
    )
    # Static metadata for known YouTube format codes ("itags"): container
    # extension, resolution, codec and bitrate info, keyed by itag string.
    # Negative `preference` values demote 3D/HLS variants below the defaults.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }
    # Subtitle format identifiers supported by this extractor.
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Error/reason messages that indicate an age-gated video.
    _AGE_GATE_REASONS = (
        'Sign in to confirm your age',
        'This video may be inappropriate for some users.',
        'Sorry, this content is age-restricted.')

    # NOTE(review): presumably opts out of the base class's generic
    # geo-bypass (X-Forwarded-For faking) — confirm against InfoExtractor.
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
1041 _TESTS = [
1042 {
1043 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1044 'info_dict': {
1045 'id': 'BaW_jenozKc',
1046 'ext': 'mp4',
1047 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1048 'uploader': 'Philipp Hagemeister',
1049 'uploader_id': 'phihag',
1050 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1051 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1052 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1053 'upload_date': '20121002',
1054 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1055 'categories': ['Science & Technology'],
1056 'tags': ['youtube-dl'],
1057 'duration': 10,
1058 'view_count': int,
1059 'like_count': int,
1060 'dislike_count': int,
1061 'start_time': 1,
1062 'end_time': 9,
1063 }
1064 },
1065 {
1066 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1067 'note': 'Embed-only video (#1746)',
1068 'info_dict': {
1069 'id': 'yZIXLfi8CZQ',
1070 'ext': 'mp4',
1071 'upload_date': '20120608',
1072 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1073 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1074 'uploader': 'SET India',
1075 'uploader_id': 'setindia',
1076 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1077 'age_limit': 18,
1078 },
1079 'skip': 'Private video',
1080 },
1081 {
1082 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1083 'note': 'Use the first video ID in the URL',
1084 'info_dict': {
1085 'id': 'BaW_jenozKc',
1086 'ext': 'mp4',
1087 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1088 'uploader': 'Philipp Hagemeister',
1089 'uploader_id': 'phihag',
1090 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1091 'upload_date': '20121002',
1092 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1093 'categories': ['Science & Technology'],
1094 'tags': ['youtube-dl'],
1095 'duration': 10,
1096 'view_count': int,
1097 'like_count': int,
1098 'dislike_count': int,
1099 },
1100 'params': {
1101 'skip_download': True,
1102 },
1103 },
1104 {
1105 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1106 'note': '256k DASH audio (format 141) via DASH manifest',
1107 'info_dict': {
1108 'id': 'a9LDPn-MO4I',
1109 'ext': 'm4a',
1110 'upload_date': '20121002',
1111 'uploader_id': '8KVIDEO',
1112 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1113 'description': '',
1114 'uploader': '8KVIDEO',
1115 'title': 'UHDTV TEST 8K VIDEO.mp4'
1116 },
1117 'params': {
1118 'youtube_include_dash_manifest': True,
1119 'format': '141',
1120 },
1121 'skip': 'format 141 not served anymore',
1122 },
1123 # DASH manifest with encrypted signature
1124 {
1125 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1126 'info_dict': {
1127 'id': 'IB3lcPjvWLA',
1128 'ext': 'm4a',
1129 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1130 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1131 'duration': 244,
1132 'uploader': 'AfrojackVEVO',
1133 'uploader_id': 'AfrojackVEVO',
1134 'upload_date': '20131011',
1135 'abr': 129.495,
1136 },
1137 'params': {
1138 'youtube_include_dash_manifest': True,
1139 'format': '141/bestaudio[ext=m4a]',
1140 },
1141 },
1142 # Normal age-gate video (embed allowed)
1143 {
1144 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1145 'info_dict': {
1146 'id': 'HtVdAasjOgU',
1147 'ext': 'mp4',
1148 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1149 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1150 'duration': 142,
1151 'uploader': 'The Witcher',
1152 'uploader_id': 'WitcherGame',
1153 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1154 'upload_date': '20140605',
1155 'age_limit': 18,
1156 },
1157 },
1158 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1159 # YouTube Red ad is not captured for creator
1160 {
1161 'url': '__2ABJjxzNo',
1162 'info_dict': {
1163 'id': '__2ABJjxzNo',
1164 'ext': 'mp4',
1165 'duration': 266,
1166 'upload_date': '20100430',
1167 'uploader_id': 'deadmau5',
1168 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1169 'creator': 'deadmau5',
1170 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1171 'uploader': 'deadmau5',
1172 'title': 'Deadmau5 - Some Chords (HD)',
1173 'alt_title': 'Some Chords',
1174 },
1175 'expected_warnings': [
1176 'DASH manifest missing',
1177 ]
1178 },
1179 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1180 {
1181 'url': 'lqQg6PlCWgI',
1182 'info_dict': {
1183 'id': 'lqQg6PlCWgI',
1184 'ext': 'mp4',
1185 'duration': 6085,
1186 'upload_date': '20150827',
1187 'uploader_id': 'olympic',
1188 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1189 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1190 'uploader': 'Olympics',
1191 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1192 },
1193 'params': {
1194 'skip_download': 'requires avconv',
1195 }
1196 },
1197 # Non-square pixels
1198 {
1199 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1200 'info_dict': {
1201 'id': '_b-2C3KPAM0',
1202 'ext': 'mp4',
1203 'stretched_ratio': 16 / 9.,
1204 'duration': 85,
1205 'upload_date': '20110310',
1206 'uploader_id': 'AllenMeow',
1207 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1208 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1209 'uploader': '孫ᄋᄅ',
1210 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1211 },
1212 },
1213 # url_encoded_fmt_stream_map is empty string
1214 {
1215 'url': 'qEJwOuvDf7I',
1216 'info_dict': {
1217 'id': 'qEJwOuvDf7I',
1218 'ext': 'webm',
1219 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1220 'description': '',
1221 'upload_date': '20150404',
1222 'uploader_id': 'spbelect',
1223 'uploader': 'Наблюдатели Петербурга',
1224 },
1225 'params': {
1226 'skip_download': 'requires avconv',
1227 },
1228 'skip': 'This live event has ended.',
1229 },
1230 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1231 {
1232 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1233 'info_dict': {
1234 'id': 'FIl7x6_3R5Y',
1235 'ext': 'webm',
1236 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1237 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1238 'duration': 220,
1239 'upload_date': '20150625',
1240 'uploader_id': 'dorappi2000',
1241 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1242 'uploader': 'dorappi2000',
1243 'formats': 'mincount:31',
1244 },
1245 'skip': 'not actual anymore',
1246 },
1247 # DASH manifest with segment_list
1248 {
1249 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1250 'md5': '8ce563a1d667b599d21064e982ab9e31',
1251 'info_dict': {
1252 'id': 'CsmdDsKjzN8',
1253 'ext': 'mp4',
1254 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1255 'uploader': 'Airtek',
1256 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1257 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1258 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1259 },
1260 'params': {
1261 'youtube_include_dash_manifest': True,
1262 'format': '135', # bestvideo
1263 },
1264 'skip': 'This live event has ended.',
1265 },
1266 {
1267 # Multifeed videos (multiple cameras), URL is for Main Camera
1268 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1269 'info_dict': {
1270 'id': 'jvGDaLqkpTg',
1271 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1272 'description': 'md5:e03b909557865076822aa169218d6a5d',
1273 },
1274 'playlist': [{
1275 'info_dict': {
1276 'id': 'jvGDaLqkpTg',
1277 'ext': 'mp4',
1278 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1279 'description': 'md5:e03b909557865076822aa169218d6a5d',
1280 'duration': 10643,
1281 'upload_date': '20161111',
1282 'uploader': 'Team PGP',
1283 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1284 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1285 },
1286 }, {
1287 'info_dict': {
1288 'id': '3AKt1R1aDnw',
1289 'ext': 'mp4',
1290 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1291 'description': 'md5:e03b909557865076822aa169218d6a5d',
1292 'duration': 10991,
1293 'upload_date': '20161111',
1294 'uploader': 'Team PGP',
1295 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1296 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1297 },
1298 }, {
1299 'info_dict': {
1300 'id': 'RtAMM00gpVc',
1301 'ext': 'mp4',
1302 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1303 'description': 'md5:e03b909557865076822aa169218d6a5d',
1304 'duration': 10995,
1305 'upload_date': '20161111',
1306 'uploader': 'Team PGP',
1307 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1308 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1309 },
1310 }, {
1311 'info_dict': {
1312 'id': '6N2fdlP3C5U',
1313 'ext': 'mp4',
1314 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1315 'description': 'md5:e03b909557865076822aa169218d6a5d',
1316 'duration': 10990,
1317 'upload_date': '20161111',
1318 'uploader': 'Team PGP',
1319 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1320 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1321 },
1322 }],
1323 'params': {
1324 'skip_download': True,
1325 },
1326 },
1327 {
1328 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1329 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1330 'info_dict': {
1331 'id': 'gVfLd0zydlo',
1332 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1333 },
1334 'playlist_count': 2,
1335 'skip': 'Not multifeed anymore',
1336 },
1337 {
1338 'url': 'https://vid.plus/FlRa-iH7PGw',
1339 'only_matching': True,
1340 },
1341 {
1342 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1343 'only_matching': True,
1344 },
1345 {
1346 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1347 # Also tests cut-off URL expansion in video description (see
1348 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1349 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1350 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1351 'info_dict': {
1352 'id': 'lsguqyKfVQg',
1353 'ext': 'mp4',
1354 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1355 'alt_title': 'Dark Walk',
1356 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1357 'duration': 133,
1358 'upload_date': '20151119',
1359 'uploader_id': 'IronSoulElf',
1360 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1361 'uploader': 'IronSoulElf',
1362 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1363 'track': 'Dark Walk',
1364 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1365 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1366 },
1367 'params': {
1368 'skip_download': True,
1369 },
1370 },
1371 {
1372 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1373 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1374 'only_matching': True,
1375 },
1376 {
1377 # Video with yt:stretch=17:0
1378 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1379 'info_dict': {
1380 'id': 'Q39EVAstoRM',
1381 'ext': 'mp4',
1382 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1383 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1384 'upload_date': '20151107',
1385 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1386 'uploader': 'CH GAMER DROID',
1387 },
1388 'params': {
1389 'skip_download': True,
1390 },
1391 'skip': 'This video does not exist.',
1392 },
1393 {
1394 # Video with incomplete 'yt:stretch=16:'
1395 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1396 'only_matching': True,
1397 },
1398 {
1399 # Video licensed under Creative Commons
1400 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1401 'info_dict': {
1402 'id': 'M4gD1WSo5mA',
1403 'ext': 'mp4',
1404 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1405 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1406 'duration': 721,
1407 'upload_date': '20150127',
1408 'uploader_id': 'BerkmanCenter',
1409 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1410 'uploader': 'The Berkman Klein Center for Internet & Society',
1411 'license': 'Creative Commons Attribution license (reuse allowed)',
1412 },
1413 'params': {
1414 'skip_download': True,
1415 },
1416 },
1417 {
1418 # Channel-like uploader_url
1419 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1420 'info_dict': {
1421 'id': 'eQcmzGIKrzg',
1422 'ext': 'mp4',
1423 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1424 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1425 'duration': 4060,
1426 'upload_date': '20151119',
1427 'uploader': 'Bernie Sanders',
1428 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1429 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1430 'license': 'Creative Commons Attribution license (reuse allowed)',
1431 },
1432 'params': {
1433 'skip_download': True,
1434 },
1435 },
1436 {
1437 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1438 'only_matching': True,
1439 },
1440 {
1441 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1442 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1443 'only_matching': True,
1444 },
1445 {
1446 # Rental video preview
1447 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1448 'info_dict': {
1449 'id': 'uGpuVWrhIzE',
1450 'ext': 'mp4',
1451 'title': 'Piku - Trailer',
1452 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1453 'upload_date': '20150811',
1454 'uploader': 'FlixMatrix',
1455 'uploader_id': 'FlixMatrixKaravan',
1456 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1457 'license': 'Standard YouTube License',
1458 },
1459 'params': {
1460 'skip_download': True,
1461 },
1462 'skip': 'This video is not available.',
1463 },
1464 {
1465 # YouTube Red video with episode data
1466 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1467 'info_dict': {
1468 'id': 'iqKdEhx-dD4',
1469 'ext': 'mp4',
1470 'title': 'Isolation - Mind Field (Ep 1)',
1471 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1472 'duration': 2085,
1473 'upload_date': '20170118',
1474 'uploader': 'Vsauce',
1475 'uploader_id': 'Vsauce',
1476 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1477 'series': 'Mind Field',
1478 'season_number': 1,
1479 'episode_number': 1,
1480 },
1481 'params': {
1482 'skip_download': True,
1483 },
1484 'expected_warnings': [
1485 'Skipping DASH manifest',
1486 ],
1487 },
1488 {
1489 # The following content has been identified by the YouTube community
1490 # as inappropriate or offensive to some audiences.
1491 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1492 'info_dict': {
1493 'id': '6SJNVb0GnPI',
1494 'ext': 'mp4',
1495 'title': 'Race Differences in Intelligence',
1496 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1497 'duration': 965,
1498 'upload_date': '20140124',
1499 'uploader': 'New Century Foundation',
1500 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1501 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1502 },
1503 'params': {
1504 'skip_download': True,
1505 },
1506 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1507 },
1508 {
1509 # itag 212
1510 'url': '1t24XAntNCY',
1511 'only_matching': True,
1512 },
1513 {
1514 # geo restricted to JP
1515 'url': 'sJL6WA-aGkQ',
1516 'only_matching': True,
1517 },
1518 {
1519 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1520 'only_matching': True,
1521 },
1522 {
1523 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1524 'only_matching': True,
1525 },
1526 {
1527 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1528 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1529 'only_matching': True,
1530 },
1531 {
1532 # DRM protected
1533 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1534 'only_matching': True,
1535 },
1536 {
1537 # Video with unsupported adaptive stream type formats
1538 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1539 'info_dict': {
1540 'id': 'Z4Vy8R84T1U',
1541 'ext': 'mp4',
1542 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1543 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1544 'duration': 433,
1545 'upload_date': '20130923',
1546 'uploader': 'Amelia Putri Harwita',
1547 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1548 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1549 'formats': 'maxcount:10',
1550 },
1551 'params': {
1552 'skip_download': True,
1553 'youtube_include_dash_manifest': False,
1554 },
1555 'skip': 'not actual anymore',
1556 },
1557 {
1558 # Youtube Music Auto-generated description
1559 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1560 'info_dict': {
1561 'id': 'MgNrAu2pzNs',
1562 'ext': 'mp4',
1563 'title': 'Voyeur Girl',
1564 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1565 'upload_date': '20190312',
1566 'uploader': 'Stephen - Topic',
1567 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1568 'artist': 'Stephen',
1569 'track': 'Voyeur Girl',
1570 'album': 'it\'s too much love to know my dear',
1571 'release_date': '20190313',
1572 'release_year': 2019,
1573 },
1574 'params': {
1575 'skip_download': True,
1576 },
1577 },
1578 {
1579 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1580 'only_matching': True,
1581 },
1582 {
1583 # invalid -> valid video id redirection
1584 'url': 'DJztXj2GPfl',
1585 'info_dict': {
1586 'id': 'DJztXj2GPfk',
1587 'ext': 'mp4',
1588 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1589 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1590 'upload_date': '20090125',
1591 'uploader': 'Prochorowka',
1592 'uploader_id': 'Prochorowka',
1593 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1594 'artist': 'Panjabi MC',
1595 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1596 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1597 },
1598 'params': {
1599 'skip_download': True,
1600 },
1601 'skip': 'Video unavailable',
1602 },
1603 {
1604 # empty description results in an empty string
1605 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1606 'info_dict': {
1607 'id': 'x41yOUIvK2k',
1608 'ext': 'mp4',
1609 'title': 'IMG 3456',
1610 'description': '',
1611 'upload_date': '20170613',
1612 'uploader_id': 'ElevageOrVert',
1613 'uploader': 'ElevageOrVert',
1614 },
1615 'params': {
1616 'skip_download': True,
1617 },
1618 },
1619 {
1620 # with '};' inside yt initial data (see [1])
1621 # see [2] for an example with '};' inside ytInitialPlayerResponse
1622 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1623 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1624 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1625 'info_dict': {
1626 'id': 'CHqg6qOn4no',
1627 'ext': 'mp4',
1628 'title': 'Part 77 Sort a list of simple types in c#',
1629 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1630 'upload_date': '20130831',
1631 'uploader_id': 'kudvenkat',
1632 'uploader': 'kudvenkat',
1633 },
1634 'params': {
1635 'skip_download': True,
1636 },
1637 },
1638 {
1639 # another example of '};' in ytInitialData
1640 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1641 'only_matching': True,
1642 },
1643 {
1644 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1645 'only_matching': True,
1646 },
1647 {
1648 # https://github.com/ytdl-org/youtube-dl/pull/28094
1649 'url': 'OtqTfy26tG0',
1650 'info_dict': {
1651 'id': 'OtqTfy26tG0',
1652 'ext': 'mp4',
1653 'title': 'Burn Out',
1654 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1655 'upload_date': '20141120',
1656 'uploader': 'The Cinematic Orchestra - Topic',
1657 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1658 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1659 'artist': 'The Cinematic Orchestra',
1660 'track': 'Burn Out',
1661 'album': 'Every Day',
1662 'release_data': None,
1663 'release_year': None,
1664 },
1665 'params': {
1666 'skip_download': True,
1667 },
1668 },
1669 {
1670 # controversial video, only works with bpctr when authenticated with cookies
1671 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1672 'only_matching': True,
1673 },
1674 {
1675 # controversial video, requires bpctr/contentCheckOk
1676 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1677 'info_dict': {
1678 'id': 'SZJvDhaSDnc',
1679 'ext': 'mp4',
1680 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1681 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1682 'uploader': 'CBS This Morning',
1683 'uploader_id': 'CBSThisMorning',
1684 'upload_date': '20140716',
1685 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1686 }
1687 },
1688 {
1689 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1690 'url': 'cBvYw8_A0vQ',
1691 'info_dict': {
1692 'id': 'cBvYw8_A0vQ',
1693 'ext': 'mp4',
1694 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1695 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1696 'upload_date': '20201120',
1697 'uploader': 'Walk around Japan',
1698 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1699 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1700 },
1701 'params': {
1702 'skip_download': True,
1703 },
1704 }, {
1705 # Has multiple audio streams
1706 'url': 'WaOKSUlf4TM',
1707 'only_matching': True
1708 }, {
1709 # Requires Premium: has format 141 when requested using YTM url
1710 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1711 'only_matching': True
1712 }, {
1713 # multiple subtitles with same lang_code
1714 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1715 'only_matching': True,
1716 }, {
1717 # Force use android client fallback
1718 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1719 'info_dict': {
1720 'id': 'YOelRv7fMxY',
1721 'title': 'DIGGING A SECRET TUNNEL Part 1',
1722 'ext': '3gp',
1723 'upload_date': '20210624',
1724 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1725 'uploader': 'colinfurze',
1726 'uploader_id': 'colinfurze',
1727 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1728 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1729 },
1730 'params': {
1731 'format': '17', # 3gp format available on android
1732 'extractor_args': {'youtube': {'player_client': ['android']}},
1733 },
1734 },
1735 {
1736 # Skip download of additional client configs (remix client config in this case)
1737 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1738 'only_matching': True,
1739 'params': {
1740 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1741 },
1742 }
1743 ]
1744
1745 @classmethod
1746 def suitable(cls, url):
1747 # Hack for lazy extractors until more generic solution is implemented
1748 # (see #28780)
1749 from .youtube import parse_qs
1750 qs = parse_qs(url)
1751 if qs.get('list', [None])[0]:
1752 return False
1753 return super(YoutubeIE, cls).suitable(url)
1754
1755 def __init__(self, *args, **kwargs):
1756 super(YoutubeIE, self).__init__(*args, **kwargs)
1757 self._code_cache = {}
1758 self._player_cache = {}
1759
1760 def _extract_player_url(self, ytcfg=None, webpage=None):
1761 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1762 if not player_url and webpage:
1763 player_url = self._search_regex(
1764 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1765 webpage, 'player URL', fatal=False)
1766 if not player_url:
1767 return None
1768 if player_url.startswith('//'):
1769 player_url = 'https:' + player_url
1770 elif not re.match(r'https?://', player_url):
1771 player_url = compat_urlparse.urljoin(
1772 'https://www.youtube.com', player_url)
1773 return player_url
1774
1775 def _signature_cache_id(self, example_sig):
1776 """ Return a string representation of a signature """
1777 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1778
1779 @classmethod
1780 def _extract_player_info(cls, player_url):
1781 for player_re in cls._PLAYER_INFO_RE:
1782 id_m = re.search(player_re, player_url)
1783 if id_m:
1784 break
1785 else:
1786 raise ExtractorError('Cannot identify player %r' % player_url)
1787 return id_m.group('id')
1788
1789 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1790 player_id = self._extract_player_info(player_url)
1791 if player_id not in self._code_cache:
1792 self._code_cache[player_id] = self._download_webpage(
1793 player_url, video_id, fatal=fatal,
1794 note='Downloading player ' + player_id,
1795 errnote='Download of %s failed' % player_url)
1796 return player_id in self._code_cache
1797
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Return a callable decrypting signatures with example_sig's shape.

        The derived transformation is cached on disk keyed by player id plus
        signature shape. Returns None (implicitly) if the player JS cannot
        be loaded.
        """
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec maps output position -> input index
            return lambda s: ''.join(s[i] for i in cache_spec)

        if self._load_player(video_id, player_url):
            code = self._code_cache[player_id]
            res = self._parse_sig_js(code)

            # Recover the index mapping by running the JS function on a probe
            # string of unique characters, then persist it for next time
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res
1820
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Probes func with a string of unique characters to recover the index
        permutation, then renders consecutive runs as compact slice
        expressions (used by --youtube-print-sig-code).
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a run of indices as a Python slice expression over s
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Currently inside a run: extend it, or flush it as a slice
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Adjacent indices open a new run with step +/-1
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1859
    def _parse_sig_js(self, jscode):
        """Locate the signature-decryption function in player JS and wrap it.

        The patterns are tried in order (newest first); returns a Python
        callable that invokes the function through JSInterpreter.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        return lambda s: initial_function([s])
1883
1884 def _decrypt_signature(self, s, video_id, player_url):
1885 """Turn the encrypted s field into a working signature"""
1886
1887 if player_url is None:
1888 raise ExtractorError('Cannot decrypt signature without player_url')
1889
1890 try:
1891 player_id = (player_url, self._signature_cache_id(s))
1892 if player_id not in self._player_cache:
1893 func = self._extract_signature_function(
1894 video_id, player_url, s
1895 )
1896 self._player_cache[player_id] = func
1897 func = self._player_cache[player_id]
1898 if self.get_param('youtube_print_sig_code'):
1899 self._print_sig_code(func, s)
1900 return func(s)
1901 except Exception as e:
1902 tb = traceback.format_exc()
1903 raise ExtractorError(
1904 'Signature extraction failed: ' + tb, cause=e)
1905
1906 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1907 """
1908 Extract signatureTimestamp (sts)
1909 Required to tell API what sig/player version is in use.
1910 """
1911 sts = None
1912 if isinstance(ytcfg, dict):
1913 sts = int_or_none(ytcfg.get('STS'))
1914
1915 if not sts:
1916 # Attempt to extract from player
1917 if player_url is None:
1918 error_msg = 'Cannot extract signature timestamp without player_url.'
1919 if fatal:
1920 raise ExtractorError(error_msg)
1921 self.report_warning(error_msg)
1922 return
1923 if self._load_player(video_id, player_url, fatal=fatal):
1924 player_id = self._extract_player_info(player_url)
1925 code = self._code_cache[player_id]
1926 sts = int_or_none(self._search_regex(
1927 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1928 'JS player signature timestamp', group='sts', fatal=fatal))
1929 return sts
1930
1931 def _mark_watched(self, video_id, player_responses):
1932 playback_url = traverse_obj(
1933 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
1934 expected_type=url_or_none, get_all=False)
1935 if not playback_url:
1936 self.report_warning('Unable to mark watched')
1937 return
1938 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1939 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1940
1941 # cpn generation algorithm is reverse engineered from base.js.
1942 # In fact it works even with dummy cpn.
1943 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1944 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1945
1946 qs.update({
1947 'ver': ['2'],
1948 'cpn': [cpn],
1949 })
1950 playback_url = compat_urlparse.urlunparse(
1951 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1952
1953 self._download_webpage(
1954 playback_url, video_id, 'Marking watched',
1955 'Unable to mark watched', fatal=False)
1956
1957 @staticmethod
1958 def _extract_urls(webpage):
1959 # Embedded YouTube player
1960 entries = [
1961 unescapeHTML(mobj.group('url'))
1962 for mobj in re.finditer(r'''(?x)
1963 (?:
1964 <iframe[^>]+?src=|
1965 data-video-url=|
1966 <embed[^>]+?src=|
1967 embedSWF\(?:\s*|
1968 <object[^>]+data=|
1969 new\s+SWFObject\(
1970 )
1971 (["\'])
1972 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1973 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1974 \1''', webpage)]
1975
1976 # lazyYT YouTube embed
1977 entries.extend(list(map(
1978 unescapeHTML,
1979 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1980
1981 # Wordpress "YouTube Video Importer" plugin
1982 matches = re.findall(r'''(?x)<div[^>]+
1983 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1984 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1985 entries.extend(m[-1] for m in matches)
1986
1987 return entries
1988
1989 @staticmethod
1990 def _extract_url(webpage):
1991 urls = YoutubeIE._extract_urls(webpage)
1992 return urls[0] if urls else None
1993
1994 @classmethod
1995 def extract_id(cls, url):
1996 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1997 if mobj is None:
1998 raise ExtractorError('Invalid URL: %s' % url)
1999 video_id = mobj.group(2)
2000 return video_id
2001
2002 def _extract_chapters_from_json(self, data, duration):
2003 chapter_list = traverse_obj(
2004 data, (
2005 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2006 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2007 ), expected_type=list)
2008
2009 return self._extract_chapters(
2010 chapter_list,
2011 chapter_time=lambda chapter: float_or_none(
2012 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2013 chapter_title=lambda chapter: traverse_obj(
2014 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2015 duration=duration)
2016
2017 def _extract_chapters_from_engagement_panel(self, data, duration):
2018 content_list = traverse_obj(
2019 data,
2020 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2021 expected_type=list, default=[])
2022 chapter_time = lambda chapter: parse_duration(self._get_text(chapter.get('timeDescription')))
2023 chapter_title = lambda chapter: self._get_text(chapter.get('title'))
2024
2025 return next((
2026 filter(None, (
2027 self._extract_chapters(
2028 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2029 chapter_time, chapter_title, duration)
2030 for contents in content_list
2031 ))), [])
2032
2033 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2034 chapters = []
2035 last_chapter = {'start_time': 0}
2036 for idx, chapter in enumerate(chapter_list or []):
2037 title = chapter_title(chapter)
2038 start_time = chapter_time(chapter)
2039 if start_time is None:
2040 continue
2041 last_chapter['end_time'] = start_time
2042 if start_time < last_chapter['start_time']:
2043 if idx == 1:
2044 chapters.pop()
2045 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2046 else:
2047 self.report_warning(f'Invalid start time for chapter "{title}"')
2048 continue
2049 last_chapter = {'start_time': start_time, 'title': title}
2050 chapters.append(last_chapter)
2051 last_chapter['end_time'] = duration
2052 return chapters
2053
2054 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2055 return self._parse_json(self._search_regex(
2056 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2057 regex), webpage, name, default='{}'), video_id, fatal=False)
2058
2059 @staticmethod
2060 def parse_time_text(time_text):
2061 """
2062 Parse the comment time text
2063 time_text is in the format 'X units ago (edited)'
2064 """
2065 time_text_split = time_text.split(' ')
2066 if len(time_text_split) >= 3:
2067 try:
2068 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2069 except ValueError:
2070 return None
2071
2072 def _extract_comment(self, comment_renderer, parent=None):
2073 comment_id = comment_renderer.get('commentId')
2074 if not comment_id:
2075 return
2076
2077 text = self._get_text(comment_renderer.get('contentText'))
2078
2079 # note: timestamp is an estimate calculated from the current time and time_text
2080 time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
2081 time_text_dt = self.parse_time_text(time_text)
2082 if isinstance(time_text_dt, datetime.datetime):
2083 timestamp = calendar.timegm(time_text_dt.timetuple())
2084 author = self._get_text(comment_renderer.get('authorText'))
2085 author_id = try_get(comment_renderer,
2086 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2087
2088 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2089 lambda x: x['likeCount']), compat_str)) or 0
2090 author_thumbnail = try_get(comment_renderer,
2091 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2092
2093 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2094 is_favorited = 'creatorHeart' in (try_get(
2095 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2096 return {
2097 'id': comment_id,
2098 'text': text,
2099 'timestamp': timestamp,
2100 'time_text': time_text,
2101 'like_count': votes,
2102 'is_favorited': is_favorited,
2103 'author': author,
2104 'author_id': author_id,
2105 'author_thumbnail': author_thumbnail,
2106 'author_is_uploader': author_is_uploader,
2107 'parent': parent or 'root'
2108 }
2109
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """Yield comment info dicts for a video, paging through continuations.

        On the first page the estimated total comment count is yielded once
        as a bare int before any comment dicts. Recurses into reply threads
        (max depth 2) with itself as parent. comment_counts is mutable state
        shared across recursive calls:
        [comments yielded so far, estimated total, current reply-thread index].
        """

        def extract_header(contents):
            # Locate the comments header to read the estimated total count and
            # the continuation token for the requested sort order
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in the thread, then recurse into its replies
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through continuations until none remain
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry visitorData across requests so paging stays consistent
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2280
2281 @staticmethod
2282 def _generate_comment_continuation(video_id):
2283 """
2284 Generates initial comment section continuation token from given video id
2285 """
2286 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2287 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2288 new_continuation_intlist = list(itertools.chain.from_iterable(
2289 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2290 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2291
2292 def _extract_comments(self, ytcfg, video_id, contents, webpage):
2293 """Entry for comment extraction"""
2294 def _real_comment_extract(contents):
2295 if isinstance(contents, list):
2296 for entry in contents:
2297 for key, renderer in entry.items():
2298 if key not in known_entry_comment_renderers:
2299 continue
2300 yield from self._comment_entries(
2301 renderer, video_id=video_id, ytcfg=ytcfg,
2302 identity_token=self._extract_identity_token(webpage, item_id=video_id),
2303 account_syncid=self._extract_account_syncid(ytcfg))
2304 break
2305 comments = []
2306 known_entry_comment_renderers = ('itemSectionRenderer',)
2307 estimated_total = 0
2308 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2309
2310 try:
2311 for comment in _real_comment_extract(contents):
2312 if len(comments) >= max_comments:
2313 break
2314 if isinstance(comment, int):
2315 estimated_total = comment
2316 continue
2317 comments.append(comment)
2318 except KeyboardInterrupt:
2319 self.to_screen('Interrupted by user')
2320 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2321 return {
2322 'comments': comments,
2323 'comment_count': len(comments),
2324 }
2325
2326 @staticmethod
2327 def _generate_player_context(sts=None):
2328 context = {
2329 'html5Preference': 'HTML5_PREF_WANTS',
2330 }
2331 if sts is not None:
2332 context['signatureTimestamp'] = sts
2333 return {
2334 'playbackContext': {
2335 'contentPlaybackContext': context
2336 },
2337 'contentCheckOk': True
2338 }
2339
2340 @staticmethod
2341 def _get_video_info_params(video_id, client='TVHTML5'):
2342 GVI_CLIENTS = {
2343 'ANDROID': {
2344 'c': 'ANDROID',
2345 'cver': '16.20',
2346 },
2347 'TVHTML5': {
2348 'c': 'TVHTML5',
2349 'cver': '6.20180913',
2350 },
2351 'IOS': {
2352 'c': 'IOS',
2353 'cver': '16.20'
2354 }
2355 }
2356 query = {
2357 'video_id': video_id,
2358 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2359 'html5': '1'
2360 }
2361 query.update(GVI_CLIENTS.get(client))
2362 return query
2363
2364 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
2365
2366 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2367 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2368 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
2369 headers = self.generate_api_headers(
2370 player_ytcfg, identity_token, syncid,
2371 default_client=self._YT_CLIENTS[client], session_index=session_index)
2372
2373 yt_query = {'videoId': video_id}
2374 yt_query.update(self._generate_player_context(sts))
2375 return self._extract_response(
2376 item_id=video_id, ep='player', query=yt_query,
2377 ytcfg=player_ytcfg, headers=headers, fatal=False,
2378 default_client=self._YT_CLIENTS[client],
2379 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2380 ) or None
2381
    def _extract_age_gated_player_response(self, client, video_id, ytcfg, identity_token, player_url, initial_pr):
        """Try to obtain a player response for an age-restricted video.

        First queries get_video_info with the client's '_<client>_agegate'
        variant; if that yields nothing, falls back to the embedded player
        config. Returns None when the client has no agegate variant or the
        embed config also reports an age-gate reason.
        """
        gvi_client = self._YT_CLIENTS.get(f'_{client}_agegate')
        if not gvi_client:
            return

        pr = self._parse_json(traverse_obj(
            compat_parse_qs(self._download_webpage(
                self.http_scheme() + '//www.youtube.com/get_video_info', video_id,
                'Refetching age-gated %s info webpage' % gvi_client.lower(),
                'unable to download video info webpage', fatal=False,
                query=self._get_video_info_params(video_id, client=gvi_client))),
            ('player_response', 0), expected_type=str) or '{}', video_id)
        if pr:
            return pr

        self.report_warning('Falling back to embedded-only age-gate workaround')
        embed_webpage = None
        if client == 'web' and 'configs' not in self._configuration_arg('player_skip'):
            embed_webpage = self._download_webpage(
                'https://www.youtube.com/embed/%s?html5=1' % video_id,
                video_id=video_id, note=f'Downloading age-gated {client} embed config')

        ytcfg_age = self.extract_ytcfg(video_id, embed_webpage) or {}
        # If we extracted the embed webpage, it'll tell us if we can view the video
        embedded_pr = self._parse_json(
            traverse_obj(ytcfg_age, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str) or '{}',
            video_id=video_id)
        embedded_ps_reason = traverse_obj(embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
        if embedded_ps_reason in self._AGE_GATE_REASONS:
            return
        return self._extract_player_response(
            f'_{client}_embedded', video_id,
            ytcfg_age or ytcfg, ytcfg_age if client == 'web' else {},
            identity_token, player_url, initial_pr)
2416
2417 def _get_requested_clients(self, url, smuggled_data):
2418 requested_clients = [client for client in self._configuration_arg('player_client')
2419 if client[:0] != '_' and client in self._YT_CLIENTS]
2420 if not requested_clients:
2421 requested_clients = ['android', 'web']
2422
2423 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2424 requested_clients.extend(
2425 f'{client}_music' for client in requested_clients if not client.endswith('_music'))
2426
2427 return orderedSet(requested_clients)
2428
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
        """Yield a player response per requested client, handling age-gating.

        Once any response reports an age-gate reason, subsequent clients go
        straight to the age-gate workaround. The web client reuses the
        response embedded in the watch page when available.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        age_gated = False
        for client in clients:
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if age_gated:
                pr = None
            elif client == 'web' and initial_pr:
                # Reuse the player response already present in the watch page
                pr = initial_pr
            else:
                if client == 'web_music' and 'configs' not in self._configuration_arg('player_skip'):
                    ytm_webpage = self._download_webpage(
                        'https://music.youtube.com',
                        video_id, fatal=False, note='Downloading remix client config')
                    player_ytcfg = self.extract_ytcfg(video_id, ytm_webpage) or {}
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
            if pr:
                yield pr
            if age_gated or traverse_obj(pr, ('playabilityStatus', 'reason')) in self._AGE_GATE_REASONS:
                age_gated = True
                pr = self._extract_age_gated_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, identity_token, player_url, initial_pr)
                if pr:
                    yield pr
        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr and 'web' not in clients:
            initial_pr['streamingData'] = None
            yield initial_pr
2466
2467 def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2468 itags, stream_ids = [], []
2469 itag_qualities = {}
2470 q = qualities([
2471 # "tiny" is the smallest video-only format. But some audio-only formats
2472 # was also labeled "tiny". It is not clear if such formats still exist
2473 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2474 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2475 ])
2476 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2477
2478 for fmt in streaming_formats:
2479 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2480 continue
2481
2482 itag = str_or_none(fmt.get('itag'))
2483 audio_track = fmt.get('audioTrack') or {}
2484 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2485 if stream_id in stream_ids:
2486 continue
2487
2488 quality = fmt.get('quality')
2489 if quality == 'tiny' or not quality:
2490 quality = fmt.get('audioQuality', '').lower() or quality
2491 if itag and quality:
2492 itag_qualities[itag] = quality
2493 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2494 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2495 # number of fragment that would subsequently requested with (`&sq=N`)
2496 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2497 continue
2498
2499 fmt_url = fmt.get('url')
2500 if not fmt_url:
2501 sc = compat_parse_qs(fmt.get('signatureCipher'))
2502 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2503 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2504 if not (sc and fmt_url and encrypted_sig):
2505 continue
2506 if not player_url:
2507 continue
2508 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2509 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2510 fmt_url += '&' + sp + '=' + signature
2511
2512 if itag:
2513 itags.append(itag)
2514 stream_ids.append(stream_id)
2515
2516 tbr = float_or_none(
2517 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2518 dct = {
2519 'asr': int_or_none(fmt.get('audioSampleRate')),
2520 'filesize': int_or_none(fmt.get('contentLength')),
2521 'format_id': itag,
2522 'format_note': ', '.join(filter(None, (
2523 audio_track.get('displayName'), fmt.get('qualityLabel') or quality))),
2524 'fps': int_or_none(fmt.get('fps')),
2525 'height': int_or_none(fmt.get('height')),
2526 'quality': q(quality),
2527 'tbr': tbr,
2528 'url': fmt_url,
2529 'width': fmt.get('width'),
2530 'language': audio_track.get('id', '').split('.')[0],
2531 }
2532 mime_mobj = re.match(
2533 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2534 if mime_mobj:
2535 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2536 dct.update(parse_codecs(mime_mobj.group(2)))
2537 # The 3gp format in android client has a quality of "small",
2538 # but is actually worse than all other formats
2539 if dct['ext'] == '3gp':
2540 dct['quality'] = q('tiny')
2541 dct['preference'] = -10
2542 no_audio = dct.get('acodec') == 'none'
2543 no_video = dct.get('vcodec') == 'none'
2544 if no_audio:
2545 dct['vbr'] = tbr
2546 if no_video:
2547 dct['abr'] = tbr
2548 if no_audio or no_video:
2549 dct['downloader_options'] = {
2550 # Youtube throttles chunks >~10M
2551 'http_chunk_size': 10485760,
2552 }
2553 if dct.get('ext'):
2554 dct['container'] = dct['ext'] + '_dash'
2555 yield dct
2556
2557 skip_manifests = self._configuration_arg('skip')
2558 get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2559 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2560
2561 for sd in streaming_data:
2562 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2563 if hls_manifest_url:
2564 for f in self._extract_m3u8_formats(
2565 hls_manifest_url, video_id, 'mp4', fatal=False):
2566 itag = self._search_regex(
2567 r'/itag/(\d+)', f['url'], 'itag', default=None)
2568 if itag in itags:
2569 continue
2570 if itag:
2571 f['format_id'] = itag
2572 itags.append(itag)
2573 yield f
2574
2575 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2576 if dash_manifest_url:
2577 for f in self._extract_mpd_formats(
2578 dash_manifest_url, video_id, fatal=False):
2579 itag = f['format_id']
2580 if itag in itags:
2581 continue
2582 if itag:
2583 itags.append(itag)
2584 if itag in itag_qualities:
2585 f['quality'] = q(itag_qualities[itag])
2586 filesize = int_or_none(self._search_regex(
2587 r'/clen/(\d+)', f.get('fragment_base_url')
2588 or f['url'], 'file size', default=None))
2589 if filesize:
2590 f['filesize'] = filesize
2591 yield f
2592
2593 def _real_extract(self, url):
2594 url, smuggled_data = unsmuggle_url(url, {})
2595 video_id = self._match_id(url)
2596
2597 base_url = self.http_scheme() + '//www.youtube.com/'
2598 webpage_url = base_url + 'watch?v=' + video_id
2599 webpage = self._download_webpage(
2600 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2601
2602 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2603 player_url = self._extract_player_url(master_ytcfg, webpage)
2604 identity_token = self._extract_identity_token(webpage, video_id)
2605
2606 player_responses = list(self._extract_player_responses(
2607 self._get_requested_clients(url, smuggled_data),
2608 video_id, webpage, master_ytcfg, player_url, identity_token))
2609
2610 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2611
2612 playability_statuses = traverse_obj(
2613 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2614
2615 trailer_video_id = get_first(
2616 playability_statuses,
2617 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2618 expected_type=str)
2619 if trailer_video_id:
2620 return self.url_result(
2621 trailer_video_id, self.ie_key(), trailer_video_id)
2622
2623 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2624 if webpage else (lambda x: None))
2625
2626 video_details = traverse_obj(
2627 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2628 microformats = traverse_obj(
2629 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2630 expected_type=dict, default=[])
2631 video_title = (
2632 get_first(video_details, 'title')
2633 or self._get_text(microformats, (..., 'title'))
2634 or search_meta(['og:title', 'twitter:title', 'title']))
2635 video_description = get_first(video_details, 'shortDescription')
2636
2637 if not smuggled_data.get('force_singlefeed', False):
2638 if not self.get_param('noplaylist'):
2639 multifeed_metadata_list = get_first(
2640 player_responses,
2641 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2642 expected_type=str)
2643 if multifeed_metadata_list:
2644 entries = []
2645 feed_ids = []
2646 for feed in multifeed_metadata_list.split(','):
2647 # Unquote should take place before split on comma (,) since textual
2648 # fields may contain comma as well (see
2649 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2650 feed_data = compat_parse_qs(
2651 compat_urllib_parse_unquote_plus(feed))
2652
2653 def feed_entry(name):
2654 return try_get(
2655 feed_data, lambda x: x[name][0], compat_str)
2656
2657 feed_id = feed_entry('id')
2658 if not feed_id:
2659 continue
2660 feed_title = feed_entry('title')
2661 title = video_title
2662 if feed_title:
2663 title += ' (%s)' % feed_title
2664 entries.append({
2665 '_type': 'url_transparent',
2666 'ie_key': 'Youtube',
2667 'url': smuggle_url(
2668 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2669 {'force_singlefeed': True}),
2670 'title': title,
2671 })
2672 feed_ids.append(feed_id)
2673 self.to_screen(
2674 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2675 % (', '.join(feed_ids), video_id))
2676 return self.playlist_result(
2677 entries, video_id, video_title, video_description)
2678 else:
2679 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2680
2681 category = get_first(microformats, 'category') or search_meta('genre')
2682 channel_id = get_first(video_details, 'channelId') \
2683 or get_first(microformats, 'externalChannelId') \
2684 or search_meta('channelId')
2685 duration = int_or_none(
2686 get_first(video_details, 'lengthSeconds')
2687 or get_first(microformats, 'lengthSeconds')) \
2688 or parse_duration(search_meta('duration'))
2689 is_live = get_first(video_details, 'isLive')
2690 is_upcoming = get_first(video_details, 'isUpcoming')
2691 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2692
2693 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2694 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2695
2696 if not formats:
2697 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2698 self.raise_no_formats(
2699 'This video is DRM protected.', expected=True)
2700 pemr = get_first(
2701 playability_statuses,
2702 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2703 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2704 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2705 if subreason:
2706 if subreason == 'The uploader has not made this video available in your country.':
2707 countries = get_first(microformats, 'availableCountries')
2708 if not countries:
2709 regions_allowed = search_meta('regionsAllowed')
2710 countries = regions_allowed.split(',') if regions_allowed else None
2711 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2712 reason += f'. {subreason}'
2713 if reason:
2714 self.raise_no_formats(reason, expected=True)
2715
2716 for f in formats:
2717 # TODO: detect if throttled
2718 if '&n=' in f['url']: # possibly throttled
2719 f['source_preference'] = -10
2720 # note = f.get('format_note')
2721 # f['format_note'] = f'{note} (throttled)' if note else '(throttled)'
2722
2723 self._sort_formats(formats)
2724
2725 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2726 if not keywords and webpage:
2727 keywords = [
2728 unescapeHTML(m.group('content'))
2729 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2730 for keyword in keywords:
2731 if keyword.startswith('yt:stretch='):
2732 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2733 if mobj:
2734 # NB: float is intentional for forcing float division
2735 w, h = (float(v) for v in mobj.groups())
2736 if w > 0 and h > 0:
2737 ratio = w / h
2738 for f in formats:
2739 if f.get('vcodec') != 'none':
2740 f['stretched_ratio'] = ratio
2741 break
2742
2743 thumbnails = []
2744 thumbnail_dicts = traverse_obj(
2745 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2746 expected_type=dict, default=[])
2747 for thumbnail in thumbnail_dicts:
2748 thumbnail_url = thumbnail.get('url')
2749 if not thumbnail_url:
2750 continue
2751 # Sometimes youtube gives a wrong thumbnail URL. See:
2752 # https://github.com/yt-dlp/yt-dlp/issues/233
2753 # https://github.com/ytdl-org/youtube-dl/issues/28023
2754 if 'maxresdefault' in thumbnail_url:
2755 thumbnail_url = thumbnail_url.split('?')[0]
2756 thumbnails.append({
2757 'url': thumbnail_url,
2758 'height': int_or_none(thumbnail.get('height')),
2759 'width': int_or_none(thumbnail.get('width')),
2760 })
2761 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2762 if thumbnail_url:
2763 thumbnails.append({
2764 'url': thumbnail_url,
2765 })
2766 # The best resolution thumbnails sometimes does not appear in the webpage
2767 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2768 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2769 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2770 guaranteed_thumbnail_names = [
2771 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2772 'mqdefault', 'mq1', 'mq2', 'mq3',
2773 'default', '1', '2', '3'
2774 ]
2775 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2776 n_thumbnail_names = len(thumbnail_names)
2777
2778 thumbnails.extend({
2779 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2780 video_id=video_id, name=name, ext=ext,
2781 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2782 '_test_url': name in hq_thumbnail_names,
2783 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2784 for thumb in thumbnails:
2785 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2786 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2787 self._remove_duplicate_formats(thumbnails)
2788
2789 info = {
2790 'id': video_id,
2791 'title': self._live_title(video_title) if is_live else video_title,
2792 'formats': formats,
2793 'thumbnails': thumbnails,
2794 'description': video_description,
2795 'upload_date': unified_strdate(
2796 get_first(microformats, 'uploadDate')
2797 or search_meta('uploadDate')),
2798 'uploader': get_first(video_details, 'author'),
2799 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2800 'uploader_url': owner_profile_url,
2801 'channel_id': channel_id,
2802 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2803 'duration': duration,
2804 'view_count': int_or_none(
2805 get_first((video_details, microformats), (..., 'viewCount'))
2806 or search_meta('interactionCount')),
2807 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2808 'age_limit': 18 if (
2809 get_first(microformats, 'isFamilySafe') is False
2810 or search_meta('isFamilyFriendly') == 'false'
2811 or search_meta('og:restrictions:age') == '18+') else 0,
2812 'webpage_url': webpage_url,
2813 'categories': [category] if category else None,
2814 'tags': keywords,
2815 'is_live': is_live,
2816 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2817 'was_live': get_first(video_details, 'isLiveContent'),
2818 }
2819
2820 pctr = get_first(player_responses, ('captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2821 subtitles = {}
2822 if pctr:
2823 def process_language(container, base_url, lang_code, sub_name, query):
2824 lang_subs = container.setdefault(lang_code, [])
2825 for fmt in self._SUBTITLE_FORMATS:
2826 query.update({
2827 'fmt': fmt,
2828 })
2829 lang_subs.append({
2830 'ext': fmt,
2831 'url': update_url_query(base_url, query),
2832 'name': sub_name,
2833 })
2834
2835 for caption_track in (pctr.get('captionTracks') or []):
2836 base_url = caption_track.get('baseUrl')
2837 if not base_url:
2838 continue
2839 if caption_track.get('kind') != 'asr':
2840 lang_code = (
2841 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2842 or caption_track.get('languageCode'))
2843 if not lang_code:
2844 continue
2845 process_language(
2846 subtitles, base_url, lang_code,
2847 try_get(caption_track, lambda x: x['name']['simpleText']),
2848 {})
2849 continue
2850 automatic_captions = {}
2851 for translation_language in (pctr.get('translationLanguages') or []):
2852 translation_language_code = translation_language.get('languageCode')
2853 if not translation_language_code:
2854 continue
2855 process_language(
2856 automatic_captions, base_url, translation_language_code,
2857 self._get_text(translation_language.get('languageName'), max_runs=1),
2858 {'tlang': translation_language_code})
2859 info['automatic_captions'] = automatic_captions
2860 info['subtitles'] = subtitles
2861
2862 parsed_url = compat_urllib_parse_urlparse(url)
2863 for component in [parsed_url.fragment, parsed_url.query]:
2864 query = compat_parse_qs(component)
2865 for k, v in query.items():
2866 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2867 d_k += '_time'
2868 if d_k not in info and k in s_ks:
2869 info[d_k] = parse_duration(query[k][0])
2870
2871 # Youtube Music Auto-generated description
2872 if video_description:
2873 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2874 if mobj:
2875 release_year = mobj.group('release_year')
2876 release_date = mobj.group('release_date')
2877 if release_date:
2878 release_date = release_date.replace('-', '')
2879 if not release_year:
2880 release_year = release_date[:4]
2881 info.update({
2882 'album': mobj.group('album'.strip()),
2883 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2884 'track': mobj.group('track').strip(),
2885 'release_date': release_date,
2886 'release_year': int_or_none(release_year),
2887 })
2888
2889 initial_data = None
2890 if webpage:
2891 initial_data = self._extract_yt_initial_variable(
2892 webpage, self._YT_INITIAL_DATA_RE, video_id,
2893 'yt initial data')
2894 if not initial_data:
2895 headers = self.generate_api_headers(
2896 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
2897 session_index=self._extract_session_index(master_ytcfg))
2898
2899 initial_data = self._extract_response(
2900 item_id=video_id, ep='next', fatal=False,
2901 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
2902 note='Downloading initial data API JSON')
2903
2904 try:
2905 # This will error if there is no livechat
2906 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2907 info['subtitles']['live_chat'] = [{
2908 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2909 'video_id': video_id,
2910 'ext': 'json',
2911 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2912 }]
2913 except (KeyError, IndexError, TypeError):
2914 pass
2915
2916 if initial_data:
2917 info['chapters'] = (
2918 self._extract_chapters_from_json(initial_data, duration)
2919 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2920 or None)
2921
2922 contents = try_get(
2923 initial_data,
2924 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2925 list) or []
2926 for content in contents:
2927 vpir = content.get('videoPrimaryInfoRenderer')
2928 if vpir:
2929 stl = vpir.get('superTitleLink')
2930 if stl:
2931 stl = self._get_text(stl)
2932 if try_get(
2933 vpir,
2934 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2935 info['location'] = stl
2936 else:
2937 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2938 if mobj:
2939 info.update({
2940 'series': mobj.group(1),
2941 'season_number': int(mobj.group(2)),
2942 'episode_number': int(mobj.group(3)),
2943 })
2944 for tlb in (try_get(
2945 vpir,
2946 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2947 list) or []):
2948 tbr = tlb.get('toggleButtonRenderer') or {}
2949 for getter, regex in [(
2950 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2951 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2952 lambda x: x['accessibility'],
2953 lambda x: x['accessibilityData']['accessibilityData'],
2954 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2955 label = (try_get(tbr, getter, dict) or {}).get('label')
2956 if label:
2957 mobj = re.match(regex, label)
2958 if mobj:
2959 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2960 break
2961 sbr_tooltip = try_get(
2962 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2963 if sbr_tooltip:
2964 like_count, dislike_count = sbr_tooltip.split(' / ')
2965 info.update({
2966 'like_count': str_to_int(like_count),
2967 'dislike_count': str_to_int(dislike_count),
2968 })
2969 vsir = content.get('videoSecondaryInfoRenderer')
2970 if vsir:
2971 info['channel'] = self._get_text(try_get(
2972 vsir,
2973 lambda x: x['owner']['videoOwnerRenderer']['title'],
2974 dict))
2975 rows = try_get(
2976 vsir,
2977 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2978 list) or []
2979 multiple_songs = False
2980 for row in rows:
2981 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2982 multiple_songs = True
2983 break
2984 for row in rows:
2985 mrr = row.get('metadataRowRenderer') or {}
2986 mrr_title = mrr.get('title')
2987 if not mrr_title:
2988 continue
2989 mrr_title = self._get_text(mrr['title'])
2990 mrr_contents_text = self._get_text(mrr['contents'][0])
2991 if mrr_title == 'License':
2992 info['license'] = mrr_contents_text
2993 elif not multiple_songs:
2994 if mrr_title == 'Album':
2995 info['album'] = mrr_contents_text
2996 elif mrr_title == 'Artist':
2997 info['artist'] = mrr_contents_text
2998 elif mrr_title == 'Song':
2999 info['track'] = mrr_contents_text
3000
3001 fallbacks = {
3002 'channel': 'uploader',
3003 'channel_id': 'uploader_id',
3004 'channel_url': 'uploader_url',
3005 }
3006 for to, frm in fallbacks.items():
3007 if not info.get(to):
3008 info[to] = info.get(frm)
3009
3010 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3011 v = info.get(s_k)
3012 if v:
3013 info[d_k] = v
3014
3015 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3016 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3017 is_membersonly = None
3018 is_premium = None
3019 if initial_data and is_private is not None:
3020 is_membersonly = False
3021 is_premium = False
3022 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3023 badge_labels = set()
3024 for content in contents:
3025 if not isinstance(content, dict):
3026 continue
3027 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3028 for badge_label in badge_labels:
3029 if badge_label.lower() == 'members only':
3030 is_membersonly = True
3031 elif badge_label.lower() == 'premium':
3032 is_premium = True
3033 elif badge_label.lower() == 'unlisted':
3034 is_unlisted = True
3035
3036 info['availability'] = self._availability(
3037 is_private=is_private,
3038 needs_premium=is_premium,
3039 needs_subscription=is_membersonly,
3040 needs_auth=info['age_limit'] >= 18,
3041 is_unlisted=None if is_private is None else is_unlisted)
3042
3043 # get xsrf for annotations or comments
3044 get_annotations = self.get_param('writeannotations', False)
3045 get_comments = self.get_param('getcomments', False)
3046 if get_annotations or get_comments:
3047 xsrf_token = None
3048 if master_ytcfg:
3049 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
3050 if not xsrf_token:
3051 xsrf_token = self._search_regex(
3052 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
3053 webpage, 'xsrf token', group='xsrf_token', fatal=False)
3054
3055 # annotations
3056 if get_annotations:
3057 invideo_url = get_first(
3058 player_responses,
3059 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3060 expected_type=str)
3061 if xsrf_token and invideo_url:
3062 xsrf_field_name = None
3063 if master_ytcfg:
3064 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3065 if not xsrf_field_name:
3066 xsrf_field_name = self._search_regex(
3067 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3068 webpage, 'xsrf field name',
3069 group='xsrf_field_name', default='session_token')
3070 info['annotations'] = self._download_webpage(
3071 self._proto_relative_url(invideo_url),
3072 video_id, note='Downloading annotations',
3073 errnote='Unable to download video annotations', fatal=False,
3074 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3075
3076 if get_comments:
3077 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3078
3079 self.mark_watched(video_id, player_responses)
3080
3081 return info
3082
3083
3084 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3085 IE_DESC = 'YouTube.com tab'
3086 _VALID_URL = r'''(?x)
3087 https?://
3088 (?:\w+\.)?
3089 (?:
3090 youtube(?:kids)?\.com|
3091 invidio\.us
3092 )/
3093 (?:
3094 (?P<channel_type>channel|c|user|browse)/|
3095 (?P<not_channel>
3096 feed/|hashtag/|
3097 (?:playlist|watch)\?.*?\blist=
3098 )|
3099 (?!(?:%s)\b) # Direct URLs
3100 )
3101 (?P<id>[^/?\#&]+)
3102 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3103 IE_NAME = 'youtube:tab'
3104
3105 _TESTS = [{
3106 'note': 'playlists, multipage',
3107 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3108 'playlist_mincount': 94,
3109 'info_dict': {
3110 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3111 'title': 'Игорь Клейнер - Playlists',
3112 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3113 'uploader': 'Игорь Клейнер',
3114 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3115 },
3116 }, {
3117 'note': 'playlists, multipage, different order',
3118 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3119 'playlist_mincount': 94,
3120 'info_dict': {
3121 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3122 'title': 'Игорь Клейнер - Playlists',
3123 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3124 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3125 'uploader': 'Игорь Клейнер',
3126 },
3127 }, {
3128 'note': 'playlists, series',
3129 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3130 'playlist_mincount': 5,
3131 'info_dict': {
3132 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3133 'title': '3Blue1Brown - Playlists',
3134 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3135 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3136 'uploader': '3Blue1Brown',
3137 },
3138 }, {
3139 'note': 'playlists, singlepage',
3140 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3141 'playlist_mincount': 4,
3142 'info_dict': {
3143 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3144 'title': 'ThirstForScience - Playlists',
3145 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3146 'uploader': 'ThirstForScience',
3147 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3148 }
3149 }, {
3150 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3151 'only_matching': True,
3152 }, {
3153 'note': 'basic, single video playlist',
3154 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3155 'info_dict': {
3156 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3157 'uploader': 'Sergey M.',
3158 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3159 'title': 'youtube-dl public playlist',
3160 },
3161 'playlist_count': 1,
3162 }, {
3163 'note': 'empty playlist',
3164 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3165 'info_dict': {
3166 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3167 'uploader': 'Sergey M.',
3168 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3169 'title': 'youtube-dl empty playlist',
3170 },
3171 'playlist_count': 0,
3172 }, {
3173 'note': 'Home tab',
3174 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3175 'info_dict': {
3176 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3177 'title': 'lex will - Home',
3178 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3179 'uploader': 'lex will',
3180 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3181 },
3182 'playlist_mincount': 2,
3183 }, {
3184 'note': 'Videos tab',
3185 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3186 'info_dict': {
3187 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3188 'title': 'lex will - Videos',
3189 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3190 'uploader': 'lex will',
3191 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3192 },
3193 'playlist_mincount': 975,
3194 }, {
3195 'note': 'Videos tab, sorted by popular',
3196 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3197 'info_dict': {
3198 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3199 'title': 'lex will - Videos',
3200 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3201 'uploader': 'lex will',
3202 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3203 },
3204 'playlist_mincount': 199,
3205 }, {
3206 'note': 'Playlists tab',
3207 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3208 'info_dict': {
3209 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3210 'title': 'lex will - Playlists',
3211 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3212 'uploader': 'lex will',
3213 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3214 },
3215 'playlist_mincount': 17,
3216 }, {
3217 'note': 'Community tab',
3218 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3219 'info_dict': {
3220 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3221 'title': 'lex will - Community',
3222 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3223 'uploader': 'lex will',
3224 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3225 },
3226 'playlist_mincount': 18,
3227 }, {
3228 'note': 'Channels tab',
3229 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3230 'info_dict': {
3231 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3232 'title': 'lex will - Channels',
3233 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3234 'uploader': 'lex will',
3235 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3236 },
3237 'playlist_mincount': 12,
3238 }, {
3239 'note': 'Search tab',
3240 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3241 'playlist_mincount': 40,
3242 'info_dict': {
3243 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3244 'title': '3Blue1Brown - Search - linear algebra',
3245 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3246 'uploader': '3Blue1Brown',
3247 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3248 },
3249 }, {
3250 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3251 'only_matching': True,
3252 }, {
3253 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3254 'only_matching': True,
3255 }, {
3256 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3257 'only_matching': True,
3258 }, {
3259 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3260 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3261 'info_dict': {
3262 'title': '29C3: Not my department',
3263 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3264 'uploader': 'Christiaan008',
3265 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3266 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3267 },
3268 'playlist_count': 96,
3269 }, {
3270 'note': 'Large playlist',
3271 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3272 'info_dict': {
3273 'title': 'Uploads from Cauchemar',
3274 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3275 'uploader': 'Cauchemar',
3276 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3277 },
3278 'playlist_mincount': 1123,
3279 }, {
3280 'note': 'even larger playlist, 8832 videos',
3281 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3282 'only_matching': True,
3283 }, {
3284 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3285 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3286 'info_dict': {
3287 'title': 'Uploads from Interstellar Movie',
3288 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3289 'uploader': 'Interstellar Movie',
3290 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3291 },
3292 'playlist_mincount': 21,
3293 }, {
3294 'note': 'Playlist with "show unavailable videos" button',
3295 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3296 'info_dict': {
3297 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3298 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3299 'uploader': 'Phim Siêu Nhân Nhật Bản',
3300 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3301 },
3302 'playlist_mincount': 200,
3303 }, {
3304 'note': 'Playlist with unavailable videos in page 7',
3305 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3306 'info_dict': {
3307 'title': 'Uploads from BlankTV',
3308 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3309 'uploader': 'BlankTV',
3310 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3311 },
3312 'playlist_mincount': 1000,
3313 }, {
3314 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3315 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3316 'info_dict': {
3317 'title': 'Data Analysis with Dr Mike Pound',
3318 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3319 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3320 'uploader': 'Computerphile',
3321 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3322 },
3323 'playlist_mincount': 11,
3324 }, {
3325 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3326 'only_matching': True,
3327 }, {
3328 'note': 'Playlist URL that does not actually serve a playlist',
3329 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3330 'info_dict': {
3331 'id': 'FqZTN594JQw',
3332 'ext': 'webm',
3333 'title': "Smiley's People 01 detective, Adventure Series, Action",
3334 'uploader': 'STREEM',
3335 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3336 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3337 'upload_date': '20150526',
3338 'license': 'Standard YouTube License',
3339 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3340 'categories': ['People & Blogs'],
3341 'tags': list,
3342 'view_count': int,
3343 'like_count': int,
3344 'dislike_count': int,
3345 },
3346 'params': {
3347 'skip_download': True,
3348 },
3349 'skip': 'This video is not available.',
3350 'add_ie': [YoutubeIE.ie_key()],
3351 }, {
3352 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3353 'only_matching': True,
3354 }, {
3355 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3356 'only_matching': True,
3357 }, {
3358 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3359 'info_dict': {
3360 'id': 'FMtPN8yp5LU', # This will keep changing
3361 'ext': 'mp4',
3362 'title': compat_str,
3363 'uploader': 'Sky News',
3364 'uploader_id': 'skynews',
3365 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3366 'upload_date': r're:\d{8}',
3367 'description': compat_str,
3368 'categories': ['News & Politics'],
3369 'tags': list,
3370 'like_count': int,
3371 'dislike_count': int,
3372 },
3373 'params': {
3374 'skip_download': True,
3375 },
3376 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3377 }, {
3378 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3379 'info_dict': {
3380 'id': 'a48o2S1cPoo',
3381 'ext': 'mp4',
3382 'title': 'The Young Turks - Live Main Show',
3383 'uploader': 'The Young Turks',
3384 'uploader_id': 'TheYoungTurks',
3385 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3386 'upload_date': '20150715',
3387 'license': 'Standard YouTube License',
3388 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3389 'categories': ['News & Politics'],
3390 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3391 'like_count': int,
3392 'dislike_count': int,
3393 },
3394 'params': {
3395 'skip_download': True,
3396 },
3397 'only_matching': True,
3398 }, {
3399 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3400 'only_matching': True,
3401 }, {
3402 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3403 'only_matching': True,
3404 }, {
3405 'note': 'A channel that is not live. Should raise error',
3406 'url': 'https://www.youtube.com/user/numberphile/live',
3407 'only_matching': True,
3408 }, {
3409 'url': 'https://www.youtube.com/feed/trending',
3410 'only_matching': True,
3411 }, {
3412 'url': 'https://www.youtube.com/feed/library',
3413 'only_matching': True,
3414 }, {
3415 'url': 'https://www.youtube.com/feed/history',
3416 'only_matching': True,
3417 }, {
3418 'url': 'https://www.youtube.com/feed/subscriptions',
3419 'only_matching': True,
3420 }, {
3421 'url': 'https://www.youtube.com/feed/watch_later',
3422 'only_matching': True,
3423 }, {
3424 'note': 'Recommended - redirects to home page',
3425 'url': 'https://www.youtube.com/feed/recommended',
3426 'only_matching': True,
3427 }, {
3428 'note': 'inline playlist with not always working continuations',
3429 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3430 'only_matching': True,
3431 }, {
3432 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3433 'only_matching': True,
3434 }, {
3435 'url': 'https://www.youtube.com/course',
3436 'only_matching': True,
3437 }, {
3438 'url': 'https://www.youtube.com/zsecurity',
3439 'only_matching': True,
3440 }, {
3441 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3442 'only_matching': True,
3443 }, {
3444 'url': 'https://www.youtube.com/TheYoungTurks/live',
3445 'only_matching': True,
3446 }, {
3447 'url': 'https://www.youtube.com/hashtag/cctv9',
3448 'info_dict': {
3449 'id': 'cctv9',
3450 'title': '#cctv9',
3451 },
3452 'playlist_mincount': 350,
3453 }, {
3454 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3455 'only_matching': True,
3456 }, {
3457 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3458 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3459 'only_matching': True
3460 }, {
3461 'note': '/browse/ should redirect to /channel/',
3462 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3463 'only_matching': True
3464 }, {
3465 'note': 'VLPL, should redirect to playlist?list=PL...',
3466 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3467 'info_dict': {
3468 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3469 'uploader': 'NoCopyrightSounds',
3470 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3471 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3472 'title': 'NCS Releases',
3473 },
3474 'playlist_mincount': 166,
3475 }, {
3476 'note': 'Topic, should redirect to playlist?list=UU...',
3477 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3478 'info_dict': {
3479 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3480 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3481 'title': 'Uploads from Royalty Free Music - Topic',
3482 'uploader': 'Royalty Free Music - Topic',
3483 },
3484 'expected_warnings': [
3485 'A channel/user page was given',
3486 'The URL does not have a videos tab',
3487 ],
3488 'playlist_mincount': 101,
3489 }, {
3490 'note': 'Topic without a UU playlist',
3491 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3492 'info_dict': {
3493 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3494 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3495 },
3496 'expected_warnings': [
3497 'A channel/user page was given',
3498 'The URL does not have a videos tab',
3499 'Falling back to channel URL',
3500 ],
3501 'playlist_mincount': 9,
3502 }, {
3503 'note': 'Youtube music Album',
3504 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3505 'info_dict': {
3506 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3507 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3508 },
3509 'playlist_count': 50,
3510 }, {
3511 'note': 'unlisted single video playlist',
3512 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3513 'info_dict': {
3514 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3515 'uploader': 'colethedj',
3516 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3517 'title': 'yt-dlp unlisted playlist test',
3518 'availability': 'unlisted'
3519 },
3520 'playlist_count': 1,
3521 }]
3522
3523 @classmethod
3524 def suitable(cls, url):
3525 return False if YoutubeIE.suitable(url) else super(
3526 YoutubeTabIE, cls).suitable(url)
3527
3528 def _extract_channel_id(self, webpage):
3529 channel_id = self._html_search_meta(
3530 'channelId', webpage, 'channel id', default=None)
3531 if channel_id:
3532 return channel_id
3533 channel_url = self._html_search_meta(
3534 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3535 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3536 'twitter:app:url:googleplay'), webpage, 'channel url')
3537 return self._search_regex(
3538 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3539 channel_url, 'channel id')
3540
3541 @staticmethod
3542 def _extract_basic_item_renderer(item):
3543 # Modified from _extract_grid_item_renderer
3544 known_basic_renderers = (
3545 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3546 )
3547 for key, renderer in item.items():
3548 if not isinstance(renderer, dict):
3549 continue
3550 elif key in known_basic_renderers:
3551 return renderer
3552 elif key.startswith('grid') and key.endswith('Renderer'):
3553 return renderer
3554
3555 def _grid_entries(self, grid_renderer):
3556 for item in grid_renderer['items']:
3557 if not isinstance(item, dict):
3558 continue
3559 renderer = self._extract_basic_item_renderer(item)
3560 if not isinstance(renderer, dict):
3561 continue
3562 title = self._get_text(renderer.get('title'))
3563
3564 # playlist
3565 playlist_id = renderer.get('playlistId')
3566 if playlist_id:
3567 yield self.url_result(
3568 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3569 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3570 video_title=title)
3571 continue
3572 # video
3573 video_id = renderer.get('videoId')
3574 if video_id:
3575 yield self._extract_video(renderer)
3576 continue
3577 # channel
3578 channel_id = renderer.get('channelId')
3579 if channel_id:
3580 yield self.url_result(
3581 'https://www.youtube.com/channel/%s' % channel_id,
3582 ie=YoutubeTabIE.ie_key(), video_title=title)
3583 continue
3584 # generic endpoint URL support
3585 ep_url = urljoin('https://www.youtube.com/', try_get(
3586 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3587 compat_str))
3588 if ep_url:
3589 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3590 if ie.suitable(ep_url):
3591 yield self.url_result(
3592 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3593 break
3594
3595 def _shelf_entries_from_content(self, shelf_renderer):
3596 content = shelf_renderer.get('content')
3597 if not isinstance(content, dict):
3598 return
3599 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3600 if renderer:
3601 # TODO: add support for nested playlists so each shelf is processed
3602 # as separate playlist
3603 # TODO: this includes only first N items
3604 for entry in self._grid_entries(renderer):
3605 yield entry
3606 renderer = content.get('horizontalListRenderer')
3607 if renderer:
3608 # TODO
3609 pass
3610
3611 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3612 ep = try_get(
3613 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3614 compat_str)
3615 shelf_url = urljoin('https://www.youtube.com', ep)
3616 if shelf_url:
3617 # Skipping links to another channels, note that checking for
3618 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3619 # will not work
3620 if skip_channels and '/channels?' in shelf_url:
3621 return
3622 title = self._get_text(shelf_renderer, lambda x: x['title'])
3623 yield self.url_result(shelf_url, video_title=title)
3624 # Shelf may not contain shelf URL, fallback to extraction from content
3625 for entry in self._shelf_entries_from_content(shelf_renderer):
3626 yield entry
3627
3628 def _playlist_entries(self, video_list_renderer):
3629 for content in video_list_renderer['contents']:
3630 if not isinstance(content, dict):
3631 continue
3632 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3633 if not isinstance(renderer, dict):
3634 continue
3635 video_id = renderer.get('videoId')
3636 if not video_id:
3637 continue
3638 yield self._extract_video(renderer)
3639
3640 def _rich_entries(self, rich_grid_renderer):
3641 renderer = try_get(
3642 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3643 video_id = renderer.get('videoId')
3644 if not video_id:
3645 return
3646 yield self._extract_video(renderer)
3647
3648 def _video_entry(self, video_renderer):
3649 video_id = video_renderer.get('videoId')
3650 if video_id:
3651 return self._extract_video(video_renderer)
3652
3653 def _post_thread_entries(self, post_thread_renderer):
3654 post_renderer = try_get(
3655 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3656 if not post_renderer:
3657 return
3658 # video attachment
3659 video_renderer = try_get(
3660 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3661 video_id = video_renderer.get('videoId')
3662 if video_id:
3663 entry = self._extract_video(video_renderer)
3664 if entry:
3665 yield entry
3666 # playlist attachment
3667 playlist_id = try_get(
3668 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3669 if playlist_id:
3670 yield self.url_result(
3671 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3672 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3673 # inline video links
3674 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3675 for run in runs:
3676 if not isinstance(run, dict):
3677 continue
3678 ep_url = try_get(
3679 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3680 if not ep_url:
3681 continue
3682 if not YoutubeIE.suitable(ep_url):
3683 continue
3684 ep_video_id = YoutubeIE._match_id(ep_url)
3685 if video_id == ep_video_id:
3686 continue
3687 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3688
3689 def _post_thread_continuation_entries(self, post_thread_continuation):
3690 contents = post_thread_continuation.get('contents')
3691 if not isinstance(contents, list):
3692 return
3693 for content in contents:
3694 renderer = content.get('backstagePostThreadRenderer')
3695 if not isinstance(renderer, dict):
3696 continue
3697 for entry in self._post_thread_entries(renderer):
3698 yield entry
3699
3700 r''' # unused
3701 def _rich_grid_entries(self, contents):
3702 for content in contents:
3703 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3704 if video_renderer:
3705 entry = self._video_entry(video_renderer)
3706 if entry:
3707 yield entry
3708 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of a tab, following API continuations page by page.

        `continuation_list` is a 1-element list used as a writable cell so the
        nested generator can pass the next continuation token back out.
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Not an item section; may still be a rich item (e.g. home feed)
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # Dispatch table: renderer key -> generator of entries
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                    # Fall back to the section's own continuation token
                    if not continuation_list[0]:
                        continuation_list[0] = self._extract_continuation(is_renderer)

                # Last resort: continuation attached to the parent renderer
                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        # First page comes from the webpage data itself
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        # Subsequent pages are fetched via the innertube API
        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Carry visitorData forward so YouTube keeps serving consistent pages
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Older-style continuation responses (continuationContents)
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # Newer-style continuation responses (onResponseReceived*); the
            # tuple maps the item key to the key its handler expects
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Re-wrap the flat item list under the key the handler expects
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3824
3825 @staticmethod
3826 def _extract_selected_tab(tabs):
3827 for tab in tabs:
3828 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3829 if renderer.get('selected') is True:
3830 return renderer
3831 else:
3832 raise ExtractorError('Unable to find selected tab')
3833
3834 @classmethod
3835 def _extract_uploader(cls, data):
3836 uploader = {}
3837 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3838 owner = try_get(
3839 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3840 if owner:
3841 uploader['uploader'] = owner.get('text')
3842 uploader['uploader_id'] = try_get(
3843 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3844 uploader['uploader_url'] = urljoin(
3845 'https://www.youtube.com/',
3846 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3847 return {k: v for k, v in uploader.items() if v is not None}
3848
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build the playlist result for a tabbed page (channel/playlist/feed),
        collecting metadata from whichever metadata renderer is present."""
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        # Channel pages carry channelMetadataRenderer; plain playlists carry
        # playlistMetadataRenderer instead
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            # Prefer the avatar thumbnails; fall back to the sidebar's
            # playlist video thumbnail
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # Hashtag pages have no metadata renderer; use the hashtag header,
            # otherwise fall back to the id itself
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        # Append tab name (e.g. " - Videos") to disambiguate the title
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # Playlist pages: uploader info comes from the sidebar instead
            metadata.update(self._extract_uploader(data))
            metadata.update({
                'channel': metadata['uploader'],
                'channel_id': metadata['uploader_id'],
                'channel_url': metadata['uploader_url']})
        ytcfg = self.extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
3923
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Yield the videos of an 'infinite' Mix playlist, paging through the
        `next` API until the first video comes around again (Mixes loop)."""
        first_id = last_id = None
        ytcfg = self.extract_ytcfg(playlist_id, webpage)
        headers = self.generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # Pages overlap: resume right after the last video already yielded
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            # Continuation data for the next page comes from the last panel video
            # NOTE(review): watch_endpoint can be None if the renderer path is
            # missing, which would make the .get() calls below raise — confirm
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query, ep='next', headers=headers, ytcfg=ytcfg,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3959
3960 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
3961 title = playlist.get('title') or try_get(
3962 data, lambda x: x['titleText']['simpleText'], compat_str)
3963 playlist_id = playlist.get('playlistId') or item_id
3964
3965 # Delegating everything except mix playlists to regular tab-based playlist URL
3966 playlist_url = urljoin(url, try_get(
3967 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3968 compat_str))
3969 if playlist_url and playlist_url != url:
3970 return self.url_result(
3971 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3972 video_title=title)
3973
3974 return self.playlist_result(
3975 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
3976 playlist_id=playlist_id, playlist_title=title)
3977
3978 def _extract_availability(self, data):
3979 """
3980 Gets the availability of a given playlist/tab.
3981 Note: Unless YouTube tells us explicitly, we do not assume it is public
3982 @param data: response
3983 """
3984 is_private = is_unlisted = None
3985 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3986 badge_labels = self._extract_badges(renderer)
3987
3988 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3989 privacy_dropdown_entries = try_get(
3990 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3991 for renderer_dict in privacy_dropdown_entries:
3992 is_selected = try_get(
3993 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3994 if not is_selected:
3995 continue
3996 label = self._get_text(
3997 try_get(renderer_dict, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
3998 if label:
3999 badge_labels.add(label.lower())
4000 break
4001
4002 for badge_label in badge_labels:
4003 if badge_label == 'unlisted':
4004 is_unlisted = True
4005 elif badge_label == 'private':
4006 is_private = True
4007 elif badge_label == 'public':
4008 is_unlisted = is_private = False
4009 return self._availability(is_private, False, False, False, is_unlisted)
4010
4011 @staticmethod
4012 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4013 sidebar_renderer = try_get(
4014 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4015 for item in sidebar_renderer:
4016 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4017 if renderer:
4018 return renderer
4019
    def _reload_with_unavailable_videos(self, item_id, data, webpage):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.
        """
        browse_id = params = None
        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
        if not renderer:
            return
        # Look for the "Show unavailable videos" entry in the sidebar menu
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

        ytcfg = self.extract_ytcfg(item_id, webpage)
        headers = self.generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=item_id),
            visitor_data=try_get(
                self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        # Fall back to defaults when the button did not yield endpoint data;
        # 'VL' + playlist id is the standard browseId for playlists
        query = {
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False, ytcfg=ytcfg,
            note='Downloading API JSON with unavailable videos')
4058
4059 def _extract_webpage(self, url, item_id):
4060 retries = self.get_param('extractor_retries', 3)
4061 count = -1
4062 last_error = 'Incomplete yt initial data recieved'
4063 while count < retries:
4064 count += 1
4065 # Sometimes youtube returns a webpage with incomplete ytInitialData
4066 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4067 if count:
4068 self.report_warning('%s. Retrying ...' % last_error)
4069 webpage = self._download_webpage(
4070 url, item_id,
4071 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4072 data = self.extract_yt_initial_data(item_id, webpage)
4073 if data.get('contents') or data.get('currentVideoEndpoint'):
4074 break
4075 # Extract alerts here only when there is error
4076 self._extract_and_report_alerts(data)
4077 if count >= retries:
4078 raise ExtractorError(last_error)
4079 return webpage, data
4080
4081 @staticmethod
4082 def _smuggle_data(entries, data):
4083 for entry in entries:
4084 if data:
4085 entry['url'] = smuggle_url(entry['url'], data)
4086 yield entry
4087
4088 def _real_extract(self, url):
4089 url, smuggled_data = unsmuggle_url(url, {})
4090 if self.is_music_url(url):
4091 smuggled_data['is_music_url'] = True
4092 info_dict = self.__real_extract(url, smuggled_data)
4093 if info_dict.get('entries'):
4094 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4095 return info_dict
4096
4097 _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4098
4099 def __real_extract(self, url, smuggled_data):
4100 item_id = self._match_id(url)
4101 url = compat_urlparse.urlunparse(
4102 compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
4103 compat_opts = self.get_param('compat_opts', [])
4104
4105 def get_mobj(url):
4106 mobj = self._url_re.match(url).groupdict()
4107 mobj.update((k, '') for k, v in mobj.items() if v is None)
4108 return mobj
4109
4110 mobj = get_mobj(url)
4111 # Youtube returns incomplete data if tabname is not lower case
4112 pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
4113
4114 if is_channel:
4115 if smuggled_data.get('is_music_url'):
4116 if item_id[:2] == 'VL':
4117 # Youtube music VL channels have an equivalent playlist
4118 item_id = item_id[2:]
4119 pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
4120 elif item_id[:2] == 'MP':
4121 # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
4122 item_id = self._search_regex(
4123 r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
4124 self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
4125 'playlist id')
4126 pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
4127 elif mobj['channel_type'] == 'browse':
4128 # Youtube music /browse/ should be changed to /channel/
4129 pre = 'https://www.youtube.com/channel/%s' % item_id
4130 if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
4131 # Home URLs should redirect to /videos/
4132 self.report_warning(
4133 'A channel/user page was given. All the channel\'s videos will be downloaded. '
4134 'To download only the videos in the home page, add a "/featured" to the URL')
4135 tab = '/videos'
4136
4137 url = ''.join((pre, tab, post))
4138 mobj = get_mobj(url)
4139
4140 # Handle both video/playlist URLs
4141 qs = parse_qs(url)
4142 video_id = qs.get('v', [None])[0]
4143 playlist_id = qs.get('list', [None])[0]
4144
4145 if not video_id and mobj['not_channel'].startswith('watch'):
4146 if not playlist_id:
4147 # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
4148 raise ExtractorError('Unable to recognize tab page')
4149 # Common mistake: https://www.youtube.com/watch?list=playlist_id
4150 self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
4151 url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
4152 mobj = get_mobj(url)
4153
4154 if video_id and playlist_id:
4155 if self.get_param('noplaylist'):
4156 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
4157 return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
4158 self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))
4159
4160 webpage, data = self._extract_webpage(url, item_id)
4161
4162 tabs = try_get(
4163 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4164 if tabs:
4165 selected_tab = self._extract_selected_tab(tabs)
4166 tab_name = selected_tab.get('title', '')
4167 if 'no-youtube-channel-redirect' not in compat_opts:
4168 if mobj['tab'] == '/live':
4169 # Live tab should have redirected to the video
4170 raise ExtractorError('The channel is not currently live', expected=True)
4171 if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
4172 if not mobj['not_channel'] and item_id[:2] == 'UC':
4173 # Topic channels don't have /videos. Use the equivalent playlist instead
4174 self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
4175 pl_id = 'UU%s' % item_id[2:]
4176 pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
4177 try:
4178 pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
4179 for alert_type, alert_message in self._extract_alerts(pl_data):
4180 if alert_type == 'error':
4181 raise ExtractorError('Youtube said: %s' % alert_message)
4182 item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
4183 except ExtractorError:
4184 self.report_warning('The playlist gave error. Falling back to channel URL')
4185 else:
4186 self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))
4187
4188 self.write_debug('Final URL: %s' % url)
4189
4190 # YouTube sometimes provides a button to reload playlist with unavailable videos.
4191 if 'no-youtube-unavailable-videos' not in compat_opts:
4192 data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
4193 self._extract_and_report_alerts(data)
4194 tabs = try_get(
4195 data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4196 if tabs:
4197 return self._extract_from_tabs(item_id, webpage, data, tabs)
4198
4199 playlist = try_get(
4200 data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4201 if playlist:
4202 return self._extract_from_playlist(item_id, url, data, playlist, webpage)
4203
4204 video_id = try_get(
4205 data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
4206 compat_str) or video_id
4207 if video_id:
4208 if mobj['tab'] != '/live': # live tab is expected to redirect to video
4209 self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
4210 return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
4211
4212 raise ExtractorError('Unable to recognize tab page')
4213
4214
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Anything the tab extractor claims is not handled here
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        # URLs carrying a video id are watch pages, not bare playlists
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # Record music-ness of the original URL before it is rewritten below
        smuggle_music = YoutubeBaseInfoExtractor.is_music_url(url)
        canonical_url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if smuggle_music:
            canonical_url = smuggle_url(canonical_url, {'is_music_url': True})
        return self.url_result(
            canonical_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4299
4300
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        video_id, playlist_id = match.group('id'), match.group('playlist_id')
        # Hand off to the generic tab extractor via the equivalent /watch URL
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4339
4340
class YoutubeYtUserIE(InfoExtractor):
    """Extractor for the "ytuser:<name>" shorthand."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        # Resolve the shorthand to the canonical /user/ page
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4354
4355
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos are exposed as the special "LL" playlist
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4373
4374
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Extractor for "ytsearchN:<query>" style search pseudo-URLs."""
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra search request parameters (e.g. a sort-order filter); None for default
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* extracted video results for *query*, paging the API."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            # Fold the continuation token (if any) into the request payload
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # The first page and continuation pages nest the result list differently,
            # hence the two alternative lookup paths
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    # Skip anything that is not a plain video renderer entry
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token found anywhere on this page: no more results
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
4442
4443
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor that returns the newest uploads first."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Encoded search filter selecting "sort by upload date"
    _SEARCH_PARAMS = 'CAI%3D'
4449
4450
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # Either of the two known parameter names may carry the search terms
        query = (params.get('search_query') or params.get('q'))[0]
        # 'sp' carries encoded search filters; default to no filters
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4477
4478
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common machinery for the authenticated feed extractors.

    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Every feed lives at a fixed /feed/<name> URL handled by the tab extractor
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4495
4496
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The watch-later list is exposed as the special "WL" playlist
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4509
4510
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extractor for the recommended-videos feed (the YouTube home page)."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Recommendations are also served to anonymous users
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4526
4527
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's subscriptions feed."""
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4539
4540
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's watch-history feed."""
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4549
4550
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Reaching this extractor means the URL matched a /watch page with no
        # video id — almost always an unquoted '&' eaten by the user's shell
        message = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(message, expected=True)
4598
4599
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # A valid YouTube id is 11 characters; anything shorter was cut off
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)