]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[youtube] Simplify `_get_text` early
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 mimetype2ext,
42 network_exceptions,
43 orderedSet,
44 parse_codecs,
45 parse_count,
46 parse_duration,
47 parse_iso8601,
48 qualities,
49 remove_start,
50 smuggle_url,
51 str_or_none,
52 str_to_int,
53 traverse_obj,
54 try_get,
55 unescapeHTML,
56 unified_strdate,
57 unsmuggle_url,
58 update_url_query,
59 url_or_none,
60 urlencode_postdata,
61 urljoin,
62 variadic,
63 )
64
65
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    parsed_url = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed_url.query)
68
69
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Legacy Google account endpoints (the username/password flow is broken; see _login)
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # URL path components that can never be a channel/user name on youtube.com
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches playlist IDs: prefixed alphanumeric IDs plus special lists (WL/LL/LM/RDMM)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
89
90 def _login(self):
91 """
92 Attempt to log in to YouTube.
93 True is returned if successful or skipped.
94 False is returned if login failed.
95
96 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
97 """
98
99 def warn(message):
100 self.report_warning(message)
101
102 # username+password login is broken
103 if (self._LOGIN_REQUIRED
104 and self.get_param('cookiefile') is None
105 and self.get_param('cookiesfrombrowser') is None):
106 self.raise_login_required(
107 'Login details are needed to download this content', method='cookies')
108 username, password = self._get_login_info()
109 if username:
110 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
111 return
112
113 # Everything below this is broken!
114 r'''
115 # No authentication to be performed
116 if username is None:
117 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
118 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
119 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
120 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
121 return True
122
123 login_page = self._download_webpage(
124 self._LOGIN_URL, None,
125 note='Downloading login page',
126 errnote='unable to fetch login page', fatal=False)
127 if login_page is False:
128 return
129
130 login_form = self._hidden_inputs(login_page)
131
132 def req(url, f_req, note, errnote):
133 data = login_form.copy()
134 data.update({
135 'pstMsg': 1,
136 'checkConnection': 'youtube',
137 'checkedDomains': 'youtube',
138 'hl': 'en',
139 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
140 'f.req': json.dumps(f_req),
141 'flowName': 'GlifWebSignIn',
142 'flowEntry': 'ServiceLogin',
143 # TODO: reverse actual botguard identifier generation algo
144 'bgRequest': '["identifier",""]',
145 })
146 return self._download_json(
147 url, None, note=note, errnote=errnote,
148 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
149 fatal=False,
150 data=urlencode_postdata(data), headers={
151 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
152 'Google-Accounts-XSRF': 1,
153 })
154
155 lookup_req = [
156 username,
157 None, [], None, 'US', None, None, 2, False, True,
158 [
159 None, None,
160 [2, 1, None, 1,
161 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
162 None, [], 4],
163 1, [None, None, []], None, None, None, True
164 ],
165 username,
166 ]
167
168 lookup_results = req(
169 self._LOOKUP_URL, lookup_req,
170 'Looking up account info', 'Unable to look up account info')
171
172 if lookup_results is False:
173 return False
174
175 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
176 if not user_hash:
177 warn('Unable to extract user hash')
178 return False
179
180 challenge_req = [
181 user_hash,
182 None, 1, None, [1, None, None, None, [password, None, True]],
183 [
184 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
185 1, [None, None, []], None, None, None, True
186 ]]
187
188 challenge_results = req(
189 self._CHALLENGE_URL, challenge_req,
190 'Logging in', 'Unable to log in')
191
192 if challenge_results is False:
193 return
194
195 login_res = try_get(challenge_results, lambda x: x[0][5], list)
196 if login_res:
197 login_msg = try_get(login_res, lambda x: x[5], compat_str)
198 warn(
199 'Unable to login: %s' % 'Invalid password'
200 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
201 return False
202
203 res = try_get(challenge_results, lambda x: x[0][-1], list)
204 if not res:
205 warn('Unable to extract result entry')
206 return False
207
208 login_challenge = try_get(res, lambda x: x[0][0], list)
209 if login_challenge:
210 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
211 if challenge_str == 'TWO_STEP_VERIFICATION':
212 # SEND_SUCCESS - TFA code has been successfully sent to phone
213 # QUOTA_EXCEEDED - reached the limit of TFA codes
214 status = try_get(login_challenge, lambda x: x[5], compat_str)
215 if status == 'QUOTA_EXCEEDED':
216 warn('Exceeded the limit of TFA codes, try later')
217 return False
218
219 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
220 if not tl:
221 warn('Unable to extract TL')
222 return False
223
224 tfa_code = self._get_tfa_info('2-step verification code')
225
226 if not tfa_code:
227 warn(
228 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
229 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
230 return False
231
232 tfa_code = remove_start(tfa_code, 'G-')
233
234 tfa_req = [
235 user_hash, None, 2, None,
236 [
237 9, None, None, None, None, None, None, None,
238 [None, tfa_code, True, 2]
239 ]]
240
241 tfa_results = req(
242 self._TFA_URL.format(tl), tfa_req,
243 'Submitting TFA code', 'Unable to submit TFA code')
244
245 if tfa_results is False:
246 return False
247
248 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
249 if tfa_res:
250 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
251 warn(
252 'Unable to finish TFA: %s' % 'Invalid TFA code'
253 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
254 return False
255
256 check_cookie_url = try_get(
257 tfa_results, lambda x: x[0][-1][2], compat_str)
258 else:
259 CHALLENGES = {
260 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
261 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
262 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
263 }
264 challenge = CHALLENGES.get(
265 challenge_str,
266 '%s returned error %s.' % (self.IE_NAME, challenge_str))
267 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
268 return False
269 else:
270 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
271
272 if not check_cookie_url:
273 warn('Unable to extract CheckCookie URL')
274 return False
275
276 check_cookie_results = self._download_webpage(
277 check_cookie_url, None, 'Checking cookie', fatal=False)
278
279 if check_cookie_results is False:
280 return False
281
282 if 'https://myaccount.google.com/' not in check_cookie_results:
283 warn('Unable to log in')
284 return False
285
286 return True
287 '''
288
289 def _initialize_consent(self):
290 cookies = self._get_cookies('https://www.youtube.com/')
291 if cookies.get('__Secure-3PSID'):
292 return
293 consent_id = None
294 consent = cookies.get('CONSENT')
295 if consent:
296 if 'YES' in consent.value:
297 return
298 consent_id = self._search_regex(
299 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
300 if not consent_id:
301 consent_id = random.randint(100, 999)
302 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
303
304 def _real_initialize(self):
305 self._initialize_consent()
306 if self._downloader is None:
307 return
308 if not self._login():
309 return
310
    # Regexes locating the ytInitialData / ytInitialPlayerResponse JSON blobs in a webpage
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Terminator pattern appended after the JSON regexes above to bound the match
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Built-in InnerTube configuration per client, used as fallback when values
    # are missing from the page's ytcfg (see _ytcfg_get_safe / _get_default_ytcfg)
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 3
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 55
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 21
        },
        'IOS': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 5
        },
        'IOS_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 26
        },
        'IOS_MESSAGES_EXTENSION': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'IOS_MESSAGES_EXTENSION',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 66
        }
    }

    # API hostname per client; clients not listed here use the WEB host
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }

    # clients starting with _ cannot be explicitly requested by the user
    _YT_CLIENTS = {
        'web': 'WEB',
        'web_music': 'WEB_REMIX',
        '_web_embedded': 'WEB_EMBEDDED_PLAYER',
        '_web_agegate': 'TVHTML5',
        'android': 'ANDROID',
        'android_music': 'ANDROID_MUSIC',
        '_android_embedded': 'ANDROID_EMBEDDED_PLAYER',
        '_android_agegate': 'ANDROID',
        'ios': 'IOS',
        'ios_music': 'IOS_MUSIC',
        '_ios_embedded': 'IOS_MESSAGES_EXTENSION',
        '_ios_agegate': 'IOS'
    }
467
468 def _get_default_ytcfg(self, client='WEB'):
469 if client in self._YT_DEFAULT_YTCFGS:
470 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
471 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
472 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
473
474 def _get_innertube_host(self, client='WEB'):
475 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
476
477 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
478 # try_get but with fallback to default ytcfg client values when present
479 _func = lambda y: try_get(y, getter, expected_type)
480 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
481
    def _extract_client_name(self, ytcfg, default_client='WEB'):
        # INNERTUBE_CLIENT_NAME from the page ytcfg, with default-client fallback
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
484
485 @staticmethod
486 def _extract_session_index(*data):
487 for ytcfg in data:
488 session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
489 if session_index is not None:
490 return session_index
491
    def _extract_client_version(self, ytcfg, default_client='WEB'):
        # INNERTUBE_CLIENT_VERSION from the page ytcfg, with default-client fallback
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
494
    def _extract_api_key(self, ytcfg=None, default_client='WEB'):
        # INNERTUBE_API_KEY from the page ytcfg, with default-client fallback
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
497
498 def _extract_context(self, ytcfg=None, default_client='WEB'):
499 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
500 context = _get_context(ytcfg)
501 if context:
502 return context
503
504 context = _get_context(self._get_default_ytcfg(default_client))
505 if not ytcfg:
506 return context
507
508 # Recreate the client context (required)
509 context['client'].update({
510 'clientVersion': self._extract_client_version(ytcfg, default_client),
511 'clientName': self._extract_client_name(ytcfg, default_client),
512 })
513 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
514 if visitor_data:
515 context['client']['visitorData'] = visitor_data
516 return context
517
518 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
519 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
520 # See: https://github.com/yt-dlp/yt-dlp/issues/393
521 yt_cookies = self._get_cookies('https://www.youtube.com')
522 sapisid_cookie = dict_get(
523 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
524 if sapisid_cookie is None or not sapisid_cookie.value:
525 return
526 time_now = round(time.time())
527 # SAPISID cookie is required if not already present
528 if not yt_cookies.get('SAPISID'):
529 self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
530 self._set_cookie(
531 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
532 self.write_debug('Extracted SAPISID cookie', only_once=True)
533 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
534 sapisidhash = hashlib.sha1(
535 f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
536 return f'SAPISIDHASH {time_now}_{sapisidhash}'
537
538 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
539 note='Downloading API JSON', errnote='Unable to download API page',
540 context=None, api_key=None, api_hostname=None, default_client='WEB'):
541
542 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
543 data.update(query)
544 real_headers = self.generate_api_headers(default_client=default_client)
545 real_headers.update({'content-type': 'application/json'})
546 if headers:
547 real_headers.update(headers)
548 return self._download_json(
549 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
550 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
551 data=json.dumps(data).encode('utf8'), headers=real_headers,
552 query={'key': api_key or self._extract_api_key()})
553
554 def extract_yt_initial_data(self, video_id, webpage):
555 return self._parse_json(
556 self._search_regex(
557 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
558 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
559 video_id)
560
561 def _extract_identity_token(self, webpage, item_id):
562 if not webpage:
563 return None
564 ytcfg = self.extract_ytcfg(item_id, webpage)
565 if ytcfg:
566 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
567 if token:
568 return token
569 return self._search_regex(
570 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
571 'identity token', default=None)
572
573 @staticmethod
574 def _extract_account_syncid(*args):
575 """
576 Extract syncId required to download private playlists of secondary channels
577 @params response and/or ytcfg
578 """
579 for data in args:
580 # ytcfg includes channel_syncid if on secondary channel
581 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
582 if delegated_sid:
583 return delegated_sid
584 sync_ids = (try_get(
585 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
586 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
587 if len(sync_ids) >= 2 and sync_ids[1]:
588 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
589 # and just "user_syncid||" for primary channel. We only want the channel_syncid
590 return sync_ids[0]
591
592 def extract_ytcfg(self, video_id, webpage):
593 if not webpage:
594 return {}
595 return self._parse_json(
596 self._search_regex(
597 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
598 default='{}'), video_id, fatal=False) or {}
599
    def generate_api_headers(
            self, ytcfg=None, identity_token=None, account_syncid=None,
            visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
        """Build the HTTP headers required for InnerTube API requests."""
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin
        }
        # Fall back to the visitorData embedded in the ytcfg's InnerTube context
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
        if session_index is None and ytcfg:
            session_index = self._extract_session_index(ytcfg)
        # X-Goog-AuthUser selects the account; 0 when only a syncid is known
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        # SAPISIDHASH authorization is only added when the cookies allow computing it
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers
628
629 @staticmethod
630 def _build_api_continuation_query(continuation, ctp=None):
631 query = {
632 'continuation': continuation
633 }
634 # TODO: Inconsistency with clickTrackingParams.
635 # Currently we have a fixed ctp contained within context (from ytcfg)
636 # and a ctp in root query for continuation.
637 if ctp:
638 query['clickTracking'] = {'clickTrackingParams': ctp}
639 return query
640
641 @classmethod
642 def _extract_next_continuation_data(cls, renderer):
643 next_continuation = try_get(
644 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
645 lambda x: x['continuation']['reloadContinuationData']), dict)
646 if not next_continuation:
647 return
648 continuation = next_continuation.get('continuation')
649 if not continuation:
650 return
651 ctp = next_continuation.get('clickTrackingParams')
652 return cls._build_api_continuation_query(continuation, ctp)
653
654 @classmethod
655 def _extract_continuation_ep_data(cls, continuation_ep: dict):
656 if isinstance(continuation_ep, dict):
657 continuation = try_get(
658 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
659 if not continuation:
660 return
661 ctp = continuation_ep.get('clickTrackingParams')
662 return cls._build_api_continuation_query(continuation, ctp)
663
664 @classmethod
665 def _extract_continuation(cls, renderer):
666 next_continuation = cls._extract_next_continuation_data(renderer)
667 if next_continuation:
668 return next_continuation
669
670 contents = []
671 for key in ('contents', 'items'):
672 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
673
674 for content in contents:
675 if not isinstance(content, dict):
676 continue
677 continuation_ep = try_get(
678 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
679 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
680 dict)
681 continuation = cls._extract_continuation_ep_data(continuation_ep)
682 if continuation:
683 return continuation
684
685 @classmethod
686 def _extract_alerts(cls, data):
687 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
688 if not isinstance(alert_dict, dict):
689 continue
690 for alert in alert_dict.values():
691 alert_type = alert.get('type')
692 if not alert_type:
693 continue
694 message = cls._get_text(alert, 'text')
695 if message:
696 yield alert_type, message
697
698 def _report_alerts(self, alerts, expected=True):
699 errors = []
700 warnings = []
701 for alert_type, alert_message in alerts:
702 if alert_type.lower() == 'error':
703 errors.append([alert_type, alert_message])
704 else:
705 warnings.append([alert_type, alert_message])
706
707 for alert_type, alert_message in (warnings + errors[:-1]):
708 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
709 if errors:
710 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
711
    def _extract_and_report_alerts(self, data, *args, **kwargs):
        # Convenience wrapper: extract alerts from *data* and report them in one step
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
714
715 def _extract_badges(self, renderer: dict):
716 badges = set()
717 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
718 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
719 if label:
720 badges.add(label.lower())
721 return badges
722
723 @staticmethod
724 def _get_text(data, *path_list, max_runs=None):
725 for path in path_list or [None]:
726 if path is None:
727 obj = [data]
728 else:
729 obj = traverse_obj(data, path, default=[])
730 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
731 obj = [obj]
732 for item in obj:
733 text = try_get(item, lambda x: x['simpleText'], compat_str)
734 if text:
735 return text
736 runs = try_get(item, lambda x: x['runs'], list) or []
737 if not runs and isinstance(item, list):
738 runs = item
739
740 runs = runs[:min(len(runs), max_runs or len(runs))]
741 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
742 if text:
743 return text
744
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """Call the InnerTube API with retries, alert reporting and incomplete-data handling.

        Retries up to the 'extractor_retries' parameter times on retriable network
        errors or when none of *check_get_keys* is present in the response.
        Returns the parsed JSON response, or None when non-fatal and exhausted.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    # 403/429 are not retried: retrying will not help there
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
803
804 @staticmethod
805 def is_music_url(url):
806 return re.match(r'https?://music\.youtube\.com/', url) is not None
807
808 def _extract_video(self, renderer):
809 video_id = renderer.get('videoId')
810 title = self._get_text(renderer, 'title')
811 description = self._get_text(renderer, 'descriptionSnippet')
812 duration = parse_duration(self._get_text(renderer, 'lengthText'))
813 view_count_text = self._get_text(renderer, 'viewCountText') or ''
814 view_count = str_to_int(self._search_regex(
815 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
816 'view count', default=None))
817
818 uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
819
820 return {
821 '_type': 'url',
822 'ie_key': YoutubeIE.ie_key(),
823 'id': video_id,
824 'url': video_id,
825 'title': title,
826 'description': description,
827 'duration': duration,
828 'view_count': view_count,
829 'uploader': uploader,
830 }
831
832
833 class YoutubeIE(YoutubeBaseInfoExtractor):
834 IE_DESC = 'YouTube.com'
835 _INVIDIOUS_SITES = (
836 # invidious-redirect websites
837 r'(?:www\.)?redirect\.invidious\.io',
838 r'(?:(?:www|dev)\.)?invidio\.us',
839 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
840 r'(?:www\.)?invidious\.pussthecat\.org',
841 r'(?:www\.)?invidious\.zee\.li',
842 r'(?:www\.)?invidious\.ethibox\.fr',
843 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
844 # youtube-dl invidious instances list
845 r'(?:(?:www|no)\.)?invidiou\.sh',
846 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
847 r'(?:www\.)?invidious\.kabi\.tk',
848 r'(?:www\.)?invidious\.mastodon\.host',
849 r'(?:www\.)?invidious\.zapashcanon\.fr',
850 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
851 r'(?:www\.)?invidious\.tinfoil-hat\.net',
852 r'(?:www\.)?invidious\.himiko\.cloud',
853 r'(?:www\.)?invidious\.reallyancient\.tech',
854 r'(?:www\.)?invidious\.tube',
855 r'(?:www\.)?invidiou\.site',
856 r'(?:www\.)?invidious\.site',
857 r'(?:www\.)?invidious\.xyz',
858 r'(?:www\.)?invidious\.nixnet\.xyz',
859 r'(?:www\.)?invidious\.048596\.xyz',
860 r'(?:www\.)?invidious\.drycat\.fr',
861 r'(?:www\.)?inv\.skyn3t\.in',
862 r'(?:www\.)?tube\.poal\.co',
863 r'(?:www\.)?tube\.connect\.cafe',
864 r'(?:www\.)?vid\.wxzm\.sx',
865 r'(?:www\.)?vid\.mint\.lgbt',
866 r'(?:www\.)?vid\.puffyan\.us',
867 r'(?:www\.)?yewtu\.be',
868 r'(?:www\.)?yt\.elukerio\.org',
869 r'(?:www\.)?yt\.lelux\.fi',
870 r'(?:www\.)?invidious\.ggc-project\.de',
871 r'(?:www\.)?yt\.maisputain\.ovh',
872 r'(?:www\.)?ytprivate\.com',
873 r'(?:www\.)?invidious\.13ad\.de',
874 r'(?:www\.)?invidious\.toot\.koeln',
875 r'(?:www\.)?invidious\.fdn\.fr',
876 r'(?:www\.)?watch\.nettohikari\.com',
877 r'(?:www\.)?invidious\.namazso\.eu',
878 r'(?:www\.)?invidious\.silkky\.cloud',
879 r'(?:www\.)?invidious\.exonip\.de',
880 r'(?:www\.)?invidious\.riverside\.rocks',
881 r'(?:www\.)?invidious\.blamefran\.net',
882 r'(?:www\.)?invidious\.moomoo\.de',
883 r'(?:www\.)?ytb\.trom\.tf',
884 r'(?:www\.)?yt\.cyberhost\.uk',
885 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
886 r'(?:www\.)?qklhadlycap4cnod\.onion',
887 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
888 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
889 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
890 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
891 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
892 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
893 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
894 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
895 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
896 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
897 )
# URL pattern accepted by this extractor. Matches:
#   * watch/embed URLs on youtube.com (any subdomain, any letter case),
#     youtube-nocookie.com, youtubekids.com and googleapis.com,
#   * several proxy/mirror front-ends (deturl, pwnyoutube, hooktube, ...),
#   * the Invidious instances listed in _INVIDIOUS_SITES (interpolated via
#     the %(invidious)s placeholder below),
#   * short hosts like youtu.be and vid.plus,
#   * or a completely bare 11-character video ID.
# The whole host/path prefix is optional (group 1); the video ID is
# captured as the named group 'id'.
_VALID_URL = r"""(?x)^
    (
        (?:https?://|//)                                    # http(s):// or protocol-independent URL
        (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
           (?:www\.)?deturl\.com/www\.youtube\.com|
           (?:www\.)?pwnyoutube\.com|
           (?:www\.)?hooktube\.com|
           (?:www\.)?yourepeat\.com|
           tube\.majestyc\.net|
           %(invidious)s|
           youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
        (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
        (?:                                                  # the various things that can precede the ID:
            (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
            |(?:                                             # or the v= param in all its forms
                (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                v=
            )
        ))
        |(?:
           youtu\.be|                                        # just youtu.be/xxxx
           vid\.plus|                                        # or vid.plus/xxxx
           zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
           %(invidious)s
        )/
        |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
        )
    )?                                                       # all until now is optional -> you can pass the naked ID
    (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
    (?(1).+)?                                                # if we found the ID, everything can follow
    (?:\#|$)""" % {
    'invidious': '|'.join(_INVIDIOUS_SITES),
}
# Regexes used to extract a player identifier from the player JS URL;
# presumably tried in order until one matches (used outside this chunk —
# confirm against the caller). Each pattern defines a named group 'id'.
_PLAYER_INFO_RE = (
    # modern /s/player/<id>/player... URLs
    r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
    # older vflset/plasma-ias base.js URLs, optionally locale-suffixed
    r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
    # legacy 'vfl'-prefixed player IDs anywhere in a .js path
    r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
)
# Static table of known YouTube format specifications, keyed by itag
# (as a string). Each value supplies metadata — container/extension,
# resolution, audio/video codecs, bitrate, fps, format_note, preference —
# for formats whose own metadata from the API is missing or unreliable.
# Negative 'preference' values de-prioritize a format in selection.
_formats = {
    '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
    '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
    '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
    '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
    '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
    '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
    '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
    '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
    '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
    '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
    '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


    # 3D videos
    '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
    '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
    '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
    '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
    '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

    # Apple HTTP Live Streaming
    '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
    '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
    '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
    '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

    # DASH mp4 video
    '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
    '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
    '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
    '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

    # Dash mp4 audio
    '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
    '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
    '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
    '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
    '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
    '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

    # Dash webm
    '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
    '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
    '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
    '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
    '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
    '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

    # Dash webm audio
    '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
    '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

    # Dash webm audio with opus inside
    '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
    '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
    '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

    # RTMP (unnamed)
    '_rtmp': {'protocol': 'rtmp'},

    # av01 video only formats sometimes served with "unknown" codecs
    '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
}
# Subtitle formats requested for caption tracks, listed in order;
# presumably order of preference — confirm against the subtitle-extraction code.
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

# Playability-status reason strings that identify an age-gated video.
_AGE_GATE_REASONS = (
    'Sign in to confirm your age',
    'This video may be inappropriate for some users.',
    'Sorry, this content is age-restricted.')

# NOTE(review): looks like this opts out of InfoExtractor's generic
# geo-restriction bypass for this extractor — confirm against common.py.
_GEO_BYPASS = False

# Extractor name, used for matching and in user-facing output.
IE_NAME = 'youtube'
1053 _TESTS = [
1054 {
1055 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1056 'info_dict': {
1057 'id': 'BaW_jenozKc',
1058 'ext': 'mp4',
1059 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1060 'uploader': 'Philipp Hagemeister',
1061 'uploader_id': 'phihag',
1062 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1063 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1064 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1065 'upload_date': '20121002',
1066 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1067 'categories': ['Science & Technology'],
1068 'tags': ['youtube-dl'],
1069 'duration': 10,
1070 'view_count': int,
1071 'like_count': int,
1072 'dislike_count': int,
1073 'start_time': 1,
1074 'end_time': 9,
1075 }
1076 },
1077 {
1078 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1079 'note': 'Embed-only video (#1746)',
1080 'info_dict': {
1081 'id': 'yZIXLfi8CZQ',
1082 'ext': 'mp4',
1083 'upload_date': '20120608',
1084 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1085 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1086 'uploader': 'SET India',
1087 'uploader_id': 'setindia',
1088 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1089 'age_limit': 18,
1090 },
1091 'skip': 'Private video',
1092 },
1093 {
1094 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1095 'note': 'Use the first video ID in the URL',
1096 'info_dict': {
1097 'id': 'BaW_jenozKc',
1098 'ext': 'mp4',
1099 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1100 'uploader': 'Philipp Hagemeister',
1101 'uploader_id': 'phihag',
1102 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1103 'upload_date': '20121002',
1104 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1105 'categories': ['Science & Technology'],
1106 'tags': ['youtube-dl'],
1107 'duration': 10,
1108 'view_count': int,
1109 'like_count': int,
1110 'dislike_count': int,
1111 },
1112 'params': {
1113 'skip_download': True,
1114 },
1115 },
1116 {
1117 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1118 'note': '256k DASH audio (format 141) via DASH manifest',
1119 'info_dict': {
1120 'id': 'a9LDPn-MO4I',
1121 'ext': 'm4a',
1122 'upload_date': '20121002',
1123 'uploader_id': '8KVIDEO',
1124 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1125 'description': '',
1126 'uploader': '8KVIDEO',
1127 'title': 'UHDTV TEST 8K VIDEO.mp4'
1128 },
1129 'params': {
1130 'youtube_include_dash_manifest': True,
1131 'format': '141',
1132 },
1133 'skip': 'format 141 not served anymore',
1134 },
1135 # DASH manifest with encrypted signature
1136 {
1137 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1138 'info_dict': {
1139 'id': 'IB3lcPjvWLA',
1140 'ext': 'm4a',
1141 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1142 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1143 'duration': 244,
1144 'uploader': 'AfrojackVEVO',
1145 'uploader_id': 'AfrojackVEVO',
1146 'upload_date': '20131011',
1147 'abr': 129.495,
1148 },
1149 'params': {
1150 'youtube_include_dash_manifest': True,
1151 'format': '141/bestaudio[ext=m4a]',
1152 },
1153 },
1154 # Normal age-gate video (embed allowed)
1155 {
1156 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1157 'info_dict': {
1158 'id': 'HtVdAasjOgU',
1159 'ext': 'mp4',
1160 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1161 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1162 'duration': 142,
1163 'uploader': 'The Witcher',
1164 'uploader_id': 'WitcherGame',
1165 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1166 'upload_date': '20140605',
1167 'age_limit': 18,
1168 },
1169 },
1170 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1171 # YouTube Red ad is not captured for creator
1172 {
1173 'url': '__2ABJjxzNo',
1174 'info_dict': {
1175 'id': '__2ABJjxzNo',
1176 'ext': 'mp4',
1177 'duration': 266,
1178 'upload_date': '20100430',
1179 'uploader_id': 'deadmau5',
1180 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1181 'creator': 'deadmau5',
1182 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1183 'uploader': 'deadmau5',
1184 'title': 'Deadmau5 - Some Chords (HD)',
1185 'alt_title': 'Some Chords',
1186 },
1187 'expected_warnings': [
1188 'DASH manifest missing',
1189 ]
1190 },
1191 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1192 {
1193 'url': 'lqQg6PlCWgI',
1194 'info_dict': {
1195 'id': 'lqQg6PlCWgI',
1196 'ext': 'mp4',
1197 'duration': 6085,
1198 'upload_date': '20150827',
1199 'uploader_id': 'olympic',
1200 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1201 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1202 'uploader': 'Olympics',
1203 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1204 },
1205 'params': {
1206 'skip_download': 'requires avconv',
1207 }
1208 },
1209 # Non-square pixels
1210 {
1211 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1212 'info_dict': {
1213 'id': '_b-2C3KPAM0',
1214 'ext': 'mp4',
1215 'stretched_ratio': 16 / 9.,
1216 'duration': 85,
1217 'upload_date': '20110310',
1218 'uploader_id': 'AllenMeow',
1219 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1220 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1221 'uploader': '孫ᄋᄅ',
1222 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1223 },
1224 },
1225 # url_encoded_fmt_stream_map is empty string
1226 {
1227 'url': 'qEJwOuvDf7I',
1228 'info_dict': {
1229 'id': 'qEJwOuvDf7I',
1230 'ext': 'webm',
1231 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1232 'description': '',
1233 'upload_date': '20150404',
1234 'uploader_id': 'spbelect',
1235 'uploader': 'Наблюдатели Петербурга',
1236 },
1237 'params': {
1238 'skip_download': 'requires avconv',
1239 },
1240 'skip': 'This live event has ended.',
1241 },
1242 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1243 {
1244 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1245 'info_dict': {
1246 'id': 'FIl7x6_3R5Y',
1247 'ext': 'webm',
1248 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1249 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1250 'duration': 220,
1251 'upload_date': '20150625',
1252 'uploader_id': 'dorappi2000',
1253 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1254 'uploader': 'dorappi2000',
1255 'formats': 'mincount:31',
1256 },
1257 'skip': 'not actual anymore',
1258 },
1259 # DASH manifest with segment_list
1260 {
1261 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1262 'md5': '8ce563a1d667b599d21064e982ab9e31',
1263 'info_dict': {
1264 'id': 'CsmdDsKjzN8',
1265 'ext': 'mp4',
1266 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1267 'uploader': 'Airtek',
1268 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1269 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1270 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1271 },
1272 'params': {
1273 'youtube_include_dash_manifest': True,
1274 'format': '135', # bestvideo
1275 },
1276 'skip': 'This live event has ended.',
1277 },
1278 {
1279 # Multifeed videos (multiple cameras), URL is for Main Camera
1280 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1281 'info_dict': {
1282 'id': 'jvGDaLqkpTg',
1283 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1284 'description': 'md5:e03b909557865076822aa169218d6a5d',
1285 },
1286 'playlist': [{
1287 'info_dict': {
1288 'id': 'jvGDaLqkpTg',
1289 'ext': 'mp4',
1290 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1291 'description': 'md5:e03b909557865076822aa169218d6a5d',
1292 'duration': 10643,
1293 'upload_date': '20161111',
1294 'uploader': 'Team PGP',
1295 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1296 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1297 },
1298 }, {
1299 'info_dict': {
1300 'id': '3AKt1R1aDnw',
1301 'ext': 'mp4',
1302 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1303 'description': 'md5:e03b909557865076822aa169218d6a5d',
1304 'duration': 10991,
1305 'upload_date': '20161111',
1306 'uploader': 'Team PGP',
1307 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1308 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1309 },
1310 }, {
1311 'info_dict': {
1312 'id': 'RtAMM00gpVc',
1313 'ext': 'mp4',
1314 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1315 'description': 'md5:e03b909557865076822aa169218d6a5d',
1316 'duration': 10995,
1317 'upload_date': '20161111',
1318 'uploader': 'Team PGP',
1319 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1320 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1321 },
1322 }, {
1323 'info_dict': {
1324 'id': '6N2fdlP3C5U',
1325 'ext': 'mp4',
1326 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1327 'description': 'md5:e03b909557865076822aa169218d6a5d',
1328 'duration': 10990,
1329 'upload_date': '20161111',
1330 'uploader': 'Team PGP',
1331 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1332 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1333 },
1334 }],
1335 'params': {
1336 'skip_download': True,
1337 },
1338 },
1339 {
1340 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1341 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1342 'info_dict': {
1343 'id': 'gVfLd0zydlo',
1344 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1345 },
1346 'playlist_count': 2,
1347 'skip': 'Not multifeed anymore',
1348 },
1349 {
1350 'url': 'https://vid.plus/FlRa-iH7PGw',
1351 'only_matching': True,
1352 },
1353 {
1354 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1355 'only_matching': True,
1356 },
1357 {
1358 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1359 # Also tests cut-off URL expansion in video description (see
1360 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1361 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1362 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1363 'info_dict': {
1364 'id': 'lsguqyKfVQg',
1365 'ext': 'mp4',
1366 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1367 'alt_title': 'Dark Walk',
1368 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1369 'duration': 133,
1370 'upload_date': '20151119',
1371 'uploader_id': 'IronSoulElf',
1372 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1373 'uploader': 'IronSoulElf',
1374 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1375 'track': 'Dark Walk',
1376 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1377 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1378 },
1379 'params': {
1380 'skip_download': True,
1381 },
1382 },
1383 {
1384 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1385 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1386 'only_matching': True,
1387 },
1388 {
1389 # Video with yt:stretch=17:0
1390 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1391 'info_dict': {
1392 'id': 'Q39EVAstoRM',
1393 'ext': 'mp4',
1394 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1395 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1396 'upload_date': '20151107',
1397 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1398 'uploader': 'CH GAMER DROID',
1399 },
1400 'params': {
1401 'skip_download': True,
1402 },
1403 'skip': 'This video does not exist.',
1404 },
1405 {
1406 # Video with incomplete 'yt:stretch=16:'
1407 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1408 'only_matching': True,
1409 },
1410 {
1411 # Video licensed under Creative Commons
1412 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1413 'info_dict': {
1414 'id': 'M4gD1WSo5mA',
1415 'ext': 'mp4',
1416 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1417 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1418 'duration': 721,
1419 'upload_date': '20150127',
1420 'uploader_id': 'BerkmanCenter',
1421 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1422 'uploader': 'The Berkman Klein Center for Internet & Society',
1423 'license': 'Creative Commons Attribution license (reuse allowed)',
1424 },
1425 'params': {
1426 'skip_download': True,
1427 },
1428 },
1429 {
1430 # Channel-like uploader_url
1431 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1432 'info_dict': {
1433 'id': 'eQcmzGIKrzg',
1434 'ext': 'mp4',
1435 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1436 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1437 'duration': 4060,
1438 'upload_date': '20151119',
1439 'uploader': 'Bernie Sanders',
1440 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1441 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1442 'license': 'Creative Commons Attribution license (reuse allowed)',
1443 },
1444 'params': {
1445 'skip_download': True,
1446 },
1447 },
1448 {
1449 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1450 'only_matching': True,
1451 },
1452 {
1453 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1454 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1455 'only_matching': True,
1456 },
1457 {
1458 # Rental video preview
1459 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1460 'info_dict': {
1461 'id': 'uGpuVWrhIzE',
1462 'ext': 'mp4',
1463 'title': 'Piku - Trailer',
1464 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1465 'upload_date': '20150811',
1466 'uploader': 'FlixMatrix',
1467 'uploader_id': 'FlixMatrixKaravan',
1468 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1469 'license': 'Standard YouTube License',
1470 },
1471 'params': {
1472 'skip_download': True,
1473 },
1474 'skip': 'This video is not available.',
1475 },
1476 {
1477 # YouTube Red video with episode data
1478 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1479 'info_dict': {
1480 'id': 'iqKdEhx-dD4',
1481 'ext': 'mp4',
1482 'title': 'Isolation - Mind Field (Ep 1)',
1483 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1484 'duration': 2085,
1485 'upload_date': '20170118',
1486 'uploader': 'Vsauce',
1487 'uploader_id': 'Vsauce',
1488 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1489 'series': 'Mind Field',
1490 'season_number': 1,
1491 'episode_number': 1,
1492 },
1493 'params': {
1494 'skip_download': True,
1495 },
1496 'expected_warnings': [
1497 'Skipping DASH manifest',
1498 ],
1499 },
1500 {
1501 # The following content has been identified by the YouTube community
1502 # as inappropriate or offensive to some audiences.
1503 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1504 'info_dict': {
1505 'id': '6SJNVb0GnPI',
1506 'ext': 'mp4',
1507 'title': 'Race Differences in Intelligence',
1508 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1509 'duration': 965,
1510 'upload_date': '20140124',
1511 'uploader': 'New Century Foundation',
1512 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1513 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1514 },
1515 'params': {
1516 'skip_download': True,
1517 },
1518 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1519 },
1520 {
1521 # itag 212
1522 'url': '1t24XAntNCY',
1523 'only_matching': True,
1524 },
1525 {
1526 # geo restricted to JP
1527 'url': 'sJL6WA-aGkQ',
1528 'only_matching': True,
1529 },
1530 {
1531 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1532 'only_matching': True,
1533 },
1534 {
1535 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1536 'only_matching': True,
1537 },
1538 {
1539 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1540 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1541 'only_matching': True,
1542 },
1543 {
1544 # DRM protected
1545 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1546 'only_matching': True,
1547 },
1548 {
1549 # Video with unsupported adaptive stream type formats
1550 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1551 'info_dict': {
1552 'id': 'Z4Vy8R84T1U',
1553 'ext': 'mp4',
1554 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1555 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1556 'duration': 433,
1557 'upload_date': '20130923',
1558 'uploader': 'Amelia Putri Harwita',
1559 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1560 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1561 'formats': 'maxcount:10',
1562 },
1563 'params': {
1564 'skip_download': True,
1565 'youtube_include_dash_manifest': False,
1566 },
1567 'skip': 'not actual anymore',
1568 },
1569 {
1570 # Youtube Music Auto-generated description
1571 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1572 'info_dict': {
1573 'id': 'MgNrAu2pzNs',
1574 'ext': 'mp4',
1575 'title': 'Voyeur Girl',
1576 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1577 'upload_date': '20190312',
1578 'uploader': 'Stephen - Topic',
1579 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1580 'artist': 'Stephen',
1581 'track': 'Voyeur Girl',
1582 'album': 'it\'s too much love to know my dear',
1583 'release_date': '20190313',
1584 'release_year': 2019,
1585 },
1586 'params': {
1587 'skip_download': True,
1588 },
1589 },
1590 {
1591 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1592 'only_matching': True,
1593 },
1594 {
1595 # invalid -> valid video id redirection
1596 'url': 'DJztXj2GPfl',
1597 'info_dict': {
1598 'id': 'DJztXj2GPfk',
1599 'ext': 'mp4',
1600 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1601 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1602 'upload_date': '20090125',
1603 'uploader': 'Prochorowka',
1604 'uploader_id': 'Prochorowka',
1605 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1606 'artist': 'Panjabi MC',
1607 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1608 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1609 },
1610 'params': {
1611 'skip_download': True,
1612 },
1613 'skip': 'Video unavailable',
1614 },
1615 {
1616 # empty description results in an empty string
1617 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1618 'info_dict': {
1619 'id': 'x41yOUIvK2k',
1620 'ext': 'mp4',
1621 'title': 'IMG 3456',
1622 'description': '',
1623 'upload_date': '20170613',
1624 'uploader_id': 'ElevageOrVert',
1625 'uploader': 'ElevageOrVert',
1626 },
1627 'params': {
1628 'skip_download': True,
1629 },
1630 },
1631 {
1632 # with '};' inside yt initial data (see [1])
1633 # see [2] for an example with '};' inside ytInitialPlayerResponse
1634 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1635 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1636 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1637 'info_dict': {
1638 'id': 'CHqg6qOn4no',
1639 'ext': 'mp4',
1640 'title': 'Part 77 Sort a list of simple types in c#',
1641 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1642 'upload_date': '20130831',
1643 'uploader_id': 'kudvenkat',
1644 'uploader': 'kudvenkat',
1645 },
1646 'params': {
1647 'skip_download': True,
1648 },
1649 },
1650 {
1651 # another example of '};' in ytInitialData
1652 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1653 'only_matching': True,
1654 },
1655 {
1656 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1657 'only_matching': True,
1658 },
1659 {
1660 # https://github.com/ytdl-org/youtube-dl/pull/28094
1661 'url': 'OtqTfy26tG0',
1662 'info_dict': {
1663 'id': 'OtqTfy26tG0',
1664 'ext': 'mp4',
1665 'title': 'Burn Out',
1666 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1667 'upload_date': '20141120',
1668 'uploader': 'The Cinematic Orchestra - Topic',
1669 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1670 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1671 'artist': 'The Cinematic Orchestra',
1672 'track': 'Burn Out',
1673 'album': 'Every Day',
1674 'release_data': None,
1675 'release_year': None,
1676 },
1677 'params': {
1678 'skip_download': True,
1679 },
1680 },
1681 {
1682 # controversial video, only works with bpctr when authenticated with cookies
1683 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1684 'only_matching': True,
1685 },
1686 {
1687 # controversial video, requires bpctr/contentCheckOk
1688 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1689 'info_dict': {
1690 'id': 'SZJvDhaSDnc',
1691 'ext': 'mp4',
1692 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1693 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1694 'uploader': 'CBS This Morning',
1695 'uploader_id': 'CBSThisMorning',
1696 'upload_date': '20140716',
1697 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1698 }
1699 },
1700 {
1701 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1702 'url': 'cBvYw8_A0vQ',
1703 'info_dict': {
1704 'id': 'cBvYw8_A0vQ',
1705 'ext': 'mp4',
1706 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1707 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1708 'upload_date': '20201120',
1709 'uploader': 'Walk around Japan',
1710 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1711 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1712 },
1713 'params': {
1714 'skip_download': True,
1715 },
1716 }, {
1717 # Has multiple audio streams
1718 'url': 'WaOKSUlf4TM',
1719 'only_matching': True
1720 }, {
1721 # Requires Premium: has format 141 when requested using YTM url
1722 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1723 'only_matching': True
1724 }, {
1725 # multiple subtitles with same lang_code
1726 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1727 'only_matching': True,
1728 }, {
1729 # Force use android client fallback
1730 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1731 'info_dict': {
1732 'id': 'YOelRv7fMxY',
1733 'title': 'DIGGING A SECRET TUNNEL Part 1',
1734 'ext': '3gp',
1735 'upload_date': '20210624',
1736 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1737 'uploader': 'colinfurze',
1738 'uploader_id': 'colinfurze',
1739 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1740 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1741 },
1742 'params': {
1743 'format': '17', # 3gp format available on android
1744 'extractor_args': {'youtube': {'player_client': ['android']}},
1745 },
1746 },
1747 {
1748 # Skip download of additional client configs (remix client config in this case)
1749 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1750 'only_matching': True,
1751 'params': {
1752 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1753 },
1754 }
1755 ]
1756
    @classmethod
    def suitable(cls, url):
        """Reject URLs that carry a non-empty `list` query parameter.

        Such URLs belong to the playlist/tab extractors instead.
        """
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('list', [None])[0]:
            return False
        return super(YoutubeIE, cls).suitable(url)
1766
1767 def __init__(self, *args, **kwargs):
1768 super(YoutubeIE, self).__init__(*args, **kwargs)
1769 self._code_cache = {}
1770 self._player_cache = {}
1771
    def _extract_player_url(self, ytcfg=None, webpage=None):
        """Return an absolute URL to the player JS, or None.

        Prefers PLAYER_JS_URL from ytcfg, then scrapes the webpage.
        Scheme-relative and path-only URLs are made absolute.
        """
        player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
        if not player_url and webpage:
            player_url = self._search_regex(
                r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
                webpage, 'player URL', fatal=False)
        if not player_url:
            return None
        if player_url.startswith('//'):
            # scheme-relative URL
            player_url = 'https:' + player_url
        elif not re.match(r'https?://', player_url):
            # relative path, e.g. /s/player/.../base.js
            player_url = compat_urlparse.urljoin(
                'https://www.youtube.com', player_url)
        return player_url
1786
1787 def _signature_cache_id(self, example_sig):
1788 """ Return a string representation of a signature """
1789 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1790
1791 @classmethod
1792 def _extract_player_info(cls, player_url):
1793 for player_re in cls._PLAYER_INFO_RE:
1794 id_m = re.search(player_re, player_url)
1795 if id_m:
1796 break
1797 else:
1798 raise ExtractorError('Cannot identify player %r' % player_url)
1799 return id_m.group('id')
1800
    def _load_player(self, video_id, player_url, fatal=True) -> bool:
        """Ensure the player JS for *player_url* is in self._code_cache.

        Returns whether the player id is present in the cache afterwards.
        NOTE(review): with fatal=False a failed download stores the falsy
        return value of _download_webpage, so this still returns True —
        confirm callers handle a falsy cached entry.
        """
        player_id = self._extract_player_info(player_url)
        if player_id not in self._code_cache:
            self._code_cache[player_id] = self._download_webpage(
                player_url, video_id, fatal=fatal,
                note='Downloading player ' + player_id,
                errnote='Download of %s failed' % player_url)
        return player_id in self._code_cache
1809
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Return a callable that descrambles a signature string.

        First tries the on-disk cache of index permutations; otherwise
        parses the player JS and caches the derived permutation.
        Returns None implicitly if the player could not be loaded.
        """
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # cache_spec is a list of source indices; replay the permutation
            return lambda s: ''.join(s[i] for i in cache_spec)

        if self._load_player(video_id, player_url):
            code = self._code_cache[player_id]
            res = self._parse_sig_js(code)

            # Run the JS function on a probe string of unique characters to
            # recover the permutation it applies, then persist it
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res
1832
    def _print_sig_code(self, func, example_sig):
        """Print equivalent Python source for the extracted signature function
        (debug aid for the youtube_print_sig_code option)."""
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a run of indices with constant step as a slice
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk consecutive index pairs, coalescing +1/-1 runs into slices
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or the trailing slice
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Recover the permutation by running func on a probe string
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1871
    def _parse_sig_js(self, jscode):
        """Locate the signature function in the player JS and return a
        Python callable wrapping the interpreted function."""
        # Patterns are ordered newest-first; the name is captured as 'sig'
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes its arguments as a list
        return lambda s: initial_function([s])
1895
    def _decrypt_signature(self, s, video_id, player_url):
        """Turn the encrypted s field into a working signature.

        Caches the extracted function per (player_url, signature shape).
        Raises ExtractorError (with traceback as message, original error as
        cause) if extraction or application fails.
        """
        if player_url is None:
            raise ExtractorError('Cannot decrypt signature without player_url')

        try:
            # Signatures with the same part-length pattern share a function
            player_id = (player_url, self._signature_cache_id(s))
            if player_id not in self._player_cache:
                func = self._extract_signature_function(
                    video_id, player_url, s
                )
                self._player_cache[player_id] = func
            func = self._player_cache[player_id]
            if self.get_param('youtube_print_sig_code'):
                self._print_sig_code(func, s)
            return func(s)
        except Exception as e:
            tb = traceback.format_exc()
            raise ExtractorError(
                'Signature extraction failed: ' + tb, cause=e)
1917
    def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
        """
        Extract signatureTimestamp (sts)
        Required to tell API what sig/player version is in use.

        Tries ytcfg['STS'] first, then the player JS. Returns None (after a
        warning) when player_url is missing and fatal is False.
        """
        sts = None
        if isinstance(ytcfg, dict):
            sts = int_or_none(ytcfg.get('STS'))

        if not sts:
            # Attempt to extract from player
            if player_url is None:
                error_msg = 'Cannot extract signature timestamp without player_url.'
                if fatal:
                    raise ExtractorError(error_msg)
                self.report_warning(error_msg)
                return
            if self._load_player(video_id, player_url, fatal=fatal):
                player_id = self._extract_player_info(player_url)
                code = self._code_cache[player_id]
                sts = int_or_none(self._search_regex(
                    r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
                    'JS player signature timestamp', group='sts', fatal=fatal))
        return sts
1942
1943 def _mark_watched(self, video_id, player_responses):
1944 playback_url = traverse_obj(
1945 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
1946 expected_type=url_or_none, get_all=False)
1947 if not playback_url:
1948 self.report_warning('Unable to mark watched')
1949 return
1950 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1951 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1952
1953 # cpn generation algorithm is reverse engineered from base.js.
1954 # In fact it works even with dummy cpn.
1955 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1956 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1957
1958 qs.update({
1959 'ver': ['2'],
1960 'cpn': [cpn],
1961 })
1962 playback_url = compat_urlparse.urlunparse(
1963 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1964
1965 self._download_webpage(
1966 playback_url, video_id, 'Marking watched',
1967 'Unable to mark watched', fatal=False)
1968
    @staticmethod
    def _extract_urls(webpage):
        """Return all YouTube video URLs/ids embedded in *webpage*."""
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        # NOTE: these matches yield bare video ids, not full URLs
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        entries.extend(m[-1] for m in matches)

        return entries
2000
2001 @staticmethod
2002 def _extract_url(webpage):
2003 urls = YoutubeIE._extract_urls(webpage)
2004 return urls[0] if urls else None
2005
2006 @classmethod
2007 def extract_id(cls, url):
2008 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2009 if mobj is None:
2010 raise ExtractorError('Invalid URL: %s' % url)
2011 video_id = mobj.group(2)
2012 return video_id
2013
    def _extract_chapters_from_json(self, data, duration):
        """Extract chapters from the player-overlay chapter bar in
        ytInitialData; times are converted from milliseconds."""
        chapter_list = traverse_obj(
            data, (
                'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
                'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
            ), expected_type=list)

        return self._extract_chapters(
            chapter_list,
            chapter_time=lambda chapter: float_or_none(
                traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
            chapter_title=lambda chapter: traverse_obj(
                chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
            duration=duration)
2028
    def _extract_chapters_from_engagement_panel(self, data, duration):
        """Extract chapters from the macro-markers engagement panel.

        Returns the first panel that yields a non-empty chapter list,
        otherwise an empty list.
        """
        content_list = traverse_obj(
            data,
            ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
            expected_type=list, default=[])
        chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
        chapter_title = lambda chapter: self._get_text(chapter, 'title')

        return next((
            filter(None, (
                self._extract_chapters(
                    traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
                    chapter_time, chapter_title, duration)
                for contents in content_list
            ))), [])
2044
2045 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2046 chapters = []
2047 last_chapter = {'start_time': 0}
2048 for idx, chapter in enumerate(chapter_list or []):
2049 title = chapter_title(chapter)
2050 start_time = chapter_time(chapter)
2051 if start_time is None:
2052 continue
2053 last_chapter['end_time'] = start_time
2054 if start_time < last_chapter['start_time']:
2055 if idx == 1:
2056 chapters.pop()
2057 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2058 else:
2059 self.report_warning(f'Invalid start time for chapter "{title}"')
2060 continue
2061 last_chapter = {'start_time': start_time, 'title': title}
2062 chapters.append(last_chapter)
2063 last_chapter['end_time'] = duration
2064 return chapters
2065
    def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
        """Search *webpage* for a yt-initial JS variable (first bounded by
        _YT_INITIAL_BOUNDARY_RE, then unbounded) and parse it as JSON.
        Returns {} parsed (i.e. an empty dict) when not found; non-fatal."""
        return self._parse_json(self._search_regex(
            (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
             regex), webpage, name, default='{}'), video_id, fatal=False)
2070
    @staticmethod
    def parse_time_text(time_text):
        """
        Parse the comment time text
        time_text is in the format 'X units ago (edited)'

        Returns a datetime from datetime_from_str, or None when the text has
        fewer than 3 words or the relative offset cannot be parsed.
        """
        time_text_split = time_text.split(' ')
        if len(time_text_split) >= 3:
            try:
                # e.g. '2 years ago' -> 'now-2years'
                return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
            except ValueError:
                return None
2083
2084 def _extract_comment(self, comment_renderer, parent=None):
2085 comment_id = comment_renderer.get('commentId')
2086 if not comment_id:
2087 return
2088
2089 text = self._get_text(comment_renderer, 'contentText')
2090
2091 # note: timestamp is an estimate calculated from the current time and time_text
2092 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2093 time_text_dt = self.parse_time_text(time_text)
2094 if isinstance(time_text_dt, datetime.datetime):
2095 timestamp = calendar.timegm(time_text_dt.timetuple())
2096 author = self._get_text(comment_renderer, 'authorText')
2097 author_id = try_get(comment_renderer,
2098 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2099
2100 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2101 lambda x: x['likeCount']), compat_str)) or 0
2102 author_thumbnail = try_get(comment_renderer,
2103 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2104
2105 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2106 is_favorited = 'creatorHeart' in (try_get(
2107 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2108 return {
2109 'id': comment_id,
2110 'text': text,
2111 'timestamp': timestamp,
2112 'time_text': time_text,
2113 'like_count': votes,
2114 'is_favorited': is_favorited,
2115 'author': author,
2116 'author_id': author_id,
2117 'author_thumbnail': author_thumbnail,
2118 'author_is_uploader': author_is_uploader,
2119 'parent': parent or 'root'
2120 }
2121
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """Generator yielding comment dicts (and, once, an int with the
        estimated total comment count) by following API continuations.

        Recurses into reply threads with *parent* set; *comment_counts* is a
        shared mutable list [downloaded, estimated total, thread number].
        """

        def extract_header(contents):
            # Parse the comments header: total count and the continuation
            # for the configured sort order (top/new)
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in the thread, then recurse into its replies
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['continuation']) < 27:
            # Short tokens are from the old API; synthesize a new-API one
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Persist visitorData across pages to keep pagination consistent
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2292
2293 @staticmethod
2294 def _generate_comment_continuation(video_id):
2295 """
2296 Generates initial comment section continuation token from given video id
2297 """
2298 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2299 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2300 new_continuation_intlist = list(itertools.chain.from_iterable(
2301 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2302 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2303
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            # Find the first known comment-section renderer and delegate
            if isinstance(contents, list):
                for entry in contents:
                    for key, renderer in entry.items():
                        if key not in known_entry_comment_renderers:
                            continue
                        yield from self._comment_entries(
                            renderer, video_id=video_id, ytcfg=ytcfg,
                            identity_token=self._extract_identity_token(webpage, item_id=video_id),
                            account_syncid=self._extract_account_syncid(ytcfg))
                        break
        comments = []
        known_entry_comment_renderers = ('itemSectionRenderer',)
        estimated_total = 0
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

        try:
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                # the generator yields the estimated total count as a bare int
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            # allow the user to abort while keeping what was fetched so far
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2337
2338 @staticmethod
2339 def _generate_player_context(sts=None):
2340 context = {
2341 'html5Preference': 'HTML5_PREF_WANTS',
2342 }
2343 if sts is not None:
2344 context['signatureTimestamp'] = sts
2345 return {
2346 'playbackContext': {
2347 'contentPlaybackContext': context
2348 },
2349 'contentCheckOk': True,
2350 'racyCheckOk': True
2351 }
2352
2353 @staticmethod
2354 def _get_video_info_params(video_id, client='TVHTML5'):
2355 GVI_CLIENTS = {
2356 'ANDROID': {
2357 'c': 'ANDROID',
2358 'cver': '16.20',
2359 },
2360 'TVHTML5': {
2361 'c': 'TVHTML5',
2362 'cver': '6.20180913',
2363 },
2364 'IOS': {
2365 'c': 'IOS',
2366 'cver': '16.20'
2367 }
2368 }
2369 query = {
2370 'video_id': video_id,
2371 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2372 'html5': '1'
2373 }
2374 query.update(GVI_CLIENTS.get(client))
2375 return query
2376
    def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
        """Query the innertube /player endpoint for *client* and return the
        player response dict, or None on failure (non-fatal)."""
        session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
        syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
        # sts is needed so the API serves formats matching the player version
        sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
        headers = self.generate_api_headers(
            player_ytcfg, identity_token, syncid,
            default_client=self._YT_CLIENTS[client], session_index=session_index)

        yt_query = {'videoId': video_id}
        yt_query.update(self._generate_player_context(sts))
        return self._extract_response(
            item_id=video_id, ep='player', query=yt_query,
            ytcfg=player_ytcfg, headers=headers, fatal=False,
            default_client=self._YT_CLIENTS[client],
            note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
        ) or None
2394
    def _extract_age_gated_player_response(self, client, video_id, ytcfg, identity_token, player_url, initial_pr):
        """Retry player extraction for an age-gated video via the embedded
        client variant; returns a player response dict or None."""
        # get_video_info endpoint seems to be completely dead
        gvi_client = None  # self._YT_CLIENTS.get(f'_{client}_agegate')
        if gvi_client:
            pr = self._parse_json(traverse_obj(
                compat_parse_qs(self._download_webpage(
                    self.http_scheme() + '//www.youtube.com/get_video_info', video_id,
                    'Refetching age-gated %s info webpage' % gvi_client.lower(),
                    'unable to download video info webpage', fatal=False,
                    query=self._get_video_info_params(video_id, client=gvi_client))),
                ('player_response', 0), expected_type=str) or '{}', video_id)
            if pr:
                return pr
            self.report_warning('Falling back to embedded-only age-gate workaround')

        if not self._YT_CLIENTS.get(f'_{client}_embedded'):
            return
        embed_webpage = None
        if client == 'web' and 'configs' not in self._configuration_arg('player_skip'):
            embed_webpage = self._download_webpage(
                'https://www.youtube.com/embed/%s?html5=1' % video_id,
                video_id=video_id, note=f'Downloading age-gated {client} embed config')

        ytcfg_age = self.extract_ytcfg(video_id, embed_webpage) or {}
        # If we extracted the embed webpage, it'll tell us if we can view the video
        embedded_pr = self._parse_json(
            traverse_obj(ytcfg_age, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str) or '{}',
            video_id=video_id)
        embedded_ps_reason = traverse_obj(embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
        if embedded_ps_reason in self._AGE_GATE_REASONS:
            # embedding is also blocked; nothing more to try
            return
        return self._extract_player_response(
            f'_{client}_embedded', video_id,
            ytcfg_age or ytcfg, ytcfg_age if client == 'web' else {},
            identity_token, player_url, initial_pr)
2430
2431 def _get_requested_clients(self, url, smuggled_data):
2432 requested_clients = [client for client in self._configuration_arg('player_client')
2433 if client[:0] != '_' and client in self._YT_CLIENTS]
2434 if not requested_clients:
2435 requested_clients = ['android', 'web']
2436
2437 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2438 requested_clients.extend(
2439 f'{client}_music' for client in requested_clients if not client.endswith('_music'))
2440
2441 return orderedSet(requested_clients)
2442
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
        """Generator yielding one player response per requested client,
        retrying age-gated clients via the embedded variant."""
        initial_pr = None
        if webpage:
            # The web response is already embedded in the page
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        for client in clients:
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if client == 'web' and initial_pr:
                pr = initial_pr
            else:
                if client == 'web_music' and 'configs' not in self._configuration_arg('player_skip'):
                    ytm_webpage = self._download_webpage(
                        'https://music.youtube.com',
                        video_id, fatal=False, note='Downloading remix client config')
                    player_ytcfg = self.extract_ytcfg(video_id, ytm_webpage) or {}
                pr = self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
            if pr:
                yield pr
            if traverse_obj(pr, ('playabilityStatus', 'reason')) in self._AGE_GATE_REASONS:
                pr = self._extract_age_gated_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, identity_token, player_url, initial_pr)
                if pr:
                    yield pr
        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr and 'web' not in clients:
            initial_pr['streamingData'] = None
            yield initial_pr
2476
2477 def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2478 itags, stream_ids = [], []
2479 itag_qualities = {}
2480 q = qualities([
2481 # "tiny" is the smallest video-only format. But some audio-only formats
2482 # was also labeled "tiny". It is not clear if such formats still exist
2483 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2484 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2485 ])
2486 streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2487
2488 for fmt in streaming_formats:
2489 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2490 continue
2491
2492 itag = str_or_none(fmt.get('itag'))
2493 audio_track = fmt.get('audioTrack') or {}
2494 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2495 if stream_id in stream_ids:
2496 continue
2497
2498 quality = fmt.get('quality')
2499 if quality == 'tiny' or not quality:
2500 quality = fmt.get('audioQuality', '').lower() or quality
2501 if itag and quality:
2502 itag_qualities[itag] = quality
2503 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2504 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2505 # number of fragment that would subsequently requested with (`&sq=N`)
2506 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2507 continue
2508
2509 fmt_url = fmt.get('url')
2510 if not fmt_url:
2511 sc = compat_parse_qs(fmt.get('signatureCipher'))
2512 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2513 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2514 if not (sc and fmt_url and encrypted_sig):
2515 continue
2516 if not player_url:
2517 continue
2518 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2519 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2520 fmt_url += '&' + sp + '=' + signature
2521
2522 if itag:
2523 itags.append(itag)
2524 stream_ids.append(stream_id)
2525
2526 tbr = float_or_none(
2527 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2528 dct = {
2529 'asr': int_or_none(fmt.get('audioSampleRate')),
2530 'filesize': int_or_none(fmt.get('contentLength')),
2531 'format_id': itag,
2532 'format_note': ', '.join(filter(None, (
2533 audio_track.get('displayName'), fmt.get('qualityLabel') or quality))),
2534 'fps': int_or_none(fmt.get('fps')),
2535 'height': int_or_none(fmt.get('height')),
2536 'quality': q(quality),
2537 'tbr': tbr,
2538 'url': fmt_url,
2539 'width': fmt.get('width'),
2540 'language': audio_track.get('id', '').split('.')[0],
2541 }
2542 mime_mobj = re.match(
2543 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2544 if mime_mobj:
2545 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2546 dct.update(parse_codecs(mime_mobj.group(2)))
2547 # The 3gp format in android client has a quality of "small",
2548 # but is actually worse than all other formats
2549 if dct['ext'] == '3gp':
2550 dct['quality'] = q('tiny')
2551 dct['preference'] = -10
2552 no_audio = dct.get('acodec') == 'none'
2553 no_video = dct.get('vcodec') == 'none'
2554 if no_audio:
2555 dct['vbr'] = tbr
2556 if no_video:
2557 dct['abr'] = tbr
2558 if no_audio or no_video:
2559 dct['downloader_options'] = {
2560 # Youtube throttles chunks >~10M
2561 'http_chunk_size': 10485760,
2562 }
2563 if dct.get('ext'):
2564 dct['container'] = dct['ext'] + '_dash'
2565 yield dct
2566
2567 skip_manifests = self._configuration_arg('skip')
2568 get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2569 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2570
2571 for sd in streaming_data:
2572 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2573 if hls_manifest_url:
2574 for f in self._extract_m3u8_formats(
2575 hls_manifest_url, video_id, 'mp4', fatal=False):
2576 itag = self._search_regex(
2577 r'/itag/(\d+)', f['url'], 'itag', default=None)
2578 if itag in itags:
2579 continue
2580 if itag:
2581 f['format_id'] = itag
2582 itags.append(itag)
2583 yield f
2584
2585 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2586 if dash_manifest_url:
2587 for f in self._extract_mpd_formats(
2588 dash_manifest_url, video_id, fatal=False):
2589 itag = f['format_id']
2590 if itag in itags:
2591 continue
2592 if itag:
2593 itags.append(itag)
2594 if itag in itag_qualities:
2595 f['quality'] = q(itag_qualities[itag])
2596 filesize = int_or_none(self._search_regex(
2597 r'/clen/(\d+)', f.get('fragment_base_url')
2598 or f['url'], 'file size', default=None))
2599 if filesize:
2600 f['filesize'] = filesize
2601 yield f
2602
2603 def _real_extract(self, url):
2604 url, smuggled_data = unsmuggle_url(url, {})
2605 video_id = self._match_id(url)
2606
2607 base_url = self.http_scheme() + '//www.youtube.com/'
2608 webpage_url = base_url + 'watch?v=' + video_id
2609 webpage = self._download_webpage(
2610 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2611
2612 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2613 player_url = self._extract_player_url(master_ytcfg, webpage)
2614 identity_token = self._extract_identity_token(webpage, video_id)
2615
2616 player_responses = list(self._extract_player_responses(
2617 self._get_requested_clients(url, smuggled_data),
2618 video_id, webpage, master_ytcfg, player_url, identity_token))
2619
2620 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2621
2622 playability_statuses = traverse_obj(
2623 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2624
2625 trailer_video_id = get_first(
2626 playability_statuses,
2627 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2628 expected_type=str)
2629 if trailer_video_id:
2630 return self.url_result(
2631 trailer_video_id, self.ie_key(), trailer_video_id)
2632
2633 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2634 if webpage else (lambda x: None))
2635
2636 video_details = traverse_obj(
2637 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2638 microformats = traverse_obj(
2639 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2640 expected_type=dict, default=[])
2641 video_title = (
2642 get_first(video_details, 'title')
2643 or self._get_text(microformats, (..., 'title'))
2644 or search_meta(['og:title', 'twitter:title', 'title']))
2645 video_description = get_first(video_details, 'shortDescription')
2646
2647 if not smuggled_data.get('force_singlefeed', False):
2648 if not self.get_param('noplaylist'):
2649 multifeed_metadata_list = get_first(
2650 player_responses,
2651 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2652 expected_type=str)
2653 if multifeed_metadata_list:
2654 entries = []
2655 feed_ids = []
2656 for feed in multifeed_metadata_list.split(','):
2657 # Unquote should take place before split on comma (,) since textual
2658 # fields may contain comma as well (see
2659 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2660 feed_data = compat_parse_qs(
2661 compat_urllib_parse_unquote_plus(feed))
2662
2663 def feed_entry(name):
2664 return try_get(
2665 feed_data, lambda x: x[name][0], compat_str)
2666
2667 feed_id = feed_entry('id')
2668 if not feed_id:
2669 continue
2670 feed_title = feed_entry('title')
2671 title = video_title
2672 if feed_title:
2673 title += ' (%s)' % feed_title
2674 entries.append({
2675 '_type': 'url_transparent',
2676 'ie_key': 'Youtube',
2677 'url': smuggle_url(
2678 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2679 {'force_singlefeed': True}),
2680 'title': title,
2681 })
2682 feed_ids.append(feed_id)
2683 self.to_screen(
2684 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2685 % (', '.join(feed_ids), video_id))
2686 return self.playlist_result(
2687 entries, video_id, video_title, video_description)
2688 else:
2689 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2690
2691 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2692 is_live = get_first(video_details, 'isLive')
2693 if is_live is None:
2694 is_live = get_first(live_broadcast_details, 'isLiveNow')
2695
2696 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2697 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2698
2699 if not formats:
2700 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2701 self.raise_no_formats(
2702 'This video is DRM protected.', expected=True)
2703 pemr = get_first(
2704 playability_statuses,
2705 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2706 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2707 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2708 if subreason:
2709 if subreason == 'The uploader has not made this video available in your country.':
2710 countries = get_first(microformats, 'availableCountries')
2711 if not countries:
2712 regions_allowed = search_meta('regionsAllowed')
2713 countries = regions_allowed.split(',') if regions_allowed else None
2714 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2715 reason += f'. {subreason}'
2716 if reason:
2717 self.raise_no_formats(reason, expected=True)
2718
2719 for f in formats:
2720 # TODO: detect if throttled
2721 if '&n=' in f['url']: # possibly throttled
2722 f['source_preference'] = -10
2723 # note = f.get('format_note')
2724 # f['format_note'] = f'{note} (throttled)' if note else '(throttled)'
2725
2726 self._sort_formats(formats)
2727
2728 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2729 if not keywords and webpage:
2730 keywords = [
2731 unescapeHTML(m.group('content'))
2732 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2733 for keyword in keywords:
2734 if keyword.startswith('yt:stretch='):
2735 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2736 if mobj:
2737 # NB: float is intentional for forcing float division
2738 w, h = (float(v) for v in mobj.groups())
2739 if w > 0 and h > 0:
2740 ratio = w / h
2741 for f in formats:
2742 if f.get('vcodec') != 'none':
2743 f['stretched_ratio'] = ratio
2744 break
2745
2746 thumbnails = []
2747 thumbnail_dicts = traverse_obj(
2748 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2749 expected_type=dict, default=[])
2750 for thumbnail in thumbnail_dicts:
2751 thumbnail_url = thumbnail.get('url')
2752 if not thumbnail_url:
2753 continue
2754 # Sometimes youtube gives a wrong thumbnail URL. See:
2755 # https://github.com/yt-dlp/yt-dlp/issues/233
2756 # https://github.com/ytdl-org/youtube-dl/issues/28023
2757 if 'maxresdefault' in thumbnail_url:
2758 thumbnail_url = thumbnail_url.split('?')[0]
2759 thumbnails.append({
2760 'url': thumbnail_url,
2761 'height': int_or_none(thumbnail.get('height')),
2762 'width': int_or_none(thumbnail.get('width')),
2763 })
2764 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2765 if thumbnail_url:
2766 thumbnails.append({
2767 'url': thumbnail_url,
2768 })
2769 # The best resolution thumbnails sometimes does not appear in the webpage
2770 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2771 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2772 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2773 # TODO: Test them also? - For some videos, even these don't exist
2774 guaranteed_thumbnail_names = [
2775 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2776 'mqdefault', 'mq1', 'mq2', 'mq3',
2777 'default', '1', '2', '3'
2778 ]
2779 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2780 n_thumbnail_names = len(thumbnail_names)
2781
2782 thumbnails.extend({
2783 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2784 video_id=video_id, name=name, ext=ext,
2785 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2786 '_test_url': name in hq_thumbnail_names,
2787 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2788 for thumb in thumbnails:
2789 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2790 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2791 self._remove_duplicate_formats(thumbnails)
2792
2793 category = get_first(microformats, 'category') or search_meta('genre')
2794 channel_id = str_or_none(
2795 get_first(video_details, 'channelId')
2796 or get_first(microformats, 'externalChannelId')
2797 or search_meta('channelId'))
2798 duration = int_or_none(
2799 get_first(video_details, 'lengthSeconds')
2800 or get_first(microformats, 'lengthSeconds')
2801 or parse_duration(search_meta('duration'))) or None
2802 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2803
2804 live_content = get_first(video_details, 'isLiveContent')
2805 is_upcoming = get_first(video_details, 'isUpcoming')
2806 if is_live is None:
2807 if is_upcoming or live_content is False:
2808 is_live = False
2809 if is_upcoming is None and (live_content or is_live):
2810 is_upcoming = False
2811 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2812 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2813 if not duration and live_endtime and live_starttime:
2814 duration = live_endtime - live_starttime
2815
2816 info = {
2817 'id': video_id,
2818 'title': self._live_title(video_title) if is_live else video_title,
2819 'formats': formats,
2820 'thumbnails': thumbnails,
2821 'description': video_description,
2822 'upload_date': unified_strdate(
2823 get_first(microformats, 'uploadDate')
2824 or search_meta('uploadDate')),
2825 'uploader': get_first(video_details, 'author'),
2826 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2827 'uploader_url': owner_profile_url,
2828 'channel_id': channel_id,
2829 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2830 'duration': duration,
2831 'view_count': int_or_none(
2832 get_first((video_details, microformats), (..., 'viewCount'))
2833 or search_meta('interactionCount')),
2834 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2835 'age_limit': 18 if (
2836 get_first(microformats, 'isFamilySafe') is False
2837 or search_meta('isFamilyFriendly') == 'false'
2838 or search_meta('og:restrictions:age') == '18+') else 0,
2839 'webpage_url': webpage_url,
2840 'categories': [category] if category else None,
2841 'tags': keywords,
2842 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2843 'is_live': is_live,
2844 'was_live': (False if is_live or is_upcoming or live_content is False
2845 else None if is_live is None or is_upcoming is None
2846 else live_content),
2847 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2848 'release_timestamp': live_starttime,
2849 }
2850
2851 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2852 # Converted into dicts to remove duplicates
2853 captions = {
2854 sub.get('baseUrl'): sub
2855 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2856 translation_languages = {
2857 lang.get('languageCode'): lang.get('languageName')
2858 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2859 subtitles = {}
2860 if pctr:
2861 def process_language(container, base_url, lang_code, sub_name, query):
2862 lang_subs = container.setdefault(lang_code, [])
2863 for fmt in self._SUBTITLE_FORMATS:
2864 query.update({
2865 'fmt': fmt,
2866 })
2867 lang_subs.append({
2868 'ext': fmt,
2869 'url': update_url_query(base_url, query),
2870 'name': sub_name,
2871 })
2872
2873 for base_url, caption_track in captions.items():
2874 if not base_url:
2875 continue
2876 if caption_track.get('kind') != 'asr':
2877 lang_code = (
2878 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2879 or caption_track.get('languageCode'))
2880 if not lang_code:
2881 continue
2882 process_language(
2883 subtitles, base_url, lang_code,
2884 traverse_obj(caption_track, ('name', 'simpleText')),
2885 {})
2886 continue
2887 automatic_captions = {}
2888 for trans_code, trans_name in translation_languages.items():
2889 if not trans_code:
2890 continue
2891 process_language(
2892 automatic_captions, base_url, trans_code,
2893 self._get_text(trans_name, max_runs=1),
2894 {'tlang': trans_code})
2895 info['automatic_captions'] = automatic_captions
2896 info['subtitles'] = subtitles
2897
2898 parsed_url = compat_urllib_parse_urlparse(url)
2899 for component in [parsed_url.fragment, parsed_url.query]:
2900 query = compat_parse_qs(component)
2901 for k, v in query.items():
2902 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2903 d_k += '_time'
2904 if d_k not in info and k in s_ks:
2905 info[d_k] = parse_duration(query[k][0])
2906
2907 # Youtube Music Auto-generated description
2908 if video_description:
2909 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2910 if mobj:
2911 release_year = mobj.group('release_year')
2912 release_date = mobj.group('release_date')
2913 if release_date:
2914 release_date = release_date.replace('-', '')
2915 if not release_year:
2916 release_year = release_date[:4]
2917 info.update({
2918 'album': mobj.group('album'.strip()),
2919 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2920 'track': mobj.group('track').strip(),
2921 'release_date': release_date,
2922 'release_year': int_or_none(release_year),
2923 })
2924
2925 initial_data = None
2926 if webpage:
2927 initial_data = self._extract_yt_initial_variable(
2928 webpage, self._YT_INITIAL_DATA_RE, video_id,
2929 'yt initial data')
2930 if not initial_data:
2931 headers = self.generate_api_headers(
2932 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
2933 session_index=self._extract_session_index(master_ytcfg))
2934
2935 initial_data = self._extract_response(
2936 item_id=video_id, ep='next', fatal=False,
2937 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
2938 note='Downloading initial data API JSON')
2939
2940 try:
2941 # This will error if there is no livechat
2942 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2943 info['subtitles']['live_chat'] = [{
2944 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2945 'video_id': video_id,
2946 'ext': 'json',
2947 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2948 }]
2949 except (KeyError, IndexError, TypeError):
2950 pass
2951
2952 if initial_data:
2953 info['chapters'] = (
2954 self._extract_chapters_from_json(initial_data, duration)
2955 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2956 or None)
2957
2958 contents = try_get(
2959 initial_data,
2960 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2961 list) or []
2962 for content in contents:
2963 vpir = content.get('videoPrimaryInfoRenderer')
2964 if vpir:
2965 stl = vpir.get('superTitleLink')
2966 if stl:
2967 stl = self._get_text(stl)
2968 if try_get(
2969 vpir,
2970 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2971 info['location'] = stl
2972 else:
2973 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2974 if mobj:
2975 info.update({
2976 'series': mobj.group(1),
2977 'season_number': int(mobj.group(2)),
2978 'episode_number': int(mobj.group(3)),
2979 })
2980 for tlb in (try_get(
2981 vpir,
2982 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2983 list) or []):
2984 tbr = tlb.get('toggleButtonRenderer') or {}
2985 for getter, regex in [(
2986 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2987 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2988 lambda x: x['accessibility'],
2989 lambda x: x['accessibilityData']['accessibilityData'],
2990 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2991 label = (try_get(tbr, getter, dict) or {}).get('label')
2992 if label:
2993 mobj = re.match(regex, label)
2994 if mobj:
2995 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2996 break
2997 sbr_tooltip = try_get(
2998 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2999 if sbr_tooltip:
3000 like_count, dislike_count = sbr_tooltip.split(' / ')
3001 info.update({
3002 'like_count': str_to_int(like_count),
3003 'dislike_count': str_to_int(dislike_count),
3004 })
3005 vsir = content.get('videoSecondaryInfoRenderer')
3006 if vsir:
3007 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3008 rows = try_get(
3009 vsir,
3010 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3011 list) or []
3012 multiple_songs = False
3013 for row in rows:
3014 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3015 multiple_songs = True
3016 break
3017 for row in rows:
3018 mrr = row.get('metadataRowRenderer') or {}
3019 mrr_title = mrr.get('title')
3020 if not mrr_title:
3021 continue
3022 mrr_title = self._get_text(mrr, 'title')
3023 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3024 if mrr_title == 'License':
3025 info['license'] = mrr_contents_text
3026 elif not multiple_songs:
3027 if mrr_title == 'Album':
3028 info['album'] = mrr_contents_text
3029 elif mrr_title == 'Artist':
3030 info['artist'] = mrr_contents_text
3031 elif mrr_title == 'Song':
3032 info['track'] = mrr_contents_text
3033
3034 fallbacks = {
3035 'channel': 'uploader',
3036 'channel_id': 'uploader_id',
3037 'channel_url': 'uploader_url',
3038 }
3039 for to, frm in fallbacks.items():
3040 if not info.get(to):
3041 info[to] = info.get(frm)
3042
3043 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3044 v = info.get(s_k)
3045 if v:
3046 info[d_k] = v
3047
3048 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3049 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3050 is_membersonly = None
3051 is_premium = None
3052 if initial_data and is_private is not None:
3053 is_membersonly = False
3054 is_premium = False
3055 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3056 badge_labels = set()
3057 for content in contents:
3058 if not isinstance(content, dict):
3059 continue
3060 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3061 for badge_label in badge_labels:
3062 if badge_label.lower() == 'members only':
3063 is_membersonly = True
3064 elif badge_label.lower() == 'premium':
3065 is_premium = True
3066 elif badge_label.lower() == 'unlisted':
3067 is_unlisted = True
3068
3069 info['availability'] = self._availability(
3070 is_private=is_private,
3071 needs_premium=is_premium,
3072 needs_subscription=is_membersonly,
3073 needs_auth=info['age_limit'] >= 18,
3074 is_unlisted=None if is_private is None else is_unlisted)
3075
3076 # get xsrf for annotations or comments
3077 get_annotations = self.get_param('writeannotations', False)
3078 get_comments = self.get_param('getcomments', False)
3079 if get_annotations or get_comments:
3080 xsrf_token = None
3081 if master_ytcfg:
3082 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
3083 if not xsrf_token:
3084 xsrf_token = self._search_regex(
3085 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
3086 webpage, 'xsrf token', group='xsrf_token', fatal=False)
3087
3088 # annotations
3089 if get_annotations:
3090 invideo_url = get_first(
3091 player_responses,
3092 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3093 expected_type=str)
3094 if xsrf_token and invideo_url:
3095 xsrf_field_name = None
3096 if master_ytcfg:
3097 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3098 if not xsrf_field_name:
3099 xsrf_field_name = self._search_regex(
3100 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3101 webpage, 'xsrf field name',
3102 group='xsrf_field_name', default='session_token')
3103 info['annotations'] = self._download_webpage(
3104 self._proto_relative_url(invideo_url),
3105 video_id, note='Downloading annotations',
3106 errnote='Unable to download video annotations', fatal=False,
3107 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3108
3109 if get_comments:
3110 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3111
3112 self.mark_watched(video_id, player_responses)
3113
3114 return info
3115
3116
3117 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3118 IE_DESC = 'YouTube.com tab'
3119 _VALID_URL = r'''(?x)
3120 https?://
3121 (?:\w+\.)?
3122 (?:
3123 youtube(?:kids)?\.com|
3124 invidio\.us
3125 )/
3126 (?:
3127 (?P<channel_type>channel|c|user|browse)/|
3128 (?P<not_channel>
3129 feed/|hashtag/|
3130 (?:playlist|watch)\?.*?\blist=
3131 )|
3132 (?!(?:%s)\b) # Direct URLs
3133 )
3134 (?P<id>[^/?\#&]+)
3135 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3136 IE_NAME = 'youtube:tab'
3137
3138 _TESTS = [{
3139 'note': 'playlists, multipage',
3140 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3141 'playlist_mincount': 94,
3142 'info_dict': {
3143 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3144 'title': 'Игорь Клейнер - Playlists',
3145 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3146 'uploader': 'Игорь Клейнер',
3147 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3148 },
3149 }, {
3150 'note': 'playlists, multipage, different order',
3151 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3152 'playlist_mincount': 94,
3153 'info_dict': {
3154 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3155 'title': 'Игорь Клейнер - Playlists',
3156 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3157 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3158 'uploader': 'Игорь Клейнер',
3159 },
3160 }, {
3161 'note': 'playlists, series',
3162 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3163 'playlist_mincount': 5,
3164 'info_dict': {
3165 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3166 'title': '3Blue1Brown - Playlists',
3167 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3168 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3169 'uploader': '3Blue1Brown',
3170 },
3171 }, {
3172 'note': 'playlists, singlepage',
3173 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3174 'playlist_mincount': 4,
3175 'info_dict': {
3176 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3177 'title': 'ThirstForScience - Playlists',
3178 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3179 'uploader': 'ThirstForScience',
3180 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3181 }
3182 }, {
3183 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3184 'only_matching': True,
3185 }, {
3186 'note': 'basic, single video playlist',
3187 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3188 'info_dict': {
3189 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3190 'uploader': 'Sergey M.',
3191 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3192 'title': 'youtube-dl public playlist',
3193 },
3194 'playlist_count': 1,
3195 }, {
3196 'note': 'empty playlist',
3197 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3198 'info_dict': {
3199 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3200 'uploader': 'Sergey M.',
3201 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3202 'title': 'youtube-dl empty playlist',
3203 },
3204 'playlist_count': 0,
3205 }, {
3206 'note': 'Home tab',
3207 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3208 'info_dict': {
3209 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3210 'title': 'lex will - Home',
3211 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3212 'uploader': 'lex will',
3213 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3214 },
3215 'playlist_mincount': 2,
3216 }, {
3217 'note': 'Videos tab',
3218 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3219 'info_dict': {
3220 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3221 'title': 'lex will - Videos',
3222 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3223 'uploader': 'lex will',
3224 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3225 },
3226 'playlist_mincount': 975,
3227 }, {
3228 'note': 'Videos tab, sorted by popular',
3229 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3230 'info_dict': {
3231 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3232 'title': 'lex will - Videos',
3233 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3234 'uploader': 'lex will',
3235 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3236 },
3237 'playlist_mincount': 199,
3238 }, {
3239 'note': 'Playlists tab',
3240 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3241 'info_dict': {
3242 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3243 'title': 'lex will - Playlists',
3244 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3245 'uploader': 'lex will',
3246 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3247 },
3248 'playlist_mincount': 17,
3249 }, {
3250 'note': 'Community tab',
3251 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3252 'info_dict': {
3253 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3254 'title': 'lex will - Community',
3255 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3256 'uploader': 'lex will',
3257 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3258 },
3259 'playlist_mincount': 18,
3260 }, {
3261 'note': 'Channels tab',
3262 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3263 'info_dict': {
3264 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3265 'title': 'lex will - Channels',
3266 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3267 'uploader': 'lex will',
3268 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3269 },
3270 'playlist_mincount': 12,
3271 }, {
3272 'note': 'Search tab',
3273 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3274 'playlist_mincount': 40,
3275 'info_dict': {
3276 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3277 'title': '3Blue1Brown - Search - linear algebra',
3278 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3279 'uploader': '3Blue1Brown',
3280 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3281 },
3282 }, {
3283 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3284 'only_matching': True,
3285 }, {
3286 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3287 'only_matching': True,
3288 }, {
3289 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3290 'only_matching': True,
3291 }, {
3292 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3293 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3294 'info_dict': {
3295 'title': '29C3: Not my department',
3296 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3297 'uploader': 'Christiaan008',
3298 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3299 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3300 },
3301 'playlist_count': 96,
3302 }, {
3303 'note': 'Large playlist',
3304 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3305 'info_dict': {
3306 'title': 'Uploads from Cauchemar',
3307 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3308 'uploader': 'Cauchemar',
3309 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3310 },
3311 'playlist_mincount': 1123,
3312 }, {
3313 'note': 'even larger playlist, 8832 videos',
3314 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3315 'only_matching': True,
3316 }, {
3317 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3318 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3319 'info_dict': {
3320 'title': 'Uploads from Interstellar Movie',
3321 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3322 'uploader': 'Interstellar Movie',
3323 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3324 },
3325 'playlist_mincount': 21,
3326 }, {
3327 'note': 'Playlist with "show unavailable videos" button',
3328 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3329 'info_dict': {
3330 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3331 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3332 'uploader': 'Phim Siêu Nhân Nhật Bản',
3333 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3334 },
3335 'playlist_mincount': 200,
3336 }, {
3337 'note': 'Playlist with unavailable videos in page 7',
3338 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3339 'info_dict': {
3340 'title': 'Uploads from BlankTV',
3341 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3342 'uploader': 'BlankTV',
3343 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3344 },
3345 'playlist_mincount': 1000,
3346 }, {
3347 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3348 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3349 'info_dict': {
3350 'title': 'Data Analysis with Dr Mike Pound',
3351 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3352 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3353 'uploader': 'Computerphile',
3354 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3355 },
3356 'playlist_mincount': 11,
3357 }, {
3358 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3359 'only_matching': True,
3360 }, {
3361 'note': 'Playlist URL that does not actually serve a playlist',
3362 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3363 'info_dict': {
3364 'id': 'FqZTN594JQw',
3365 'ext': 'webm',
3366 'title': "Smiley's People 01 detective, Adventure Series, Action",
3367 'uploader': 'STREEM',
3368 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3369 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3370 'upload_date': '20150526',
3371 'license': 'Standard YouTube License',
3372 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3373 'categories': ['People & Blogs'],
3374 'tags': list,
3375 'view_count': int,
3376 'like_count': int,
3377 'dislike_count': int,
3378 },
3379 'params': {
3380 'skip_download': True,
3381 },
3382 'skip': 'This video is not available.',
3383 'add_ie': [YoutubeIE.ie_key()],
3384 }, {
3385 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3386 'only_matching': True,
3387 }, {
3388 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3389 'only_matching': True,
3390 }, {
3391 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3392 'info_dict': {
3393 'id': 'FMtPN8yp5LU', # This will keep changing
3394 'ext': 'mp4',
3395 'title': compat_str,
3396 'uploader': 'Sky News',
3397 'uploader_id': 'skynews',
3398 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3399 'upload_date': r're:\d{8}',
3400 'description': compat_str,
3401 'categories': ['News & Politics'],
3402 'tags': list,
3403 'like_count': int,
3404 'dislike_count': int,
3405 },
3406 'params': {
3407 'skip_download': True,
3408 },
3409 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3410 }, {
3411 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3412 'info_dict': {
3413 'id': 'a48o2S1cPoo',
3414 'ext': 'mp4',
3415 'title': 'The Young Turks - Live Main Show',
3416 'uploader': 'The Young Turks',
3417 'uploader_id': 'TheYoungTurks',
3418 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3419 'upload_date': '20150715',
3420 'license': 'Standard YouTube License',
3421 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3422 'categories': ['News & Politics'],
3423 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3424 'like_count': int,
3425 'dislike_count': int,
3426 },
3427 'params': {
3428 'skip_download': True,
3429 },
3430 'only_matching': True,
3431 }, {
3432 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3433 'only_matching': True,
3434 }, {
3435 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3436 'only_matching': True,
3437 }, {
3438 'note': 'A channel that is not live. Should raise error',
3439 'url': 'https://www.youtube.com/user/numberphile/live',
3440 'only_matching': True,
3441 }, {
3442 'url': 'https://www.youtube.com/feed/trending',
3443 'only_matching': True,
3444 }, {
3445 'url': 'https://www.youtube.com/feed/library',
3446 'only_matching': True,
3447 }, {
3448 'url': 'https://www.youtube.com/feed/history',
3449 'only_matching': True,
3450 }, {
3451 'url': 'https://www.youtube.com/feed/subscriptions',
3452 'only_matching': True,
3453 }, {
3454 'url': 'https://www.youtube.com/feed/watch_later',
3455 'only_matching': True,
3456 }, {
3457 'note': 'Recommended - redirects to home page',
3458 'url': 'https://www.youtube.com/feed/recommended',
3459 'only_matching': True,
3460 }, {
3461 'note': 'inline playlist with not always working continuations',
3462 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3463 'only_matching': True,
3464 }, {
3465 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3466 'only_matching': True,
3467 }, {
3468 'url': 'https://www.youtube.com/course',
3469 'only_matching': True,
3470 }, {
3471 'url': 'https://www.youtube.com/zsecurity',
3472 'only_matching': True,
3473 }, {
3474 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3475 'only_matching': True,
3476 }, {
3477 'url': 'https://www.youtube.com/TheYoungTurks/live',
3478 'only_matching': True,
3479 }, {
3480 'url': 'https://www.youtube.com/hashtag/cctv9',
3481 'info_dict': {
3482 'id': 'cctv9',
3483 'title': '#cctv9',
3484 },
3485 'playlist_mincount': 350,
3486 }, {
3487 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3488 'only_matching': True,
3489 }, {
3490 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3491 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3492 'only_matching': True
3493 }, {
3494 'note': '/browse/ should redirect to /channel/',
3495 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3496 'only_matching': True
3497 }, {
3498 'note': 'VLPL, should redirect to playlist?list=PL...',
3499 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3500 'info_dict': {
3501 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3502 'uploader': 'NoCopyrightSounds',
3503 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3504 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3505 'title': 'NCS Releases',
3506 },
3507 'playlist_mincount': 166,
3508 }, {
3509 'note': 'Topic, should redirect to playlist?list=UU...',
3510 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3511 'info_dict': {
3512 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3513 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3514 'title': 'Uploads from Royalty Free Music - Topic',
3515 'uploader': 'Royalty Free Music - Topic',
3516 },
3517 'expected_warnings': [
3518 'A channel/user page was given',
3519 'The URL does not have a videos tab',
3520 ],
3521 'playlist_mincount': 101,
3522 }, {
3523 'note': 'Topic without a UU playlist',
3524 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3525 'info_dict': {
3526 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3527 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3528 },
3529 'expected_warnings': [
3530 'A channel/user page was given',
3531 'The URL does not have a videos tab',
3532 'Falling back to channel URL',
3533 ],
3534 'playlist_mincount': 9,
3535 }, {
3536 'note': 'Youtube music Album',
3537 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3538 'info_dict': {
3539 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3540 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3541 },
3542 'playlist_count': 50,
3543 }, {
3544 'note': 'unlisted single video playlist',
3545 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3546 'info_dict': {
3547 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3548 'uploader': 'colethedj',
3549 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3550 'title': 'yt-dlp unlisted playlist test',
3551 'availability': 'unlisted'
3552 },
3553 'playlist_count': 1,
3554 }]
3555
3556 @classmethod
3557 def suitable(cls, url):
3558 return False if YoutubeIE.suitable(url) else super(
3559 YoutubeTabIE, cls).suitable(url)
3560
3561 def _extract_channel_id(self, webpage):
3562 channel_id = self._html_search_meta(
3563 'channelId', webpage, 'channel id', default=None)
3564 if channel_id:
3565 return channel_id
3566 channel_url = self._html_search_meta(
3567 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3568 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3569 'twitter:app:url:googleplay'), webpage, 'channel url')
3570 return self._search_regex(
3571 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3572 channel_url, 'channel id')
3573
3574 @staticmethod
3575 def _extract_basic_item_renderer(item):
3576 # Modified from _extract_grid_item_renderer
3577 known_basic_renderers = (
3578 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3579 )
3580 for key, renderer in item.items():
3581 if not isinstance(renderer, dict):
3582 continue
3583 elif key in known_basic_renderers:
3584 return renderer
3585 elif key.startswith('grid') and key.endswith('Renderer'):
3586 return renderer
3587
    def _grid_entries(self, grid_renderer):
        """Yield entries for each item of a gridRenderer.

        Each item may represent a playlist, a video, a channel or a generic
        navigation endpoint; the first matching kind wins for a given item.
        """
        for item in grid_renderer['items']:
            if not isinstance(item, dict):
                continue
            renderer = self._extract_basic_item_renderer(item)
            if not isinstance(renderer, dict):
                continue
            title = self._get_text(renderer, 'title')

            # playlist
            playlist_id = renderer.get('playlistId')
            if playlist_id:
                yield self.url_result(
                    'https://www.youtube.com/playlist?list=%s' % playlist_id,
                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                    video_title=title)
                continue
            # video
            video_id = renderer.get('videoId')
            if video_id:
                yield self._extract_video(renderer)
                continue
            # channel
            channel_id = renderer.get('channelId')
            if channel_id:
                yield self.url_result(
                    'https://www.youtube.com/channel/%s' % channel_id,
                    ie=YoutubeTabIE.ie_key(), video_title=title)
                continue
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if ep_url:
                # delegate to the first extractor that recognizes the URL
                for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                    if ie.suitable(ep_url):
                        yield self.url_result(
                            ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                        break
3627
3628 def _shelf_entries_from_content(self, shelf_renderer):
3629 content = shelf_renderer.get('content')
3630 if not isinstance(content, dict):
3631 return
3632 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3633 if renderer:
3634 # TODO: add support for nested playlists so each shelf is processed
3635 # as separate playlist
3636 # TODO: this includes only first N items
3637 for entry in self._grid_entries(renderer):
3638 yield entry
3639 renderer = content.get('horizontalListRenderer')
3640 if renderer:
3641 # TODO
3642 pass
3643
3644 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3645 ep = try_get(
3646 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3647 compat_str)
3648 shelf_url = urljoin('https://www.youtube.com', ep)
3649 if shelf_url:
3650 # Skipping links to another channels, note that checking for
3651 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3652 # will not work
3653 if skip_channels and '/channels?' in shelf_url:
3654 return
3655 title = self._get_text(shelf_renderer, 'title')
3656 yield self.url_result(shelf_url, video_title=title)
3657 # Shelf may not contain shelf URL, fallback to extraction from content
3658 for entry in self._shelf_entries_from_content(shelf_renderer):
3659 yield entry
3660
3661 def _playlist_entries(self, video_list_renderer):
3662 for content in video_list_renderer['contents']:
3663 if not isinstance(content, dict):
3664 continue
3665 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3666 if not isinstance(renderer, dict):
3667 continue
3668 video_id = renderer.get('videoId')
3669 if not video_id:
3670 continue
3671 yield self._extract_video(renderer)
3672
3673 def _rich_entries(self, rich_grid_renderer):
3674 renderer = try_get(
3675 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3676 video_id = renderer.get('videoId')
3677 if not video_id:
3678 return
3679 yield self._extract_video(renderer)
3680
3681 def _video_entry(self, video_renderer):
3682 video_id = video_renderer.get('videoId')
3683 if video_id:
3684 return self._extract_video(video_renderer)
3685
    def _post_thread_entries(self, post_thread_renderer):
        """Yield entries found in a community post: the attached video and/or
        playlist, plus any YouTube video links inside the post text."""
        post_renderer = try_get(
            post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
        if not post_renderer:
            return
        # video attachment
        video_renderer = try_get(
            post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
        video_id = video_renderer.get('videoId')
        if video_id:
            entry = self._extract_video(video_renderer)
            if entry:
                yield entry
        # playlist attachment
        playlist_id = try_get(
            post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
        # inline video links
        runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
        for run in runs:
            if not isinstance(run, dict):
                continue
            ep_url = try_get(
                run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
            if not ep_url:
                continue
            if not YoutubeIE.suitable(ep_url):
                continue
            ep_video_id = YoutubeIE._match_id(ep_url)
            # skip the video already yielded as the attachment
            if video_id == ep_video_id:
                continue
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3721
3722 def _post_thread_continuation_entries(self, post_thread_continuation):
3723 contents = post_thread_continuation.get('contents')
3724 if not isinstance(contents, list):
3725 return
3726 for content in contents:
3727 renderer = content.get('backstagePostThreadRenderer')
3728 if not isinstance(renderer, dict):
3729 continue
3730 for entry in self._post_thread_entries(renderer):
3731 yield entry
3732
3733 r''' # unused
3734 def _rich_grid_entries(self, contents):
3735 for content in contents:
3736 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3737 if video_renderer:
3738 entry = self._video_entry(video_renderer)
3739 if entry:
3740 yield entry
3741 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Lazily yield all entries of the selected tab, following continuations.

        @param tab             selected tab renderer
        @param item_id         id used in download notes
        @param identity_token  auth token for API headers (may be None)
        @param account_syncid  multi-channel account id (may be None)
        @param ytcfg           page ytcfg used for API requests
        """

        def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # dispatch on the first known renderer kind found
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                    if not continuation_list[0]:
                        continuation_list[0] = self._extract_continuation(is_renderer)

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(parent_renderer)

        # single-element list so the nested generator can write to it
        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # carry visitor data forward so subsequent pages stay consistent
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # old-style continuation payloads
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # new-style continuation payloads (onResponseReceived*); the items
            # are wrapped back into a renderer-shaped dict for the handlers
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3857
3858 @staticmethod
3859 def _extract_selected_tab(tabs):
3860 for tab in tabs:
3861 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3862 if renderer.get('selected') is True:
3863 return renderer
3864 else:
3865 raise ExtractorError('Unable to find selected tab')
3866
3867 @classmethod
3868 def _extract_uploader(cls, data):
3869 uploader = {}
3870 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3871 owner = try_get(
3872 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3873 if owner:
3874 uploader['uploader'] = owner.get('text')
3875 uploader['uploader_id'] = try_get(
3876 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3877 uploader['uploader_url'] = urljoin(
3878 'https://www.youtube.com/',
3879 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3880 return {k: v for k, v in uploader.items() if v is not None}
3881
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build the playlist result for a tab page (channel/playlist/hashtag).

        Collects metadata from the channel/playlist metadata renderers and
        the sidebar, then delegates entry extraction to _entries().
        """
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            # for channels this is the channel id; for playlists it stays None
            # here and is filled from item_id below
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # hashtag pages have no metadata renderer; fall back to the id
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # playlist pages: take uploader info from the sidebar instead
            metadata.update(self._extract_uploader(data))
            metadata.update({
                'channel': metadata['uploader'],
                'channel_id': metadata['uploader_id'],
                'channel_url': metadata['uploader_url']})
        ytcfg = self.extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
3956
3957 def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
3958 first_id = last_id = None
3959 ytcfg = self.extract_ytcfg(playlist_id, webpage)
3960 headers = self.generate_api_headers(
3961 ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3962 identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
3963 for page_num in itertools.count(1):
3964 videos = list(self._playlist_entries(playlist))
3965 if not videos:
3966 return
3967 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3968 if start >= len(videos):
3969 return
3970 for video in videos[start:]:
3971 if video['id'] == first_id:
3972 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3973 return
3974 yield video
3975 first_id = first_id or videos[0]['id']
3976 last_id = videos[-1]['id']
3977 watch_endpoint = try_get(
3978 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
3979 query = {
3980 'playlistId': playlist_id,
3981 'videoId': watch_endpoint.get('videoId') or last_id,
3982 'index': watch_endpoint.get('index') or len(videos),
3983 'params': watch_endpoint.get('params') or 'OAE%3D'
3984 }
3985 response = self._extract_response(
3986 item_id='%s page %d' % (playlist_id, page_num),
3987 query=query, ep='next', headers=headers, ytcfg=ytcfg,
3988 check_get_keys='contents'
3989 )
3990 playlist = try_get(
3991 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3992
3993 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
3994 title = playlist.get('title') or try_get(
3995 data, lambda x: x['titleText']['simpleText'], compat_str)
3996 playlist_id = playlist.get('playlistId') or item_id
3997
3998 # Delegating everything except mix playlists to regular tab-based playlist URL
3999 playlist_url = urljoin(url, try_get(
4000 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4001 compat_str))
4002 if playlist_url and playlist_url != url:
4003 return self.url_result(
4004 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4005 video_title=title)
4006
4007 return self.playlist_result(
4008 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4009 playlist_id=playlist_id, playlist_title=title)
4010
4011 def _extract_availability(self, data):
4012 """
4013 Gets the availability of a given playlist/tab.
4014 Note: Unless YouTube tells us explicitly, we do not assume it is public
4015 @param data: response
4016 """
4017 is_private = is_unlisted = None
4018 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4019 badge_labels = self._extract_badges(renderer)
4020
4021 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4022 privacy_dropdown_entries = try_get(
4023 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4024 for renderer_dict in privacy_dropdown_entries:
4025 is_selected = try_get(
4026 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4027 if not is_selected:
4028 continue
4029 label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4030 if label:
4031 badge_labels.add(label.lower())
4032 break
4033
4034 for badge_label in badge_labels:
4035 if badge_label == 'unlisted':
4036 is_unlisted = True
4037 elif badge_label == 'private':
4038 is_private = True
4039 elif badge_label == 'public':
4040 is_unlisted = is_private = False
4041 return self._availability(is_private, False, False, False, is_unlisted)
4042
4043 @staticmethod
4044 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4045 sidebar_renderer = try_get(
4046 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4047 for item in sidebar_renderer:
4048 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4049 if renderer:
4050 return renderer
4051
    def _reload_with_unavailable_videos(self, item_id, data, webpage):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.
        Returns the API response, or None when the button is absent.
        """
        browse_id = params = None
        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
        if not renderer:
            return
        # look for the 'show unavailable videos' entry in the sidebar menu
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

        ytcfg = self.extract_ytcfg(item_id, webpage)
        headers = self.generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=item_id),
            visitor_data=try_get(
                self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        # fallbacks when the endpoint gave no explicit values; presumably
        # 'wgYCCAA=' requests the unavailable-videos view — TODO confirm
        query = {
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False, ytcfg=ytcfg,
            note='Downloading API JSON with unavailable videos')
4090
4091 def _extract_webpage(self, url, item_id):
4092 retries = self.get_param('extractor_retries', 3)
4093 count = -1
4094 last_error = 'Incomplete yt initial data recieved'
4095 while count < retries:
4096 count += 1
4097 # Sometimes youtube returns a webpage with incomplete ytInitialData
4098 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4099 if count:
4100 self.report_warning('%s. Retrying ...' % last_error)
4101 webpage = self._download_webpage(
4102 url, item_id,
4103 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4104 data = self.extract_yt_initial_data(item_id, webpage)
4105 if data.get('contents') or data.get('currentVideoEndpoint'):
4106 break
4107 # Extract alerts here only when there is error
4108 self._extract_and_report_alerts(data)
4109 if count >= retries:
4110 raise ExtractorError(last_error)
4111 return webpage, data
4112
4113 @staticmethod
4114 def _smuggle_data(entries, data):
4115 for entry in entries:
4116 if data:
4117 entry['url'] = smuggle_url(entry['url'], data)
4118 yield entry
4119
4120 def _real_extract(self, url):
4121 url, smuggled_data = unsmuggle_url(url, {})
4122 if self.is_music_url(url):
4123 smuggled_data['is_music_url'] = True
4124 info_dict = self.__real_extract(url, smuggled_data)
4125 if info_dict.get('entries'):
4126 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4127 return info_dict
4128
4129 _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4130
    def __real_extract(self, url, smuggled_data):
        """
        Extract from a channel/playlist/watch "tab" page.

        Normalizes the URL (music domains, channel-home redirects, common
        user mistakes), downloads the page, then dispatches to tab, playlist
        or single-video extraction depending on what ytInitialData contains.
        """
        item_id = self._match_id(url)
        # Force the www.youtube.com host so music.youtube.com etc. behave consistently
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Match groups of _url_re, with None values normalized to ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Re-assemble and re-parse after the rewrites above
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            # Playlist fetched successfully: switch over to it entirely
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        # Neither tabs nor playlist: fall back to a single video if one can be found
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4245
4246
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Let YoutubeTabIE take tab URLs and YoutubeIE take watch URLs with a video id."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = bool(parse_qs(url).get('v', [None])[0])
        return not has_video_id and super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Normalize to a canonical playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        # Remember whether the URL came from a music domain *before* rewriting it
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        canonical_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            canonical_url = smuggle_url(canonical_url, {'is_music_url': True})
        return self.url_result(canonical_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4331
4332
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite a youtu.be/...?list= link as a full watch URL for YoutubeTabIE."""
        mobj = re.match(self._VALID_URL, url)
        video_id, playlist_id = mobj.group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4371
4372
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate a "ytuser:NAME" pseudo-URL to the corresponding /user/ page."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4386
4387
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the authenticated user's liked videos (":ytfav")."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos are exposed as the special 'LL' playlist
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4405
4406
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra 'params' value sent with the search request; overridden by subclasses
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to `n` video results for `query`, following search continuations."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page puts results under 'contents'; continuations under
            # 'onResponseReceivedCommands'
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    # Only videoRenderer entries are videos; skip promoted/other items
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token found anywhere on this page: we are done
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
4474
4475
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Same as youtube:search, but results come newest-first (see IE_DESC)."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Opaque search-params value selecting the date ordering described in IE_DESC
    _SEARCH_PARAMS = 'CAI%3D'
4481
4482
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search given by the URL's search_query/q (and optional sp) parameters."""
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query = (params.get('search_query') or params.get('q'))[0]
        # 'sp' carries the (opaque) search filter parameters, if present
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4509
4510
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors.
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the extractor name from the feed, e.g. 'youtube:history'
        return 'youtube:{0}'.format(self._FEED_NAME)

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/{0}'.format(self._FEED_NAME)
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4527
4528
class YoutubeWatchLaterIE(InfoExtractor):
    """Extractor for the authenticated user's Watch Later list (":ytwatchlater")."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch Later is exposed as the special 'WL' playlist
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4541
4542
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Extractor for the YouTube "recommended" feed (":ytrec" / the youtube.com home page)."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4558
4559
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's subscriptions feed (":ytsubs")."""
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4571
4572
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Extractor for the authenticated user's watch history (":ythis")."""
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4581
4582
class YoutubeTruncatedURLIE(InfoExtractor):
    """
    Catch-all for watch URLs whose video id was eaten by an unquoted '&' in
    the shell; fails with a helpful hint instead of downloading the home page.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Fix: the hint previously told users to run "youtube-dl",
        # but this program is yt-dlp
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
4630
4631
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video id is shorter than 11 characters and fail clearly."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)