yt_dlp/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import base64
   6 import calendar
   7 import copy
   8 import datetime
   9 import hashlib
  10 import itertools
  11 import json
  12 import os.path
  13 import random
  14 import re
  15 import time
  16 import traceback
  17
  18 from .common import InfoExtractor, SearchInfoExtractor
  19 from ..compat import (
  20     compat_chr,
  21     compat_HTTPError,
  22     compat_parse_qs,
  23     compat_str,
  24     compat_urllib_parse_unquote_plus,
  25     compat_urllib_parse_urlencode,
  26     compat_urllib_parse_urlparse,
  27     compat_urlparse,
  28 )
  29 from ..jsinterp import JSInterpreter
  30 from ..utils import (
  31     bytes_to_intlist,
  32     clean_html,
  33     datetime_from_str,
  34     dict_get,
  35     error_to_compat_str,
  36     ExtractorError,
  37     float_or_none,
  38     format_field,
  39     int_or_none,
  40     intlist_to_bytes,
  41     mimetype2ext,
  42     network_exceptions,
  43     orderedSet,
  44     parse_codecs,
  45     parse_count,
  46     parse_duration,
  47     parse_iso8601,
  48     qualities,
  49     remove_start,
  50     smuggle_url,
  51     str_or_none,
  52     str_to_int,
  53     traverse_obj,
  54     try_get,
  55     unescapeHTML,
  56     unified_strdate,
  57     unsmuggle_url,
  58     update_url_query,
  59     url_or_none,
  60     urlencode_postdata,
  61     urljoin,
  62     variadic,
  63 )
  64
  65
  66 def parse_qs(url):
  67     return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
  68
  69
  70 class YoutubeBaseInfoExtractor(InfoExtractor):
  71     """Provide base functions for Youtube extractors"""
  72     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
  73     _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
  74
  75     _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
  76     _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
  77     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
  78
  79     _RESERVED_NAMES = (
  80         r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
  81         r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
  82         r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
  83
  84     _NETRC_MACHINE = 'youtube'
  85     # If True it will raise an error if no login info is provided
  86     _LOGIN_REQUIRED = False
  87
  88     _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
  89
  90     def _login(self):
  91         """
  92         Attempt to log in to YouTube.
  93         True is returned if successful or skipped.
  94         False is returned if login failed.
  95
  96         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  97         """
  98
  99         def warn(message):
 100             self.report_warning(message)
 101
 102         # username+password login is broken
 103         if (self._LOGIN_REQUIRED
 104                 and self.get_param('cookiefile') is None
 105                 and self.get_param('cookiesfrombrowser') is None):
 106             self.raise_login_required(
 107                 'Login details are needed to download this content', method='cookies')
 108         username, password = self._get_login_info()
 109         if username:
 110             warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
 111         return
 112
 113         # Everything below this is broken!
 114         r'''
 115         # No authentication to be performed
 116         if username is None:
 117             if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
 118                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 119             # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
 120             #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
 121             return True
 122
 123         login_page = self._download_webpage(
 124             self._LOGIN_URL, None,
 125             note='Downloading login page',
 126             errnote='unable to fetch login page', fatal=False)
 127         if login_page is False:
 128             return
 129
 130         login_form = self._hidden_inputs(login_page)
 131
 132         def req(url, f_req, note, errnote):
 133             data = login_form.copy()
 134             data.update({
 135                 'pstMsg': 1,
 136                 'checkConnection': 'youtube',
 137                 'checkedDomains': 'youtube',
 138                 'hl': 'en',
 139                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 140                 'f.req': json.dumps(f_req),
 141                 'flowName': 'GlifWebSignIn',
 142                 'flowEntry': 'ServiceLogin',
 143                 # TODO: reverse actual botguard identifier generation algo
 144                 'bgRequest': '["identifier",""]',
 145             })
 146             return self._download_json(
 147                 url, None, note=note, errnote=errnote,
 148                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 149                 fatal=False,
 150                 data=urlencode_postdata(data), headers={
 151                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 152                     'Google-Accounts-XSRF': 1,
 153                 })
 154
 155         lookup_req = [
 156             username,
 157             None, [], None, 'US', None, None, 2, False, True,
 158             [
 159                 None, None,
 160                 [2, 1, None, 1,
 161                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 162                  None, [], 4],
 163                 1, [None, None, []], None, None, None, True
 164             ],
 165             username,
 166         ]
 167
 168         lookup_results = req(
 169             self._LOOKUP_URL, lookup_req,
 170             'Looking up account info', 'Unable to look up account info')
 171
 172         if lookup_results is False:
 173             return False
 174
 175         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 176         if not user_hash:
 177             warn('Unable to extract user hash')
 178             return False
 179
 180         challenge_req = [
 181             user_hash,
 182             None, 1, None, [1, None, None, None, [password, None, True]],
 183             [
 184                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 185                 1, [None, None, []], None, None, None, True
 186             ]]
 187
 188         challenge_results = req(
 189             self._CHALLENGE_URL, challenge_req,
 190             'Logging in', 'Unable to log in')
 191
 192         if challenge_results is False:
 193             return
 194
 195         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 196         if login_res:
 197             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 198             warn(
 199                 'Unable to login: %s' % 'Invalid password'
 200                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 201             return False
 202
 203         res = try_get(challenge_results, lambda x: x[0][-1], list)
 204         if not res:
 205             warn('Unable to extract result entry')
 206             return False
 207
 208         login_challenge = try_get(res, lambda x: x[0][0], list)
 209         if login_challenge:
 210             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 211             if challenge_str == 'TWO_STEP_VERIFICATION':
 212                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 213                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 214                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 215                 if status == 'QUOTA_EXCEEDED':
 216                     warn('Exceeded the limit of TFA codes, try later')
 217                     return False
 218
 219                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 220                 if not tl:
 221                     warn('Unable to extract TL')
 222                     return False
 223
 224                 tfa_code = self._get_tfa_info('2-step verification code')
 225
 226                 if not tfa_code:
 227                     warn(
 228                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 229                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 230                     return False
 231
 232                 tfa_code = remove_start(tfa_code, 'G-')
 233
 234                 tfa_req = [
 235                     user_hash, None, 2, None,
 236                     [
 237                         9, None, None, None, None, None, None, None,
 238                         [None, tfa_code, True, 2]
 239                     ]]
 240
 241                 tfa_results = req(
 242                     self._TFA_URL.format(tl), tfa_req,
 243                     'Submitting TFA code', 'Unable to submit TFA code')
 244
 245                 if tfa_results is False:
 246                     return False
 247
 248                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 249                 if tfa_res:
 250                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 251                     warn(
 252                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 253                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 254                     return False
 255
 256                 check_cookie_url = try_get(
 257                     tfa_results, lambda x: x[0][-1][2], compat_str)
 258             else:
 259                 CHALLENGES = {
 260                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 261                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 262                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 263                 }
 264                 challenge = CHALLENGES.get(
 265                     challenge_str,
 266                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 267                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 268                 return False
 269         else:
 270             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 271
 272         if not check_cookie_url:
 273             warn('Unable to extract CheckCookie URL')
 274             return False
 275
 276         check_cookie_results = self._download_webpage(
 277             check_cookie_url, None, 'Checking cookie', fatal=False)
 278
 279         if check_cookie_results is False:
 280             return False
 281
 282         if 'https://myaccount.google.com/' not in check_cookie_results:
 283             warn('Unable to log in')
 284             return False
 285
 286         return True
 287         '''
 288
 289     def _initialize_consent(self):
 290         cookies = self._get_cookies('https://www.youtube.com/')
 291         if cookies.get('__Secure-3PSID'):
 292             return
 293         consent_id = None
 294         consent = cookies.get('CONSENT')
 295         if consent:
 296             if 'YES' in consent.value:
 297                 return
 298             consent_id = self._search_regex(
 299                 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
 300         if not consent_id:
 301             consent_id = random.randint(100, 999)
 302         self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
 303
 304     def _real_initialize(self):
 305         self._initialize_consent()
 306         if self._downloader is None:
 307             return
 308         if not self._login():
 309             return
 310
 311     _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
 312     _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
 313     _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
 314
 315     _YT_DEFAULT_YTCFGS = {
 316         'WEB': {
 317             'INNERTUBE_API_VERSION': 'v1',
 318             'INNERTUBE_CLIENT_NAME': 'WEB',
 319             'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
 320             'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 321             'INNERTUBE_CONTEXT': {
 322                 'client': {
 323                     'clientName': 'WEB',
 324                     'clientVersion': '2.20210622.10.00',
 325                     'hl': 'en',
 326                 }
 327             },
 328             'INNERTUBE_CONTEXT_CLIENT_NAME': 1
 329         },
 330         'WEB_REMIX': {
 331             'INNERTUBE_API_VERSION': 'v1',
 332             'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
 333             'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
 334             'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
 335             'INNERTUBE_CONTEXT': {
 336                 'client': {
 337                     'clientName': 'WEB_REMIX',
 338                     'clientVersion': '1.20210621.00.00',
 339                     'hl': 'en',
 340                 }
 341             },
 342             'INNERTUBE_CONTEXT_CLIENT_NAME': 67
 343         },
 344         'WEB_EMBEDDED_PLAYER': {
 345             'INNERTUBE_API_VERSION': 'v1',
 346             'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
 347             'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
 348             'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 349             'INNERTUBE_CONTEXT': {
 350                 'client': {
 351                     'clientName': 'WEB_EMBEDDED_PLAYER',
 352                     'clientVersion': '1.20210620.0.1',
 353                     'hl': 'en',
 354                 }
 355             },
 356             'INNERTUBE_CONTEXT_CLIENT_NAME': 56
 357         },
 358         'ANDROID': {
 359             'INNERTUBE_API_VERSION': 'v1',
 360             'INNERTUBE_CLIENT_NAME': 'ANDROID',
 361             'INNERTUBE_CLIENT_VERSION': '16.20',
 362             'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 363             'INNERTUBE_CONTEXT': {
 364                 'client': {
 365                     'clientName': 'ANDROID',
 366                     'clientVersion': '16.20',
 367                     'hl': 'en',
 368                 }
 369             },
 370             'INNERTUBE_CONTEXT_CLIENT_NAME': 3
 371         },
 372         'ANDROID_EMBEDDED_PLAYER': {
 373             'INNERTUBE_API_VERSION': 'v1',
 374             'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
 375             'INNERTUBE_CLIENT_VERSION': '16.20',
 376             'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 377             'INNERTUBE_CONTEXT': {
 378                 'client': {
 379                     'clientName': 'ANDROID_EMBEDDED_PLAYER',
 380                     'clientVersion': '16.20',
 381                     'hl': 'en',
 382                 }
 383             },
 384             'INNERTUBE_CONTEXT_CLIENT_NAME': 55
 385         },
 386         'ANDROID_MUSIC': {
 387             'INNERTUBE_API_VERSION': 'v1',
 388             'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
 389             'INNERTUBE_CLIENT_VERSION': '4.32',
 390             'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
 391             'INNERTUBE_CONTEXT': {
 392                 'client': {
 393                     'clientName': 'ANDROID_MUSIC',
 394                     'clientVersion': '4.32',
 395                     'hl': 'en',
 396                 }
 397             },
 398             'INNERTUBE_CONTEXT_CLIENT_NAME': 21
 399         },
 400         'IOS': {
 401             'INNERTUBE_API_VERSION': 'v1',
 402             'INNERTUBE_CLIENT_NAME': 'IOS',
 403             'INNERTUBE_CLIENT_VERSION': '16.20',
 404             'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 405             'INNERTUBE_CONTEXT': {
 406                 'client': {
 407                     'clientName': 'IOS',
 408                     'clientVersion': '16.20',
 409                     'hl': 'en',
 410                 }
 411             },
 412             'INNERTUBE_CONTEXT_CLIENT_NAME': 5
 413
 414         },
 415         'IOS_MUSIC': {
 416             'INNERTUBE_API_VERSION': 'v1',
 417             'INNERTUBE_CLIENT_NAME': 'IOS_MUSIC',
 418             'INNERTUBE_CLIENT_VERSION': '4.32',
 419             'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
 420             'INNERTUBE_CONTEXT': {
 421                 'client': {
 422                     'clientName': 'IOS_MUSIC',
 423                     'clientVersion': '4.32',
 424                     'hl': 'en',
 425                 }
 426             },
 427             'INNERTUBE_CONTEXT_CLIENT_NAME': 26
 428         },
 429         'IOS_MESSAGES_EXTENSION': {
 430             'INNERTUBE_API_VERSION': 'v1',
 431             'INNERTUBE_CLIENT_NAME': 'IOS_MESSAGES_EXTENSION',
 432             'INNERTUBE_CLIENT_VERSION': '16.20',
 433             'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
 434             'INNERTUBE_CONTEXT': {
 435                 'client': {
 436                     'clientName': 'IOS_MESSAGES_EXTENSION',
 437                     'clientVersion': '16.20',
 438                     'hl': 'en',
 439                 }
 440             },
 441             'INNERTUBE_CONTEXT_CLIENT_NAME': 66
 442         }
 443     }
 444
 445     _YT_DEFAULT_INNERTUBE_HOSTS = {
 446         'DIRECT': 'youtubei.googleapis.com',
 447         'WEB': 'www.youtube.com',
 448         'WEB_REMIX': 'music.youtube.com',
 449         'ANDROID_MUSIC': 'music.youtube.com'
 450     }
 451
 452     # clients starting with _ cannot be explicity requested by the user
 453     _YT_CLIENTS = {
 454         'web': 'WEB',
 455         'web_music': 'WEB_REMIX',
 456         '_web_embedded': 'WEB_EMBEDDED_PLAYER',
 457         '_web_agegate': 'TVHTML5',
 458         'android': 'ANDROID',
 459         'android_music': 'ANDROID_MUSIC',
 460         '_android_embedded': 'ANDROID_EMBEDDED_PLAYER',
 461         '_android_agegate': 'ANDROID',
 462         'ios': 'IOS',
 463         'ios_music': 'IOS_MUSIC',
 464         '_ios_embedded': 'IOS_MESSAGES_EXTENSION',
 465         '_ios_agegate': 'IOS'
 466     }
 467
 468     def _get_default_ytcfg(self, client='WEB'):
 469         if client in self._YT_DEFAULT_YTCFGS:
 470             return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
 471         self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
 472         return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
 473
 474     def _get_innertube_host(self, client='WEB'):
 475         return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
 476
 477     def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
 478         # try_get but with fallback to default ytcfg client values when present
 479         _func = lambda y: try_get(y, getter, expected_type)
 480         return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
 481
 482     def _extract_client_name(self, ytcfg, default_client='WEB'):
 483         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
 484
 485     @staticmethod
 486     def _extract_session_index(*data):
 487         for ytcfg in data:
 488             session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
 489             if session_index is not None:
 490                 return session_index
 491
 492     def _extract_client_version(self, ytcfg, default_client='WEB'):
 493         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
 494
 495     def _extract_api_key(self, ytcfg=None, default_client='WEB'):
 496         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
 497
 498     def _extract_context(self, ytcfg=None, default_client='WEB'):
 499         _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
 500         context = _get_context(ytcfg)
 501         if context:
 502             return context
 503
 504         context = _get_context(self._get_default_ytcfg(default_client))
 505         if not ytcfg:
 506             return context
 507
 508         # Recreate the client context (required)
 509         context['client'].update({
 510             'clientVersion': self._extract_client_version(ytcfg, default_client),
 511             'clientName': self._extract_client_name(ytcfg, default_client),
 512         })
 513         visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
 514         if visitor_data:
 515             context['client']['visitorData'] = visitor_data
 516         return context
 517
 518     def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
 519         # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
 520         # See: https://github.com/yt-dlp/yt-dlp/issues/393
 521         yt_cookies = self._get_cookies('https://www.youtube.com')
 522         sapisid_cookie = dict_get(
 523             yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
 524         if sapisid_cookie is None or not sapisid_cookie.value:
 525             return
 526         time_now = round(time.time())
 527         # SAPISID cookie is required if not already present
 528         if not yt_cookies.get('SAPISID'):
 529             self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
 530             self._set_cookie(
 531                 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
 532         self.write_debug('Extracted SAPISID cookie', only_once=True)
 533         # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
 534         sapisidhash = hashlib.sha1(
 535             f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
 536         return f'SAPISIDHASH {time_now}_{sapisidhash}'
 537
 538     def _call_api(self, ep, query, video_id, fatal=True, headers=None,
 539                   note='Downloading API JSON', errnote='Unable to download API page',
 540                   context=None, api_key=None, api_hostname=None, default_client='WEB'):
 541
 542         data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
 543         data.update(query)
 544         real_headers = self.generate_api_headers(default_client=default_client)
 545         real_headers.update({'content-type': 'application/json'})
 546         if headers:
 547             real_headers.update(headers)
 548         return self._download_json(
 549             'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
 550             video_id=video_id, fatal=fatal, note=note, errnote=errnote,
 551             data=json.dumps(data).encode('utf8'), headers=real_headers,
 552             query={'key': api_key or self._extract_api_key()})
 553
 554     def extract_yt_initial_data(self, video_id, webpage):
 555         return self._parse_json(
 556             self._search_regex(
 557                 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
 558                  self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
 559             video_id)
 560
 561     def _extract_identity_token(self, webpage, item_id):
 562         if not webpage:
 563             return None
 564         ytcfg = self.extract_ytcfg(item_id, webpage)
 565         if ytcfg:
 566             token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
 567             if token:
 568                 return token
 569         return self._search_regex(
 570             r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
 571             'identity token', default=None)
 572
 573     @staticmethod
 574     def _extract_account_syncid(*args):
 575         """
 576         Extract syncId required to download private playlists of secondary channels
 577         @params response and/or ytcfg
 578         """
 579         for data in args:
 580             # ytcfg includes channel_syncid if on secondary channel
 581             delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
 582             if delegated_sid:
 583                 return delegated_sid
 584             sync_ids = (try_get(
 585                 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
 586                        lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
 587             if len(sync_ids) >= 2 and sync_ids[1]:
 588                 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
 589                 # and just "user_syncid||" for primary channel. We only want the channel_syncid
 590                 return sync_ids[0]
 591
 592     def extract_ytcfg(self, video_id, webpage):
 593         if not webpage:
 594             return {}
 595         return self._parse_json(
 596             self._search_regex(
 597                 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
 598                 default='{}'), video_id, fatal=False) or {}
 599
 600     def generate_api_headers(
 601             self, ytcfg=None, identity_token=None, account_syncid=None,
 602             visitor_data=None, api_hostname=None, default_client='WEB', session_index=None):
 603         origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
 604         headers = {
 605             'X-YouTube-Client-Name': compat_str(
 606                 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
 607             'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
 608             'Origin': origin
 609         }
 610         if not visitor_data and ytcfg:
 611             visitor_data = try_get(
 612                 self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
 613         if identity_token:
 614             headers['X-Youtube-Identity-Token'] = identity_token
 615         if account_syncid:
 616             headers['X-Goog-PageId'] = account_syncid
 617         if session_index is None and ytcfg:
 618             session_index = self._extract_session_index(ytcfg)
 619         if account_syncid or session_index is not None:
 620             headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
 621         if visitor_data:
 622             headers['X-Goog-Visitor-Id'] = visitor_data
 623         auth = self._generate_sapisidhash_header(origin)
 624         if auth is not None:
 625             headers['Authorization'] = auth
 626             headers['X-Origin'] = origin
 627         return headers
 628
 629     @staticmethod
 630     def _build_api_continuation_query(continuation, ctp=None):
 631         query = {
 632             'continuation': continuation
 633         }
 634         # TODO: Inconsistency with clickTrackingParams.
 635         # Currently we have a fixed ctp contained within context (from ytcfg)
 636         # and a ctp in root query for continuation.
 637         if ctp:
 638             query['clickTracking'] = {'clickTrackingParams': ctp}
 639         return query
 640
 641     @classmethod
 642     def _extract_next_continuation_data(cls, renderer):
 643         next_continuation = try_get(
 644             renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
 645                        lambda x: x['continuation']['reloadContinuationData']), dict)
 646         if not next_continuation:
 647             return
 648         continuation = next_continuation.get('continuation')
 649         if not continuation:
 650             return
 651         ctp = next_continuation.get('clickTrackingParams')
 652         return cls._build_api_continuation_query(continuation, ctp)
 653
 654     @classmethod
 655     def _extract_continuation_ep_data(cls, continuation_ep: dict):
 656         if isinstance(continuation_ep, dict):
 657             continuation = try_get(
 658                 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
 659             if not continuation:
 660                 return
 661             ctp = continuation_ep.get('clickTrackingParams')
 662             return cls._build_api_continuation_query(continuation, ctp)
 663
 664     @classmethod
 665     def _extract_continuation(cls, renderer):
 666         next_continuation = cls._extract_next_continuation_data(renderer)
 667         if next_continuation:
 668             return next_continuation
 669
 670         contents = []
 671         for key in ('contents', 'items'):
 672             contents.extend(try_get(renderer, lambda x: x[key], list) or [])
 673
 674         for content in contents:
 675             if not isinstance(content, dict):
 676                 continue
 677             continuation_ep = try_get(
 678                 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
 679                           lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
 680                 dict)
 681             continuation = cls._extract_continuation_ep_data(continuation_ep)
 682             if continuation:
 683                 return continuation
 684
 685     @classmethod
 686     def _extract_alerts(cls, data):
 687         for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
 688             if not isinstance(alert_dict, dict):
 689                 continue
 690             for alert in alert_dict.values():
 691                 alert_type = alert.get('type')
 692                 if not alert_type:
 693                     continue
 694                 message = cls._get_text(alert, 'text')
 695                 if message:
 696                     yield alert_type, message
 697
 698     def _report_alerts(self, alerts, expected=True):
 699         errors = []
 700         warnings = []
 701         for alert_type, alert_message in alerts:
 702             if alert_type.lower() == 'error':
 703                 errors.append([alert_type, alert_message])
 704             else:
 705                 warnings.append([alert_type, alert_message])
 706
 707         for alert_type, alert_message in (warnings + errors[:-1]):
 708             self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
 709         if errors:
 710             raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
 711
 712     def _extract_and_report_alerts(self, data, *args, **kwargs):
 713         return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
 714
 715     def _extract_badges(self, renderer: dict):
 716         badges = set()
 717         for badge in try_get(renderer, lambda x: x['badges'], list) or []:
 718             label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
 719             if label:
 720                 badges.add(label.lower())
 721         return badges
 722
 723     @staticmethod
 724     def _get_text(data, *path_list, max_runs=None):
 725         for path in path_list or [None]:
 726             if path is None:
 727                 obj = [data]
 728             else:
 729                 obj = traverse_obj(data, path, default=[])
 730                 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
 731                     obj = [obj]
 732             for item in obj:
 733                 text = try_get(item, lambda x: x['simpleText'], compat_str)
 734                 if text:
 735                     return text
 736                 runs = try_get(item, lambda x: x['runs'], list) or []
 737                 if not runs and isinstance(item, list):
 738                     runs = item
 739
 740                 runs = runs[:min(len(runs), max_runs or len(runs))]
 741                 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
 742                 if text:
 743                     return text
 744
 745     def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
 746                           ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
 747                           default_client='WEB'):
 748         response = None
 749         last_error = None
 750         count = -1
 751         retries = self.get_param('extractor_retries', 3)
 752         if check_get_keys is None:
 753             check_get_keys = []
 754         while count < retries:
 755             count += 1
 756             if last_error:
 757                 self.report_warning('%s. Retrying ...' % last_error)
 758             try:
 759                 response = self._call_api(
 760                     ep=ep, fatal=True, headers=headers,
 761                     video_id=item_id, query=query,
 762                     context=self._extract_context(ytcfg, default_client),
 763                     api_key=self._extract_api_key(ytcfg, default_client),
 764                     api_hostname=api_hostname, default_client=default_client,
 765                     note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
 766             except ExtractorError as e:
 767                 if isinstance(e.cause, network_exceptions):
 768                     # Downloading page may result in intermittent 5xx HTTP error
 769                     # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
 770                     # We also want to catch all other network exceptions since errors in later pages can be troublesome
 771                     # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
 772                     if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
 773                         last_error = error_to_compat_str(e.cause or e)
 774                         if count < retries:
 775                             continue
 776                 if fatal:
 777                     raise
 778                 else:
 779                     self.report_warning(error_to_compat_str(e))
 780                     return
 781
 782             else:
 783                 # Youtube may send alerts if there was an issue with the continuation page
 784                 try:
 785                     self._extract_and_report_alerts(response, expected=False)
 786                 except ExtractorError as e:
 787                     if fatal:
 788                         raise
 789                     self.report_warning(error_to_compat_str(e))
 790                     return
 791                 if not check_get_keys or dict_get(response, check_get_keys):
 792                     break
 793                 # Youtube sometimes sends incomplete data
 794                 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
 795                 last_error = 'Incomplete data received'
 796                 if count >= retries:
 797                     if fatal:
 798                         raise ExtractorError(last_error)
 799                     else:
 800                         self.report_warning(last_error)
 801                         return
 802         return response
 803
 804     @staticmethod
 805     def is_music_url(url):
 806         return re.match(r'https?://music\.youtube\.com/', url) is not None
 807
 808     def _extract_video(self, renderer):
 809         video_id = renderer.get('videoId')
 810         title = self._get_text(renderer, 'title')
 811         description = self._get_text(renderer, 'descriptionSnippet')
 812         duration = parse_duration(self._get_text(
 813             renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
 814         view_count_text = self._get_text(renderer, 'viewCountText') or ''
 815         view_count = str_to_int(self._search_regex(
 816             r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
 817             'view count', default=None))
 818
 819         uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
 820
 821         return {
 822             '_type': 'url',
 823             'ie_key': YoutubeIE.ie_key(),
 824             'id': video_id,
 825             'url': video_id,
 826             'title': title,
 827             'description': description,
 828             'duration': duration,
 829             'view_count': view_count,
 830             'uploader': uploader,
 831         }
 832
 833
 834 class YoutubeIE(YoutubeBaseInfoExtractor):
 835     IE_DESC = 'YouTube.com'
 836     _INVIDIOUS_SITES = (
 837         # invidious-redirect websites
 838         r'(?:www\.)?redirect\.invidious\.io',
 839         r'(?:(?:www|dev)\.)?invidio\.us',
 840         # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
 841         r'(?:www\.)?invidious\.pussthecat\.org',
 842         r'(?:www\.)?invidious\.zee\.li',
 843         r'(?:www\.)?invidious\.ethibox\.fr',
 844         r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
 845         # youtube-dl invidious instances list
 846         r'(?:(?:www|no)\.)?invidiou\.sh',
 847         r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
 848         r'(?:www\.)?invidious\.kabi\.tk',
 849         r'(?:www\.)?invidious\.mastodon\.host',
 850         r'(?:www\.)?invidious\.zapashcanon\.fr',
 851         r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
 852         r'(?:www\.)?invidious\.tinfoil-hat\.net',
 853         r'(?:www\.)?invidious\.himiko\.cloud',
 854         r'(?:www\.)?invidious\.reallyancient\.tech',
 855         r'(?:www\.)?invidious\.tube',
 856         r'(?:www\.)?invidiou\.site',
 857         r'(?:www\.)?invidious\.site',
 858         r'(?:www\.)?invidious\.xyz',
 859         r'(?:www\.)?invidious\.nixnet\.xyz',
 860         r'(?:www\.)?invidious\.048596\.xyz',
 861         r'(?:www\.)?invidious\.drycat\.fr',
 862         r'(?:www\.)?inv\.skyn3t\.in',
 863         r'(?:www\.)?tube\.poal\.co',
 864         r'(?:www\.)?tube\.connect\.cafe',
 865         r'(?:www\.)?vid\.wxzm\.sx',
 866         r'(?:www\.)?vid\.mint\.lgbt',
 867         r'(?:www\.)?vid\.puffyan\.us',
 868         r'(?:www\.)?yewtu\.be',
 869         r'(?:www\.)?yt\.elukerio\.org',
 870         r'(?:www\.)?yt\.lelux\.fi',
 871         r'(?:www\.)?invidious\.ggc-project\.de',
 872         r'(?:www\.)?yt\.maisputain\.ovh',
 873         r'(?:www\.)?ytprivate\.com',
 874         r'(?:www\.)?invidious\.13ad\.de',
 875         r'(?:www\.)?invidious\.toot\.koeln',
 876         r'(?:www\.)?invidious\.fdn\.fr',
 877         r'(?:www\.)?watch\.nettohikari\.com',
 878         r'(?:www\.)?invidious\.namazso\.eu',
 879         r'(?:www\.)?invidious\.silkky\.cloud',
 880         r'(?:www\.)?invidious\.exonip\.de',
 881         r'(?:www\.)?invidious\.riverside\.rocks',
 882         r'(?:www\.)?invidious\.blamefran\.net',
 883         r'(?:www\.)?invidious\.moomoo\.de',
 884         r'(?:www\.)?ytb\.trom\.tf',
 885         r'(?:www\.)?yt\.cyberhost\.uk',
 886         r'(?:www\.)?kgg2m7yk5aybusll\.onion',
 887         r'(?:www\.)?qklhadlycap4cnod\.onion',
 888         r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
 889         r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
 890         r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
 891         r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
 892         r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
 893         r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
 894         r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
 895         r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
 896         r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
 897         r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
 898     )
 899     _VALID_URL = r"""(?x)^
 900                      (
 901                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 902                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
 903                             (?:www\.)?deturl\.com/www\.youtube\.com|
 904                             (?:www\.)?pwnyoutube\.com|
 905                             (?:www\.)?hooktube\.com|
 906                             (?:www\.)?yourepeat\.com|
 907                             tube\.majestyc\.net|
 908                             %(invidious)s|
 909                             youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
 910                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 911                          (?:                                                  # the various things that can precede the ID:
 912                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 913                              |(?:                                             # or the v= param in all its forms
 914                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 915                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 916                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 917                                  v=
 918                              )
 919                          ))
 920                          |(?:
 921                             youtu\.be|                                        # just youtu.be/xxxx
 922                             vid\.plus|                                        # or vid.plus/xxxx
 923                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 924                             %(invidious)s
 925                          )/
 926                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 927                          )
 928                      )?                                                       # all until now is optional -> you can pass the naked ID
 929                      (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
 930                      (?(1).+)?                                                # if we found the ID, everything can follow
 931                      (?:\#|$)""" % {
 932         'invidious': '|'.join(_INVIDIOUS_SITES),
 933     }
 934     _PLAYER_INFO_RE = (
 935         r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
 936         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
 937         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
 938     )
 939     _formats = {
 940         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 941         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 942         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 943         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 944         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 945         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 946         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 947         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 948         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 949         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 950         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 951         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 952         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 953         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 954         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 955         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 956         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 957         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 958
 959
 960         # 3D videos
 961         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 962         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 963         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 964         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 965         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 966         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 967         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 968
 969         # Apple HTTP Live Streaming
 970         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 971         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 972         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 973         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 974         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 975         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 976         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 977         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 978
 979         # DASH mp4 video
 980         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 981         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 982         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 983         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 984         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 985         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 986         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 987         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 988         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 989         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 990         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 991         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 992
 993         # Dash mp4 audio
 994         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 995         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 996         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 997         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 998         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 999         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
1000         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
1001
1002         # Dash webm
1003         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1004         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1005         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1006         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1007         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1008         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
1009         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
1010         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1011         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1012         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1013         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1014         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1015         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1016         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1017         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1018         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1019         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1020         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1021         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1022         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1023         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1024         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1025
1026         # Dash webm audio
1027         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1028         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1029
1030         # Dash webm audio with opus inside
1031         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1032         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1033         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1034
1035         # RTMP (unnamed)
1036         '_rtmp': {'protocol': 'rtmp'},
1037
1038         # av01 video only formats sometimes served with "unknown" codecs
1039         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1040         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1041         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1042         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1043     }
1044     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1045
1046     _AGE_GATE_REASONS = (
1047         'Sign in to confirm your age',
1048         'This video may be inappropriate for some users.',
1049         'Sorry, this content is age-restricted.')
1050
1051     _GEO_BYPASS = False
1052
1053     IE_NAME = 'youtube'
1054     _TESTS = [
1055         {
1056             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1057             'info_dict': {
1058                 'id': 'BaW_jenozKc',
1059                 'ext': 'mp4',
1060                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1061                 'uploader': 'Philipp Hagemeister',
1062                 'uploader_id': 'phihag',
1063                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1064                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1065                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1066                 'upload_date': '20121002',
1067                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1068                 'categories': ['Science & Technology'],
1069                 'tags': ['youtube-dl'],
1070                 'duration': 10,
1071                 'view_count': int,
1072                 'like_count': int,
1073                 'dislike_count': int,
1074                 'start_time': 1,
1075                 'end_time': 9,
1076             }
1077         },
1078         {
1079             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1080             'note': 'Embed-only video (#1746)',
1081             'info_dict': {
1082                 'id': 'yZIXLfi8CZQ',
1083                 'ext': 'mp4',
1084                 'upload_date': '20120608',
1085                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1086                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1087                 'uploader': 'SET India',
1088                 'uploader_id': 'setindia',
1089                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1090                 'age_limit': 18,
1091             },
1092             'skip': 'Private video',
1093         },
1094         {
1095             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1096             'note': 'Use the first video ID in the URL',
1097             'info_dict': {
1098                 'id': 'BaW_jenozKc',
1099                 'ext': 'mp4',
1100                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1101                 'uploader': 'Philipp Hagemeister',
1102                 'uploader_id': 'phihag',
1103                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1104                 'upload_date': '20121002',
1105                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1106                 'categories': ['Science & Technology'],
1107                 'tags': ['youtube-dl'],
1108                 'duration': 10,
1109                 'view_count': int,
1110                 'like_count': int,
1111                 'dislike_count': int,
1112             },
1113             'params': {
1114                 'skip_download': True,
1115             },
1116         },
1117         {
1118             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1119             'note': '256k DASH audio (format 141) via DASH manifest',
1120             'info_dict': {
1121                 'id': 'a9LDPn-MO4I',
1122                 'ext': 'm4a',
1123                 'upload_date': '20121002',
1124                 'uploader_id': '8KVIDEO',
1125                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1126                 'description': '',
1127                 'uploader': '8KVIDEO',
1128                 'title': 'UHDTV TEST 8K VIDEO.mp4'
1129             },
1130             'params': {
1131                 'youtube_include_dash_manifest': True,
1132                 'format': '141',
1133             },
1134             'skip': 'format 141 not served anymore',
1135         },
1136         # DASH manifest with encrypted signature
1137         {
1138             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1139             'info_dict': {
1140                 'id': 'IB3lcPjvWLA',
1141                 'ext': 'm4a',
1142                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1143                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1144                 'duration': 244,
1145                 'uploader': 'AfrojackVEVO',
1146                 'uploader_id': 'AfrojackVEVO',
1147                 'upload_date': '20131011',
1148                 'abr': 129.495,
1149             },
1150             'params': {
1151                 'youtube_include_dash_manifest': True,
1152                 'format': '141/bestaudio[ext=m4a]',
1153             },
1154         },
1155         # Normal age-gate video (embed allowed)
1156         {
1157             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1158             'info_dict': {
1159                 'id': 'HtVdAasjOgU',
1160                 'ext': 'mp4',
1161                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1162                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1163                 'duration': 142,
1164                 'uploader': 'The Witcher',
1165                 'uploader_id': 'WitcherGame',
1166                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1167                 'upload_date': '20140605',
1168                 'age_limit': 18,
1169             },
1170         },
1171         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1172         # YouTube Red ad is not captured for creator
1173         {
1174             'url': '__2ABJjxzNo',
1175             'info_dict': {
1176                 'id': '__2ABJjxzNo',
1177                 'ext': 'mp4',
1178                 'duration': 266,
1179                 'upload_date': '20100430',
1180                 'uploader_id': 'deadmau5',
1181                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1182                 'creator': 'deadmau5',
1183                 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1184                 'uploader': 'deadmau5',
1185                 'title': 'Deadmau5 - Some Chords (HD)',
1186                 'alt_title': 'Some Chords',
1187             },
1188             'expected_warnings': [
1189                 'DASH manifest missing',
1190             ]
1191         },
1192         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1193         {
1194             'url': 'lqQg6PlCWgI',
1195             'info_dict': {
1196                 'id': 'lqQg6PlCWgI',
1197                 'ext': 'mp4',
1198                 'duration': 6085,
1199                 'upload_date': '20150827',
1200                 'uploader_id': 'olympic',
1201                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1202                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1203                 'uploader': 'Olympics',
1204                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
1205             },
1206             'params': {
1207                 'skip_download': 'requires avconv',
1208             }
1209         },
1210         # Non-square pixels
1211         {
1212             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1213             'info_dict': {
1214                 'id': '_b-2C3KPAM0',
1215                 'ext': 'mp4',
1216                 'stretched_ratio': 16 / 9.,
1217                 'duration': 85,
1218                 'upload_date': '20110310',
1219                 'uploader_id': 'AllenMeow',
1220                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1221                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1222                 'uploader': '孫ᄋᄅ',
1223                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1224             },
1225         },
1226         # url_encoded_fmt_stream_map is empty string
1227         {
1228             'url': 'qEJwOuvDf7I',
1229             'info_dict': {
1230                 'id': 'qEJwOuvDf7I',
1231                 'ext': 'webm',
1232                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1233                 'description': '',
1234                 'upload_date': '20150404',
1235                 'uploader_id': 'spbelect',
1236                 'uploader': 'Наблюдатели Петербурга',
1237             },
1238             'params': {
1239                 'skip_download': 'requires avconv',
1240             },
1241             'skip': 'This live event has ended.',
1242         },
1243         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1244         {
1245             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1246             'info_dict': {
1247                 'id': 'FIl7x6_3R5Y',
1248                 'ext': 'webm',
1249                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1250                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1251                 'duration': 220,
1252                 'upload_date': '20150625',
1253                 'uploader_id': 'dorappi2000',
1254                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1255                 'uploader': 'dorappi2000',
1256                 'formats': 'mincount:31',
1257             },
1258             'skip': 'not actual anymore',
1259         },
1260         # DASH manifest with segment_list
1261         {
1262             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1263             'md5': '8ce563a1d667b599d21064e982ab9e31',
1264             'info_dict': {
1265                 'id': 'CsmdDsKjzN8',
1266                 'ext': 'mp4',
1267                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1268                 'uploader': 'Airtek',
1269                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1270                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1271                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1272             },
1273             'params': {
1274                 'youtube_include_dash_manifest': True,
1275                 'format': '135',  # bestvideo
1276             },
1277             'skip': 'This live event has ended.',
1278         },
1279         {
1280             # Multifeed videos (multiple cameras), URL is for Main Camera
1281             'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1282             'info_dict': {
1283                 'id': 'jvGDaLqkpTg',
1284                 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1285                 'description': 'md5:e03b909557865076822aa169218d6a5d',
1286             },
1287             'playlist': [{
1288                 'info_dict': {
1289                     'id': 'jvGDaLqkpTg',
1290                     'ext': 'mp4',
1291                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1292                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1293                     'duration': 10643,
1294                     'upload_date': '20161111',
1295                     'uploader': 'Team PGP',
1296                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1297                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1298                 },
1299             }, {
1300                 'info_dict': {
1301                     'id': '3AKt1R1aDnw',
1302                     'ext': 'mp4',
1303                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1304                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1305                     'duration': 10991,
1306                     'upload_date': '20161111',
1307                     'uploader': 'Team PGP',
1308                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1309                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1310                 },
1311             }, {
1312                 'info_dict': {
1313                     'id': 'RtAMM00gpVc',
1314                     'ext': 'mp4',
1315                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1316                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1317                     'duration': 10995,
1318                     'upload_date': '20161111',
1319                     'uploader': 'Team PGP',
1320                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1321                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1322                 },
1323             }, {
1324                 'info_dict': {
1325                     'id': '6N2fdlP3C5U',
1326                     'ext': 'mp4',
1327                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1328                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1329                     'duration': 10990,
1330                     'upload_date': '20161111',
1331                     'uploader': 'Team PGP',
1332                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1333                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1334                 },
1335             }],
1336             'params': {
1337                 'skip_download': True,
1338             },
1339         },
1340         {
1341             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1342             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1343             'info_dict': {
1344                 'id': 'gVfLd0zydlo',
1345                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1346             },
1347             'playlist_count': 2,
1348             'skip': 'Not multifeed anymore',
1349         },
1350         {
1351             'url': 'https://vid.plus/FlRa-iH7PGw',
1352             'only_matching': True,
1353         },
1354         {
1355             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1356             'only_matching': True,
1357         },
1358         {
1359             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1360             # Also tests cut-off URL expansion in video description (see
1361             # https://github.com/ytdl-org/youtube-dl/issues/1892,
1362             # https://github.com/ytdl-org/youtube-dl/issues/8164)
1363             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1364             'info_dict': {
1365                 'id': 'lsguqyKfVQg',
1366                 'ext': 'mp4',
1367                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1368                 'alt_title': 'Dark Walk',
1369                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1370                 'duration': 133,
1371                 'upload_date': '20151119',
1372                 'uploader_id': 'IronSoulElf',
1373                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1374                 'uploader': 'IronSoulElf',
1375                 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1376                 'track': 'Dark Walk',
1377                 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1378                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1379             },
1380             'params': {
1381                 'skip_download': True,
1382             },
1383         },
1384         {
1385             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1386             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1387             'only_matching': True,
1388         },
1389         {
1390             # Video with yt:stretch=17:0
1391             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1392             'info_dict': {
1393                 'id': 'Q39EVAstoRM',
1394                 'ext': 'mp4',
1395                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1396                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1397                 'upload_date': '20151107',
1398                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1399                 'uploader': 'CH GAMER DROID',
1400             },
1401             'params': {
1402                 'skip_download': True,
1403             },
1404             'skip': 'This video does not exist.',
1405         },
1406         {
1407             # Video with incomplete 'yt:stretch=16:'
1408             'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1409             'only_matching': True,
1410         },
1411         {
1412             # Video licensed under Creative Commons
1413             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1414             'info_dict': {
1415                 'id': 'M4gD1WSo5mA',
1416                 'ext': 'mp4',
1417                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1418                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1419                 'duration': 721,
1420                 'upload_date': '20150127',
1421                 'uploader_id': 'BerkmanCenter',
1422                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1423                 'uploader': 'The Berkman Klein Center for Internet & Society',
1424                 'license': 'Creative Commons Attribution license (reuse allowed)',
1425             },
1426             'params': {
1427                 'skip_download': True,
1428             },
1429         },
1430         {
1431             # Channel-like uploader_url
1432             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1433             'info_dict': {
1434                 'id': 'eQcmzGIKrzg',
1435                 'ext': 'mp4',
1436                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1437                 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1438                 'duration': 4060,
1439                 'upload_date': '20151119',
1440                 'uploader': 'Bernie Sanders',
1441                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1442                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1443                 'license': 'Creative Commons Attribution license (reuse allowed)',
1444             },
1445             'params': {
1446                 'skip_download': True,
1447             },
1448         },
1449         {
1450             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1451             'only_matching': True,
1452         },
1453         {
1454             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1455             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1456             'only_matching': True,
1457         },
1458         {
1459             # Rental video preview
1460             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1461             'info_dict': {
1462                 'id': 'uGpuVWrhIzE',
1463                 'ext': 'mp4',
1464                 'title': 'Piku - Trailer',
1465                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1466                 'upload_date': '20150811',
1467                 'uploader': 'FlixMatrix',
1468                 'uploader_id': 'FlixMatrixKaravan',
1469                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1470                 'license': 'Standard YouTube License',
1471             },
1472             'params': {
1473                 'skip_download': True,
1474             },
1475             'skip': 'This video is not available.',
1476         },
1477         {
1478             # YouTube Red video with episode data
1479             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1480             'info_dict': {
1481                 'id': 'iqKdEhx-dD4',
1482                 'ext': 'mp4',
1483                 'title': 'Isolation - Mind Field (Ep 1)',
1484                 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1485                 'duration': 2085,
1486                 'upload_date': '20170118',
1487                 'uploader': 'Vsauce',
1488                 'uploader_id': 'Vsauce',
1489                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1490                 'series': 'Mind Field',
1491                 'season_number': 1,
1492                 'episode_number': 1,
1493             },
1494             'params': {
1495                 'skip_download': True,
1496             },
1497             'expected_warnings': [
1498                 'Skipping DASH manifest',
1499             ],
1500         },
1501         {
1502             # The following content has been identified by the YouTube community
1503             # as inappropriate or offensive to some audiences.
1504             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1505             'info_dict': {
1506                 'id': '6SJNVb0GnPI',
1507                 'ext': 'mp4',
1508                 'title': 'Race Differences in Intelligence',
1509                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1510                 'duration': 965,
1511                 'upload_date': '20140124',
1512                 'uploader': 'New Century Foundation',
1513                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1514                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1515             },
1516             'params': {
1517                 'skip_download': True,
1518             },
1519             'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1520         },
1521         {
1522             # itag 212
1523             'url': '1t24XAntNCY',
1524             'only_matching': True,
1525         },
1526         {
1527             # geo restricted to JP
1528             'url': 'sJL6WA-aGkQ',
1529             'only_matching': True,
1530         },
1531         {
1532             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1533             'only_matching': True,
1534         },
1535         {
1536             'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1537             'only_matching': True,
1538         },
1539         {
1540             # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1541             'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1542             'only_matching': True,
1543         },
1544         {
1545             # DRM protected
1546             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1547             'only_matching': True,
1548         },
1549         {
1550             # Video with unsupported adaptive stream type formats
1551             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1552             'info_dict': {
1553                 'id': 'Z4Vy8R84T1U',
1554                 'ext': 'mp4',
1555                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1556                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1557                 'duration': 433,
1558                 'upload_date': '20130923',
1559                 'uploader': 'Amelia Putri Harwita',
1560                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1561                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1562                 'formats': 'maxcount:10',
1563             },
1564             'params': {
1565                 'skip_download': True,
1566                 'youtube_include_dash_manifest': False,
1567             },
1568             'skip': 'not actual anymore',
1569         },
1570         {
1571             # Youtube Music Auto-generated description
1572             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1573             'info_dict': {
1574                 'id': 'MgNrAu2pzNs',
1575                 'ext': 'mp4',
1576                 'title': 'Voyeur Girl',
1577                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1578                 'upload_date': '20190312',
1579                 'uploader': 'Stephen - Topic',
1580                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1581                 'artist': 'Stephen',
1582                 'track': 'Voyeur Girl',
1583                 'album': 'it\'s too much love to know my dear',
1584                 'release_date': '20190313',
1585                 'release_year': 2019,
1586             },
1587             'params': {
1588                 'skip_download': True,
1589             },
1590         },
1591         {
1592             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1593             'only_matching': True,
1594         },
1595         {
1596             # invalid -> valid video id redirection
1597             'url': 'DJztXj2GPfl',
1598             'info_dict': {
1599                 'id': 'DJztXj2GPfk',
1600                 'ext': 'mp4',
1601                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1602                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1603                 'upload_date': '20090125',
1604                 'uploader': 'Prochorowka',
1605                 'uploader_id': 'Prochorowka',
1606                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1607                 'artist': 'Panjabi MC',
1608                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1609                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1610             },
1611             'params': {
1612                 'skip_download': True,
1613             },
1614             'skip': 'Video unavailable',
1615         },
1616         {
1617             # empty description results in an empty string
1618             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1619             'info_dict': {
1620                 'id': 'x41yOUIvK2k',
1621                 'ext': 'mp4',
1622                 'title': 'IMG 3456',
1623                 'description': '',
1624                 'upload_date': '20170613',
1625                 'uploader_id': 'ElevageOrVert',
1626                 'uploader': 'ElevageOrVert',
1627             },
1628             'params': {
1629                 'skip_download': True,
1630             },
1631         },
1632         {
1633             # with '};' inside yt initial data (see [1])
1634             # see [2] for an example with '};' inside ytInitialPlayerResponse
1635             # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1636             # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1637             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1638             'info_dict': {
1639                 'id': 'CHqg6qOn4no',
1640                 'ext': 'mp4',
1641                 'title': 'Part 77   Sort a list of simple types in c#',
1642                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1643                 'upload_date': '20130831',
1644                 'uploader_id': 'kudvenkat',
1645                 'uploader': 'kudvenkat',
1646             },
1647             'params': {
1648                 'skip_download': True,
1649             },
1650         },
1651         {
1652             # another example of '};' in ytInitialData
1653             'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1654             'only_matching': True,
1655         },
1656         {
1657             'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1658             'only_matching': True,
1659         },
1660         {
1661             # https://github.com/ytdl-org/youtube-dl/pull/28094
1662             'url': 'OtqTfy26tG0',
1663             'info_dict': {
1664                 'id': 'OtqTfy26tG0',
1665                 'ext': 'mp4',
1666                 'title': 'Burn Out',
1667                 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1668                 'upload_date': '20141120',
1669                 'uploader': 'The Cinematic Orchestra - Topic',
1670                 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1671                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1672                 'artist': 'The Cinematic Orchestra',
1673                 'track': 'Burn Out',
1674                 'album': 'Every Day',
1675                 'release_data': None,
1676                 'release_year': None,
1677             },
1678             'params': {
1679                 'skip_download': True,
1680             },
1681         },
1682         {
1683             # controversial video, only works with bpctr when authenticated with cookies
1684             'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1685             'only_matching': True,
1686         },
1687         {
1688             # controversial video, requires bpctr/contentCheckOk
1689             'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1690             'info_dict': {
1691                 'id': 'SZJvDhaSDnc',
1692                 'ext': 'mp4',
1693                 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1694                 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1695                 'uploader': 'CBS This Morning',
1696                 'uploader_id': 'CBSThisMorning',
1697                 'upload_date': '20140716',
1698                 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1699             }
1700         },
1701         {
1702             # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1703             'url': 'cBvYw8_A0vQ',
1704             'info_dict': {
1705                 'id': 'cBvYw8_A0vQ',
1706                 'ext': 'mp4',
1707                 'title': '4K Ueno Okachimachi  Street  Scenes  上野御徒町歩き',
1708                 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1709                 'upload_date': '20201120',
1710                 'uploader': 'Walk around Japan',
1711                 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1712                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1713             },
1714             'params': {
1715                 'skip_download': True,
1716             },
1717         }, {
1718             # Has multiple audio streams
1719             'url': 'WaOKSUlf4TM',
1720             'only_matching': True
1721         }, {
1722             # Requires Premium: has format 141 when requested using YTM url
1723             'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1724             'only_matching': True
1725         }, {
1726             # multiple subtitles with same lang_code
1727             'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1728             'only_matching': True,
1729         }, {
1730             # Force use android client fallback
1731             'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1732             'info_dict': {
1733                 'id': 'YOelRv7fMxY',
1734                 'title': 'DIGGING A SECRET TUNNEL Part 1',
1735                 'ext': '3gp',
1736                 'upload_date': '20210624',
1737                 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1738                 'uploader': 'colinfurze',
1739                 'uploader_id': 'colinfurze',
1740                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1741                 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1742             },
1743             'params': {
1744                 'format': '17',  # 3gp format available on android
1745                 'extractor_args': {'youtube': {'player_client': ['android']}},
1746             },
1747         },
1748         {
1749             # Skip download of additional client configs (remix client config in this case)
1750             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1751             'only_matching': True,
1752             'params': {
1753                 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1754             },
1755         }
1756     ]
1757
1758     @classmethod
1759     def suitable(cls, url):
1760         # Hack for lazy extractors until more generic solution is implemented
1761         # (see #28780)
1762         from .youtube import parse_qs
1763         qs = parse_qs(url)
1764         if qs.get('list', [None])[0]:
1765             return False
1766         return super(YoutubeIE, cls).suitable(url)
1767
1768     def __init__(self, *args, **kwargs):
1769         super(YoutubeIE, self).__init__(*args, **kwargs)
1770         self._code_cache = {}
1771         self._player_cache = {}
1772
1773     def _extract_player_url(self, ytcfg=None, webpage=None):
1774         player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1775         if not player_url and webpage:
1776             player_url = self._search_regex(
1777                 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1778                 webpage, 'player URL', fatal=False)
1779         if not player_url:
1780             return None
1781         if player_url.startswith('//'):
1782             player_url = 'https:' + player_url
1783         elif not re.match(r'https?://', player_url):
1784             player_url = compat_urlparse.urljoin(
1785                 'https://www.youtube.com', player_url)
1786         return player_url
1787
1788     def _signature_cache_id(self, example_sig):
1789         """ Return a string representation of a signature """
1790         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1791
1792     @classmethod
1793     def _extract_player_info(cls, player_url):
1794         for player_re in cls._PLAYER_INFO_RE:
1795             id_m = re.search(player_re, player_url)
1796             if id_m:
1797                 break
1798         else:
1799             raise ExtractorError('Cannot identify player %r' % player_url)
1800         return id_m.group('id')
1801
1802     def _load_player(self, video_id, player_url, fatal=True) -> bool:
1803         player_id = self._extract_player_info(player_url)
1804         if player_id not in self._code_cache:
1805             self._code_cache[player_id] = self._download_webpage(
1806                 player_url, video_id, fatal=fatal,
1807                 note='Downloading player ' + player_id,
1808                 errnote='Download of %s failed' % player_url)
1809         return player_id in self._code_cache
1810
1811     def _extract_signature_function(self, video_id, player_url, example_sig):
1812         player_id = self._extract_player_info(player_url)
1813
1814         # Read from filesystem cache
1815         func_id = 'js_%s_%s' % (
1816             player_id, self._signature_cache_id(example_sig))
1817         assert os.path.basename(func_id) == func_id
1818
1819         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1820         if cache_spec is not None:
1821             return lambda s: ''.join(s[i] for i in cache_spec)
1822
1823         if self._load_player(video_id, player_url):
1824             code = self._code_cache[player_id]
1825             res = self._parse_sig_js(code)
1826
1827             test_string = ''.join(map(compat_chr, range(len(example_sig))))
1828             cache_res = res(test_string)
1829             cache_spec = [ord(c) for c in cache_res]
1830
1831             self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1832             return res
1833
1834     def _print_sig_code(self, func, example_sig):
1835         def gen_sig_code(idxs):
1836             def _genslice(start, end, step):
1837                 starts = '' if start == 0 else str(start)
1838                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1839                 steps = '' if step == 1 else (':%d' % step)
1840                 return 's[%s%s%s]' % (starts, ends, steps)
1841
1842             step = None
1843             # Quelch pyflakes warnings - start will be set when step is set
1844             start = '(Never used)'
1845             for i, prev in zip(idxs[1:], idxs[:-1]):
1846                 if step is not None:
1847                     if i - prev == step:
1848                         continue
1849                     yield _genslice(start, prev, step)
1850                     step = None
1851                     continue
1852                 if i - prev in [-1, 1]:
1853                     step = i - prev
1854                     start = prev
1855                     continue
1856                 else:
1857                     yield 's[%d]' % prev
1858             if step is None:
1859                 yield 's[%d]' % i
1860             else:
1861                 yield _genslice(start, i, step)
1862
1863         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1864         cache_res = func(test_string)
1865         cache_spec = [ord(c) for c in cache_res]
1866         expr_code = ' + '.join(gen_sig_code(cache_spec))
1867         signature_id_tuple = '(%s)' % (
1868             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1869         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1870                 '    return %s\n') % (signature_id_tuple, expr_code)
1871         self.to_screen('Extracted signature function:\n' + code)
1872
1873     def _parse_sig_js(self, jscode):
1874         funcname = self._search_regex(
1875             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1876              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1877              r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
1878              r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
1879              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1880              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1881              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1882              # Obsolete patterns
1883              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1884              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1885              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1886              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1887              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1888              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1889              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1890              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1891             jscode, 'Initial JS player signature function name', group='sig')
1892
1893         jsi = JSInterpreter(jscode)
1894         initial_function = jsi.extract_function(funcname)
1895         return lambda s: initial_function([s])
1896
1897     def _decrypt_signature(self, s, video_id, player_url):
1898         """Turn the encrypted s field into a working signature"""
1899
1900         if player_url is None:
1901             raise ExtractorError('Cannot decrypt signature without player_url')
1902
1903         try:
1904             player_id = (player_url, self._signature_cache_id(s))
1905             if player_id not in self._player_cache:
1906                 func = self._extract_signature_function(
1907                     video_id, player_url, s
1908                 )
1909                 self._player_cache[player_id] = func
1910             func = self._player_cache[player_id]
1911             if self.get_param('youtube_print_sig_code'):
1912                 self._print_sig_code(func, s)
1913             return func(s)
1914         except Exception as e:
1915             tb = traceback.format_exc()
1916             raise ExtractorError(
1917                 'Signature extraction failed: ' + tb, cause=e)
1918
1919     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1920         """
1921         Extract signatureTimestamp (sts)
1922         Required to tell API what sig/player version is in use.
1923         """
1924         sts = None
1925         if isinstance(ytcfg, dict):
1926             sts = int_or_none(ytcfg.get('STS'))
1927
1928         if not sts:
1929             # Attempt to extract from player
1930             if player_url is None:
1931                 error_msg = 'Cannot extract signature timestamp without player_url.'
1932                 if fatal:
1933                     raise ExtractorError(error_msg)
1934                 self.report_warning(error_msg)
1935                 return
1936             if self._load_player(video_id, player_url, fatal=fatal):
1937                 player_id = self._extract_player_info(player_url)
1938                 code = self._code_cache[player_id]
1939                 sts = int_or_none(self._search_regex(
1940                     r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1941                     'JS player signature timestamp', group='sts', fatal=fatal))
1942         return sts
1943
1944     def _mark_watched(self, video_id, player_responses):
1945         playback_url = traverse_obj(
1946             player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
1947             expected_type=url_or_none, get_all=False)
1948         if not playback_url:
1949             self.report_warning('Unable to mark watched')
1950             return
1951         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1952         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1953
1954         # cpn generation algorithm is reverse engineered from base.js.
1955         # In fact it works even with dummy cpn.
1956         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1957         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1958
1959         qs.update({
1960             'ver': ['2'],
1961             'cpn': [cpn],
1962         })
1963         playback_url = compat_urlparse.urlunparse(
1964             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1965
1966         self._download_webpage(
1967             playback_url, video_id, 'Marking watched',
1968             'Unable to mark watched', fatal=False)
1969
1970     @staticmethod
1971     def _extract_urls(webpage):
1972         # Embedded YouTube player
1973         entries = [
1974             unescapeHTML(mobj.group('url'))
1975             for mobj in re.finditer(r'''(?x)
1976             (?:
1977                 <iframe[^>]+?src=|
1978                 data-video-url=|
1979                 <embed[^>]+?src=|
1980                 embedSWF\(?:\s*|
1981                 <object[^>]+data=|
1982                 new\s+SWFObject\(
1983             )
1984             (["\'])
1985                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1986                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1987             \1''', webpage)]
1988
1989         # lazyYT YouTube embed
1990         entries.extend(list(map(
1991             unescapeHTML,
1992             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1993
1994         # Wordpress "YouTube Video Importer" plugin
1995         matches = re.findall(r'''(?x)<div[^>]+
1996             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1997             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1998         entries.extend(m[-1] for m in matches)
1999
2000         return entries
2001
2002     @staticmethod
2003     def _extract_url(webpage):
2004         urls = YoutubeIE._extract_urls(webpage)
2005         return urls[0] if urls else None
2006
2007     @classmethod
2008     def extract_id(cls, url):
2009         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2010         if mobj is None:
2011             raise ExtractorError('Invalid URL: %s' % url)
2012         video_id = mobj.group(2)
2013         return video_id
2014
2015     def _extract_chapters_from_json(self, data, duration):
2016         chapter_list = traverse_obj(
2017             data, (
2018                 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2019                 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2020             ), expected_type=list)
2021
2022         return self._extract_chapters(
2023             chapter_list,
2024             chapter_time=lambda chapter: float_or_none(
2025                 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2026             chapter_title=lambda chapter: traverse_obj(
2027                 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2028             duration=duration)
2029
2030     def _extract_chapters_from_engagement_panel(self, data, duration):
2031         content_list = traverse_obj(
2032             data,
2033             ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2034             expected_type=list, default=[])
2035         chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2036         chapter_title = lambda chapter: self._get_text(chapter, 'title')
2037
2038         return next((
2039             filter(None, (
2040                 self._extract_chapters(
2041                     traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2042                     chapter_time, chapter_title, duration)
2043                 for contents in content_list
2044             ))), [])
2045
2046     def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2047         chapters = []
2048         last_chapter = {'start_time': 0}
2049         for idx, chapter in enumerate(chapter_list or []):
2050             title = chapter_title(chapter)
2051             start_time = chapter_time(chapter)
2052             if start_time is None:
2053                 continue
2054             last_chapter['end_time'] = start_time
2055             if start_time < last_chapter['start_time']:
2056                 if idx == 1:
2057                     chapters.pop()
2058                     self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2059                 else:
2060                     self.report_warning(f'Invalid start time for chapter "{title}"')
2061                     continue
2062             last_chapter = {'start_time': start_time, 'title': title}
2063             chapters.append(last_chapter)
2064         last_chapter['end_time'] = duration
2065         return chapters
2066
2067     def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2068         return self._parse_json(self._search_regex(
2069             (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2070              regex), webpage, name, default='{}'), video_id, fatal=False)
2071
2072     @staticmethod
2073     def parse_time_text(time_text):
2074         """
2075         Parse the comment time text
2076         time_text is in the format 'X units ago (edited)'
2077         """
2078         time_text_split = time_text.split(' ')
2079         if len(time_text_split) >= 3:
2080             try:
2081                 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2082             except ValueError:
2083                 return None
2084
2085     def _extract_comment(self, comment_renderer, parent=None):
2086         comment_id = comment_renderer.get('commentId')
2087         if not comment_id:
2088             return
2089
2090         text = self._get_text(comment_renderer, 'contentText')
2091
2092         # note: timestamp is an estimate calculated from the current time and time_text
2093         time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2094         time_text_dt = self.parse_time_text(time_text)
2095         if isinstance(time_text_dt, datetime.datetime):
2096             timestamp = calendar.timegm(time_text_dt.timetuple())
2097         author = self._get_text(comment_renderer, 'authorText')
2098         author_id = try_get(comment_renderer,
2099                             lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2100
2101         votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2102                                                        lambda x: x['likeCount']), compat_str)) or 0
2103         author_thumbnail = try_get(comment_renderer,
2104                                    lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2105
2106         author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2107         is_favorited = 'creatorHeart' in (try_get(
2108             comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2109         return {
2110             'id': comment_id,
2111             'text': text,
2112             'timestamp': timestamp,
2113             'time_text': time_text,
2114             'like_count': votes,
2115             'is_favorited': is_favorited,
2116             'author': author,
2117             'author_id': author_id,
2118             'author_thumbnail': author_thumbnail,
2119             'author_is_uploader': author_is_uploader,
2120             'parent': parent or 'root'
2121         }
2122
2123     def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
2124                          ytcfg, video_id, parent=None, comment_counts=None):
2125
2126         def extract_header(contents):
2127             _total_comments = 0
2128             _continuation = None
2129             for content in contents:
2130                 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
2131                 expected_comment_count = parse_count(self._get_text(
2132                     comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
2133
2134                 if expected_comment_count:
2135                     comment_counts[1] = expected_comment_count
2136                     self.to_screen('Downloading ~%d comments' % expected_comment_count)
2137                     _total_comments = comment_counts[1]
2138                 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2139                 comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top
2140
2141                 sort_menu_item = try_get(
2142                     comments_header_renderer,
2143                     lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2144                 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2145
2146                 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2147                 if not _continuation:
2148                     continue
2149
2150                 sort_text = sort_menu_item.get('title')
2151                 if isinstance(sort_text, compat_str):
2152                     sort_text = sort_text.lower()
2153                 else:
2154                     sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2155                 self.to_screen('Sorting comments by %s' % sort_text)
2156                 break
2157             return _total_comments, _continuation
2158
2159         def extract_thread(contents):
2160             if not parent:
2161                 comment_counts[2] = 0
2162             for content in contents:
2163                 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2164                 comment_renderer = try_get(
2165                     comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
2166                     content, (lambda x: x['commentRenderer'], dict))
2167
2168                 if not comment_renderer:
2169                     continue
2170                 comment = self._extract_comment(comment_renderer, parent)
2171                 if not comment:
2172                     continue
2173                 comment_counts[0] += 1
2174                 yield comment
2175                 # Attempt to get the replies
2176                 comment_replies_renderer = try_get(
2177                     comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2178
2179                 if comment_replies_renderer:
2180                     comment_counts[2] += 1
2181                     comment_entries_iter = self._comment_entries(
2182                         comment_replies_renderer, identity_token, account_syncid, ytcfg,
2183                         video_id, parent=comment.get('id'), comment_counts=comment_counts)
2184
2185                     for reply_comment in comment_entries_iter:
2186                         yield reply_comment
2187
2188         # YouTube comments have a max depth of 2
2189         max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
2190         if max_depth == 1 and parent:
2191             return
2192         if not comment_counts:
2193             # comment so far, est. total comments, current comment thread #
2194             comment_counts = [0, 0, 0]
2195
2196         continuation = self._extract_continuation(root_continuation_data)
2197         if continuation and len(continuation['continuation']) < 27:
2198             self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2199             continuation_token = self._generate_comment_continuation(video_id)
2200             continuation = self._build_api_continuation_query(continuation_token, None)
2201
2202         visitor_data = None
2203         is_first_continuation = parent is None
2204
2205         for page_num in itertools.count(0):
2206             if not continuation:
2207                 break
2208             headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
2209             comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2210             if page_num == 0:
2211                 if is_first_continuation:
2212                     note_prefix = 'Downloading comment section API JSON'
2213                 else:
2214                     note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
2215                         comment_counts[2], comment_prog_str)
2216             else:
2217                 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2218                     '       ' if parent else '', ' replies' if parent else '',
2219                     page_num, comment_prog_str)
2220
2221             response = self._extract_response(
2222                 item_id=None, query=continuation,
2223                 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2224                 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
2225             if not response:
2226                 break
2227             visitor_data = try_get(
2228                 response,
2229                 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2230                 compat_str) or visitor_data
2231
2232             continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
2233
2234             continuation = None
2235             if isinstance(continuation_contents, list):
2236                 for continuation_section in continuation_contents:
2237                     if not isinstance(continuation_section, dict):
2238                         continue
2239                     continuation_items = try_get(
2240                         continuation_section,
2241                         (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2242                          lambda x: x['appendContinuationItemsAction']['continuationItems']),
2243                         list) or []
2244                     if is_first_continuation:
2245                         total_comments, continuation = extract_header(continuation_items)
2246                         if total_comments:
2247                             yield total_comments
2248                         is_first_continuation = False
2249                         if continuation:
2250                             break
2251                         continue
2252                     count = 0
2253                     for count, entry in enumerate(extract_thread(continuation_items)):
2254                         yield entry
2255                     continuation = self._extract_continuation({'contents': continuation_items})
2256                     if continuation:
2257                         # Sometimes YouTube provides a continuation without any comments
2258                         # In most cases we end up just downloading these with very little comments to come.
2259                         if count == 0:
2260                             if not parent:
2261                                 self.report_warning('No comments received - assuming end of comments')
2262                             continuation = None
2263                         break
2264
2265             # Deprecated response structure
2266             elif isinstance(continuation_contents, dict):
2267                 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2268                 for key, continuation_renderer in continuation_contents.items():
2269                     if key not in known_continuation_renderers:
2270                         continue
2271                     if not isinstance(continuation_renderer, dict):
2272                         continue
2273                     if is_first_continuation:
2274                         header_continuation_items = [continuation_renderer.get('header') or {}]
2275                         total_comments, continuation = extract_header(header_continuation_items)
2276                         if total_comments:
2277                             yield total_comments
2278                         is_first_continuation = False
2279                         if continuation:
2280                             break
2281
2282                     # Sometimes YouTube provides a continuation without any comments
2283                     # In most cases we end up just downloading these with very little comments to come.
2284                     count = 0
2285                     for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2286                         yield entry
2287                     continuation = self._extract_continuation(continuation_renderer)
2288                     if count == 0:
2289                         if not parent:
2290                             self.report_warning('No comments received - assuming end of comments')
2291                         continuation = None
2292                     break
2293
2294     @staticmethod
2295     def _generate_comment_continuation(video_id):
2296         """
2297         Generates initial comment section continuation token from given video id
2298         """
2299         b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2300         parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2301         new_continuation_intlist = list(itertools.chain.from_iterable(
2302             [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2303         return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2304
2305     def _extract_comments(self, ytcfg, video_id, contents, webpage):
2306         """Entry for comment extraction"""
2307         def _real_comment_extract(contents):
2308             if isinstance(contents, list):
2309                 for entry in contents:
2310                     for key, renderer in entry.items():
2311                         if key not in known_entry_comment_renderers:
2312                             continue
2313                         yield from self._comment_entries(
2314                             renderer, video_id=video_id, ytcfg=ytcfg,
2315                             identity_token=self._extract_identity_token(webpage, item_id=video_id),
2316                             account_syncid=self._extract_account_syncid(ytcfg))
2317                         break
2318         comments = []
2319         known_entry_comment_renderers = ('itemSectionRenderer',)
2320         estimated_total = 0
2321         max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2322
2323         try:
2324             for comment in _real_comment_extract(contents):
2325                 if len(comments) >= max_comments:
2326                     break
2327                 if isinstance(comment, int):
2328                     estimated_total = comment
2329                     continue
2330                 comments.append(comment)
2331         except KeyboardInterrupt:
2332             self.to_screen('Interrupted by user')
2333         self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2334         return {
2335             'comments': comments,
2336             'comment_count': len(comments),
2337         }
2338
2339     @staticmethod
2340     def _generate_player_context(sts=None):
2341         context = {
2342             'html5Preference': 'HTML5_PREF_WANTS',
2343         }
2344         if sts is not None:
2345             context['signatureTimestamp'] = sts
2346         return {
2347             'playbackContext': {
2348                 'contentPlaybackContext': context
2349             },
2350             'contentCheckOk': True,
2351             'racyCheckOk': True
2352         }
2353
2354     @staticmethod
2355     def _get_video_info_params(video_id, client='TVHTML5'):
2356         GVI_CLIENTS = {
2357             'ANDROID': {
2358                 'c': 'ANDROID',
2359                 'cver': '16.20',
2360             },
2361             'TVHTML5': {
2362                 'c': 'TVHTML5',
2363                 'cver': '6.20180913',
2364             },
2365             'IOS': {
2366                 'c': 'IOS',
2367                 'cver': '16.20'
2368             }
2369         }
2370         query = {
2371             'video_id': video_id,
2372             'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2373             'html5': '1'
2374         }
2375         query.update(GVI_CLIENTS.get(client))
2376         return query
2377
2378     def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
2379
2380         session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2381         syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2382         sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
2383         headers = self.generate_api_headers(
2384             player_ytcfg, identity_token, syncid,
2385             default_client=self._YT_CLIENTS[client], session_index=session_index)
2386
2387         yt_query = {'videoId': video_id}
2388         yt_query.update(self._generate_player_context(sts))
2389         return self._extract_response(
2390             item_id=video_id, ep='player', query=yt_query,
2391             ytcfg=player_ytcfg, headers=headers, fatal=False,
2392             default_client=self._YT_CLIENTS[client],
2393             note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2394         ) or None
2395
2396     def _extract_age_gated_player_response(self, client, video_id, ytcfg, identity_token, player_url, initial_pr):
2397         # get_video_info endpoint seems to be completely dead
2398         gvi_client = None  # self._YT_CLIENTS.get(f'_{client}_agegate')
2399         if gvi_client:
2400             pr = self._parse_json(traverse_obj(
2401                 compat_parse_qs(self._download_webpage(
2402                     self.http_scheme() + '//www.youtube.com/get_video_info', video_id,
2403                     'Refetching age-gated %s info webpage' % gvi_client.lower(),
2404                     'unable to download video info webpage', fatal=False,
2405                     query=self._get_video_info_params(video_id, client=gvi_client))),
2406                 ('player_response', 0), expected_type=str) or '{}', video_id)
2407             if pr:
2408                 return pr
2409             self.report_warning('Falling back to embedded-only age-gate workaround')
2410
2411         if not self._YT_CLIENTS.get(f'_{client}_embedded'):
2412             return
2413         embed_webpage = None
2414         if client == 'web' and 'configs' not in self._configuration_arg('player_skip'):
2415             embed_webpage = self._download_webpage(
2416                 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2417                 video_id=video_id, note=f'Downloading age-gated {client} embed config')
2418
2419         ytcfg_age = self.extract_ytcfg(video_id, embed_webpage) or {}
2420         # If we extracted the embed webpage, it'll tell us if we can view the video
2421         embedded_pr = self._parse_json(
2422             traverse_obj(ytcfg_age, ('PLAYER_VARS', 'embedded_player_response'), expected_type=str) or '{}',
2423             video_id=video_id)
2424         embedded_ps_reason = traverse_obj(embedded_pr, ('playabilityStatus', 'reason'), expected_type=str) or ''
2425         if embedded_ps_reason in self._AGE_GATE_REASONS:
2426             return
2427         return self._extract_player_response(
2428             f'_{client}_embedded', video_id,
2429             ytcfg_age or ytcfg, ytcfg_age if client == 'web' else {},
2430             identity_token, player_url, initial_pr)
2431
2432     def _get_requested_clients(self, url, smuggled_data):
2433         requested_clients = [client for client in self._configuration_arg('player_client')
2434                              if client[:0] != '_' and client in self._YT_CLIENTS]
2435         if not requested_clients:
2436             requested_clients = ['android', 'web']
2437
2438         if smuggled_data.get('is_music_url') or self.is_music_url(url):
2439             requested_clients.extend(
2440                 f'{client}_music' for client in requested_clients if not client.endswith('_music'))
2441
2442         return orderedSet(requested_clients)
2443
2444     def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
2445         initial_pr = None
2446         if webpage:
2447             initial_pr = self._extract_yt_initial_variable(
2448                 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2449                 video_id, 'initial player response')
2450
2451         for client in clients:
2452             player_ytcfg = master_ytcfg if client == 'web' else {}
2453             if client == 'web' and initial_pr:
2454                 pr = initial_pr
2455             else:
2456                 if client == 'web_music' and 'configs' not in self._configuration_arg('player_skip'):
2457                     ytm_webpage = self._download_webpage(
2458                         'https://music.youtube.com',
2459                         video_id, fatal=False, note='Downloading remix client config')
2460                     player_ytcfg = self.extract_ytcfg(video_id, ytm_webpage) or {}
2461                 pr = self._extract_player_response(
2462                     client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr)
2463             if pr:
2464                 yield pr
2465             if traverse_obj(pr, ('playabilityStatus', 'reason')) in self._AGE_GATE_REASONS:
2466                 pr = self._extract_age_gated_player_response(
2467                     client, video_id, player_ytcfg or master_ytcfg, identity_token, player_url, initial_pr)
2468                 if pr:
2469                     yield pr
2470         # Android player_response does not have microFormats which are needed for
2471         # extraction of some data. So we return the initial_pr with formats
2472         # stripped out even if not requested by the user
2473         # See: https://github.com/yt-dlp/yt-dlp/issues/501
2474         if initial_pr and 'web' not in clients:
2475             initial_pr['streamingData'] = None
2476             yield initial_pr
2477
2478     def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2479         itags, stream_ids = [], []
2480         itag_qualities = {}
2481         q = qualities([
2482             # "tiny" is the smallest video-only format. But some audio-only formats
2483             # was also labeled "tiny". It is not clear if such formats still exist
2484             'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
2485             'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2486         ])
2487         streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2488
2489         for fmt in streaming_formats:
2490             if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2491                 continue
2492
2493             itag = str_or_none(fmt.get('itag'))
2494             audio_track = fmt.get('audioTrack') or {}
2495             stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2496             if stream_id in stream_ids:
2497                 continue
2498
2499             quality = fmt.get('quality')
2500             if quality == 'tiny' or not quality:
2501                 quality = fmt.get('audioQuality', '').lower() or quality
2502             if itag and quality:
2503                 itag_qualities[itag] = quality
2504             # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2505             # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2506             # number of fragment that would subsequently requested with (`&sq=N`)
2507             if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2508                 continue
2509
2510             fmt_url = fmt.get('url')
2511             if not fmt_url:
2512                 sc = compat_parse_qs(fmt.get('signatureCipher'))
2513                 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2514                 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2515                 if not (sc and fmt_url and encrypted_sig):
2516                     continue
2517                 if not player_url:
2518                     continue
2519                 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2520                 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2521                 fmt_url += '&' + sp + '=' + signature
2522
2523             if itag:
2524                 itags.append(itag)
2525                 stream_ids.append(stream_id)
2526
2527             tbr = float_or_none(
2528                 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2529             dct = {
2530                 'asr': int_or_none(fmt.get('audioSampleRate')),
2531                 'filesize': int_or_none(fmt.get('contentLength')),
2532                 'format_id': itag,
2533                 'format_note': ', '.join(filter(None, (
2534                     audio_track.get('displayName'), fmt.get('qualityLabel') or quality))),
2535                 'fps': int_or_none(fmt.get('fps')),
2536                 'height': int_or_none(fmt.get('height')),
2537                 'quality': q(quality),
2538                 'tbr': tbr,
2539                 'url': fmt_url,
2540                 'width': fmt.get('width'),
2541                 'language': audio_track.get('id', '').split('.')[0],
2542             }
2543             mime_mobj = re.match(
2544                 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2545             if mime_mobj:
2546                 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2547                 dct.update(parse_codecs(mime_mobj.group(2)))
2548                 # The 3gp format in android client has a quality of "small",
2549                 # but is actually worse than all other formats
2550                 if dct['ext'] == '3gp':
2551                     dct['quality'] = q('tiny')
2552                     dct['preference'] = -10
2553             no_audio = dct.get('acodec') == 'none'
2554             no_video = dct.get('vcodec') == 'none'
2555             if no_audio:
2556                 dct['vbr'] = tbr
2557             if no_video:
2558                 dct['abr'] = tbr
2559             if no_audio or no_video:
2560                 dct['downloader_options'] = {
2561                     # Youtube throttles chunks >~10M
2562                     'http_chunk_size': 10485760,
2563                 }
2564                 if dct.get('ext'):
2565                     dct['container'] = dct['ext'] + '_dash'
2566             yield dct
2567
2568         skip_manifests = self._configuration_arg('skip')
2569         get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2570         get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2571
2572         for sd in streaming_data:
2573             hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2574             if hls_manifest_url:
2575                 for f in self._extract_m3u8_formats(
2576                         hls_manifest_url, video_id, 'mp4', fatal=False):
2577                     itag = self._search_regex(
2578                         r'/itag/(\d+)', f['url'], 'itag', default=None)
2579                     if itag in itags:
2580                         continue
2581                     if itag:
2582                         f['format_id'] = itag
2583                         itags.append(itag)
2584                     yield f
2585
2586             dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2587             if dash_manifest_url:
2588                 for f in self._extract_mpd_formats(
2589                         dash_manifest_url, video_id, fatal=False):
2590                     itag = f['format_id']
2591                     if itag in itags:
2592                         continue
2593                     if itag:
2594                         itags.append(itag)
2595                     if itag in itag_qualities:
2596                         f['quality'] = q(itag_qualities[itag])
2597                     filesize = int_or_none(self._search_regex(
2598                         r'/clen/(\d+)', f.get('fragment_base_url')
2599                         or f['url'], 'file size', default=None))
2600                     if filesize:
2601                         f['filesize'] = filesize
2602                     yield f
2603
2604     def _real_extract(self, url):
2605         url, smuggled_data = unsmuggle_url(url, {})
2606         video_id = self._match_id(url)
2607
2608         base_url = self.http_scheme() + '//www.youtube.com/'
2609         webpage_url = base_url + 'watch?v=' + video_id
2610         webpage = self._download_webpage(
2611             webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2612
2613         master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2614         player_url = self._extract_player_url(master_ytcfg, webpage)
2615         identity_token = self._extract_identity_token(webpage, video_id)
2616
2617         player_responses = list(self._extract_player_responses(
2618             self._get_requested_clients(url, smuggled_data),
2619             video_id, webpage, master_ytcfg, player_url, identity_token))
2620
2621         get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2622
2623         playability_statuses = traverse_obj(
2624             player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2625
2626         trailer_video_id = get_first(
2627             playability_statuses,
2628             ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2629             expected_type=str)
2630         if trailer_video_id:
2631             return self.url_result(
2632                 trailer_video_id, self.ie_key(), trailer_video_id)
2633
2634         search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2635                        if webpage else (lambda x: None))
2636
2637         video_details = traverse_obj(
2638             player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2639         microformats = traverse_obj(
2640             player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2641             expected_type=dict, default=[])
2642         video_title = (
2643             get_first(video_details, 'title')
2644             or self._get_text(microformats, (..., 'title'))
2645             or search_meta(['og:title', 'twitter:title', 'title']))
2646         video_description = get_first(video_details, 'shortDescription')
2647
2648         if not smuggled_data.get('force_singlefeed', False):
2649             if not self.get_param('noplaylist'):
2650                 multifeed_metadata_list = get_first(
2651                     player_responses,
2652                     ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2653                     expected_type=str)
2654                 if multifeed_metadata_list:
2655                     entries = []
2656                     feed_ids = []
2657                     for feed in multifeed_metadata_list.split(','):
2658                         # Unquote should take place before split on comma (,) since textual
2659                         # fields may contain comma as well (see
2660                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
2661                         feed_data = compat_parse_qs(
2662                             compat_urllib_parse_unquote_plus(feed))
2663
2664                         def feed_entry(name):
2665                             return try_get(
2666                                 feed_data, lambda x: x[name][0], compat_str)
2667
2668                         feed_id = feed_entry('id')
2669                         if not feed_id:
2670                             continue
2671                         feed_title = feed_entry('title')
2672                         title = video_title
2673                         if feed_title:
2674                             title += ' (%s)' % feed_title
2675                         entries.append({
2676                             '_type': 'url_transparent',
2677                             'ie_key': 'Youtube',
2678                             'url': smuggle_url(
2679                                 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2680                                 {'force_singlefeed': True}),
2681                             'title': title,
2682                         })
2683                         feed_ids.append(feed_id)
2684                     self.to_screen(
2685                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2686                         % (', '.join(feed_ids), video_id))
2687                     return self.playlist_result(
2688                         entries, video_id, video_title, video_description)
2689             else:
2690                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2691
2692         live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2693         is_live = get_first(video_details, 'isLive')
2694         if is_live is None:
2695             is_live = get_first(live_broadcast_details, 'isLiveNow')
2696
2697         streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2698         formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2699
2700         if not formats:
2701             if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2702                 self.raise_no_formats(
2703                     'This video is DRM protected.', expected=True)
2704             pemr = get_first(
2705                 playability_statuses,
2706                 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2707             reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2708             subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2709             if subreason:
2710                 if subreason == 'The uploader has not made this video available in your country.':
2711                     countries = get_first(microformats, 'availableCountries')
2712                     if not countries:
2713                         regions_allowed = search_meta('regionsAllowed')
2714                         countries = regions_allowed.split(',') if regions_allowed else None
2715                     self.raise_geo_restricted(subreason, countries, metadata_available=True)
2716                 reason += f'. {subreason}'
2717             if reason:
2718                 self.raise_no_formats(reason, expected=True)
2719
2720         for f in formats:
2721             # TODO: detect if throttled
2722             if '&n=' in f['url']:  # possibly throttled
2723                 f['source_preference'] = -10
2724                 # note = f.get('format_note')
2725                 # f['format_note'] = f'{note} (throttled)' if note else '(throttled)'
2726
2727         self._sort_formats(formats)
2728
2729         keywords = get_first(video_details, 'keywords', expected_type=list) or []
2730         if not keywords and webpage:
2731             keywords = [
2732                 unescapeHTML(m.group('content'))
2733                 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2734         for keyword in keywords:
2735             if keyword.startswith('yt:stretch='):
2736                 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2737                 if mobj:
2738                     # NB: float is intentional for forcing float division
2739                     w, h = (float(v) for v in mobj.groups())
2740                     if w > 0 and h > 0:
2741                         ratio = w / h
2742                         for f in formats:
2743                             if f.get('vcodec') != 'none':
2744                                 f['stretched_ratio'] = ratio
2745                         break
2746
2747         thumbnails = []
2748         thumbnail_dicts = traverse_obj(
2749             (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2750             expected_type=dict, default=[])
2751         for thumbnail in thumbnail_dicts:
2752             thumbnail_url = thumbnail.get('url')
2753             if not thumbnail_url:
2754                 continue
2755             # Sometimes youtube gives a wrong thumbnail URL. See:
2756             # https://github.com/yt-dlp/yt-dlp/issues/233
2757             # https://github.com/ytdl-org/youtube-dl/issues/28023
2758             if 'maxresdefault' in thumbnail_url:
2759                 thumbnail_url = thumbnail_url.split('?')[0]
2760             thumbnails.append({
2761                 'url': thumbnail_url,
2762                 'height': int_or_none(thumbnail.get('height')),
2763                 'width': int_or_none(thumbnail.get('width')),
2764             })
2765         thumbnail_url = search_meta(['og:image', 'twitter:image'])
2766         if thumbnail_url:
2767             thumbnails.append({
2768                 'url': thumbnail_url,
2769             })
2770         # The best resolution thumbnails sometimes does not appear in the webpage
2771         # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2772         # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2773         hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2774         # TODO: Test them also? - For some videos, even these don't exist
2775         guaranteed_thumbnail_names = [
2776             'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2777             'mqdefault', 'mq1', 'mq2', 'mq3',
2778             'default', '1', '2', '3'
2779         ]
2780         thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2781         n_thumbnail_names = len(thumbnail_names)
2782
2783         thumbnails.extend({
2784             'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2785                 video_id=video_id, name=name, ext=ext,
2786                 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2787             '_test_url': name in hq_thumbnail_names,
2788         } for name in thumbnail_names for ext in ('webp', 'jpg'))
2789         for thumb in thumbnails:
2790             i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2791             thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2792         self._remove_duplicate_formats(thumbnails)
2793
2794         category = get_first(microformats, 'category') or search_meta('genre')
2795         channel_id = str_or_none(
2796             get_first(video_details, 'channelId')
2797             or get_first(microformats, 'externalChannelId')
2798             or search_meta('channelId'))
2799         duration = int_or_none(
2800             get_first(video_details, 'lengthSeconds')
2801             or get_first(microformats, 'lengthSeconds')
2802             or parse_duration(search_meta('duration'))) or None
2803         owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2804
2805         live_content = get_first(video_details, 'isLiveContent')
2806         is_upcoming = get_first(video_details, 'isUpcoming')
2807         if is_live is None:
2808             if is_upcoming or live_content is False:
2809                 is_live = False
2810         if is_upcoming is None and (live_content or is_live):
2811             is_upcoming = False
2812         live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2813         live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2814         if not duration and live_endtime and live_starttime:
2815             duration = live_endtime - live_starttime
2816
2817         info = {
2818             'id': video_id,
2819             'title': self._live_title(video_title) if is_live else video_title,
2820             'formats': formats,
2821             'thumbnails': thumbnails,
2822             'description': video_description,
2823             'upload_date': unified_strdate(
2824                 get_first(microformats, 'uploadDate')
2825                 or search_meta('uploadDate')),
2826             'uploader': get_first(video_details, 'author'),
2827             'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2828             'uploader_url': owner_profile_url,
2829             'channel_id': channel_id,
2830             'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2831             'duration': duration,
2832             'view_count': int_or_none(
2833                 get_first((video_details, microformats), (..., 'viewCount'))
2834                 or search_meta('interactionCount')),
2835             'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2836             'age_limit': 18 if (
2837                 get_first(microformats, 'isFamilySafe') is False
2838                 or search_meta('isFamilyFriendly') == 'false'
2839                 or search_meta('og:restrictions:age') == '18+') else 0,
2840             'webpage_url': webpage_url,
2841             'categories': [category] if category else None,
2842             'tags': keywords,
2843             'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2844             'is_live': is_live,
2845             'was_live': (False if is_live or is_upcoming or live_content is False
2846                          else None if is_live is None or is_upcoming is None
2847                          else live_content),
2848             'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
2849             'release_timestamp': live_starttime,
2850         }
2851
2852         pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2853         # Converted into dicts to remove duplicates
2854         captions = {
2855             sub.get('baseUrl'): sub
2856             for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2857         translation_languages = {
2858             lang.get('languageCode'): lang.get('languageName')
2859             for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2860         subtitles = {}
2861         if pctr:
2862             def process_language(container, base_url, lang_code, sub_name, query):
2863                 lang_subs = container.setdefault(lang_code, [])
2864                 for fmt in self._SUBTITLE_FORMATS:
2865                     query.update({
2866                         'fmt': fmt,
2867                     })
2868                     lang_subs.append({
2869                         'ext': fmt,
2870                         'url': update_url_query(base_url, query),
2871                         'name': sub_name,
2872                     })
2873
2874             for base_url, caption_track in captions.items():
2875                 if not base_url:
2876                     continue
2877                 if caption_track.get('kind') != 'asr':
2878                     lang_code = (
2879                         remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2880                         or caption_track.get('languageCode'))
2881                     if not lang_code:
2882                         continue
2883                     process_language(
2884                         subtitles, base_url, lang_code,
2885                         traverse_obj(caption_track, ('name', 'simpleText')),
2886                         {})
2887                     continue
2888                 automatic_captions = {}
2889                 for trans_code, trans_name in translation_languages.items():
2890                     if not trans_code:
2891                         continue
2892                     process_language(
2893                         automatic_captions, base_url, trans_code,
2894                         self._get_text(trans_name, max_runs=1),
2895                         {'tlang': trans_code})
2896                 info['automatic_captions'] = automatic_captions
2897         info['subtitles'] = subtitles
2898
2899         parsed_url = compat_urllib_parse_urlparse(url)
2900         for component in [parsed_url.fragment, parsed_url.query]:
2901             query = compat_parse_qs(component)
2902             for k, v in query.items():
2903                 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2904                     d_k += '_time'
2905                     if d_k not in info and k in s_ks:
2906                         info[d_k] = parse_duration(query[k][0])
2907
2908         # Youtube Music Auto-generated description
2909         if video_description:
2910             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2911             if mobj:
2912                 release_year = mobj.group('release_year')
2913                 release_date = mobj.group('release_date')
2914                 if release_date:
2915                     release_date = release_date.replace('-', '')
2916                     if not release_year:
2917                         release_year = release_date[:4]
2918                 info.update({
2919                     'album': mobj.group('album'.strip()),
2920                     'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2921                     'track': mobj.group('track').strip(),
2922                     'release_date': release_date,
2923                     'release_year': int_or_none(release_year),
2924                 })
2925
2926         initial_data = None
2927         if webpage:
2928             initial_data = self._extract_yt_initial_variable(
2929                 webpage, self._YT_INITIAL_DATA_RE, video_id,
2930                 'yt initial data')
2931         if not initial_data:
2932             headers = self.generate_api_headers(
2933                 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
2934                 session_index=self._extract_session_index(master_ytcfg))
2935
2936             initial_data = self._extract_response(
2937                 item_id=video_id, ep='next', fatal=False,
2938                 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
2939                 note='Downloading initial data API JSON')
2940
2941         try:
2942             # This will error if there is no livechat
2943             initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2944             info['subtitles']['live_chat'] = [{
2945                 'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
2946                 'video_id': video_id,
2947                 'ext': 'json',
2948                 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2949             }]
2950         except (KeyError, IndexError, TypeError):
2951             pass
2952
2953         if initial_data:
2954             info['chapters'] = (
2955                 self._extract_chapters_from_json(initial_data, duration)
2956                 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2957                 or None)
2958
2959             contents = try_get(
2960                 initial_data,
2961                 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2962                 list) or []
2963             for content in contents:
2964                 vpir = content.get('videoPrimaryInfoRenderer')
2965                 if vpir:
2966                     stl = vpir.get('superTitleLink')
2967                     if stl:
2968                         stl = self._get_text(stl)
2969                         if try_get(
2970                                 vpir,
2971                                 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2972                             info['location'] = stl
2973                         else:
2974                             mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2975                             if mobj:
2976                                 info.update({
2977                                     'series': mobj.group(1),
2978                                     'season_number': int(mobj.group(2)),
2979                                     'episode_number': int(mobj.group(3)),
2980                                 })
2981                     for tlb in (try_get(
2982                             vpir,
2983                             lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2984                             list) or []):
2985                         tbr = tlb.get('toggleButtonRenderer') or {}
2986                         for getter, regex in [(
2987                                 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2988                                 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2989                                     lambda x: x['accessibility'],
2990                                     lambda x: x['accessibilityData']['accessibilityData'],
2991                                 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2992                             label = (try_get(tbr, getter, dict) or {}).get('label')
2993                             if label:
2994                                 mobj = re.match(regex, label)
2995                                 if mobj:
2996                                     info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2997                                     break
2998                     sbr_tooltip = try_get(
2999                         vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3000                     if sbr_tooltip:
3001                         like_count, dislike_count = sbr_tooltip.split(' / ')
3002                         info.update({
3003                             'like_count': str_to_int(like_count),
3004                             'dislike_count': str_to_int(dislike_count),
3005                         })
3006                 vsir = content.get('videoSecondaryInfoRenderer')
3007                 if vsir:
3008                     info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3009                     rows = try_get(
3010                         vsir,
3011                         lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3012                         list) or []
3013                     multiple_songs = False
3014                     for row in rows:
3015                         if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3016                             multiple_songs = True
3017                             break
3018                     for row in rows:
3019                         mrr = row.get('metadataRowRenderer') or {}
3020                         mrr_title = mrr.get('title')
3021                         if not mrr_title:
3022                             continue
3023                         mrr_title = self._get_text(mrr, 'title')
3024                         mrr_contents_text = self._get_text(mrr, ('contents', 0))
3025                         if mrr_title == 'License':
3026                             info['license'] = mrr_contents_text
3027                         elif not multiple_songs:
3028                             if mrr_title == 'Album':
3029                                 info['album'] = mrr_contents_text
3030                             elif mrr_title == 'Artist':
3031                                 info['artist'] = mrr_contents_text
3032                             elif mrr_title == 'Song':
3033                                 info['track'] = mrr_contents_text
3034
3035         fallbacks = {
3036             'channel': 'uploader',
3037             'channel_id': 'uploader_id',
3038             'channel_url': 'uploader_url',
3039         }
3040         for to, frm in fallbacks.items():
3041             if not info.get(to):
3042                 info[to] = info.get(frm)
3043
3044         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3045             v = info.get(s_k)
3046             if v:
3047                 info[d_k] = v
3048
3049         is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3050         is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3051         is_membersonly = None
3052         is_premium = None
3053         if initial_data and is_private is not None:
3054             is_membersonly = False
3055             is_premium = False
3056             contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3057             badge_labels = set()
3058             for content in contents:
3059                 if not isinstance(content, dict):
3060                     continue
3061                 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3062             for badge_label in badge_labels:
3063                 if badge_label.lower() == 'members only':
3064                     is_membersonly = True
3065                 elif badge_label.lower() == 'premium':
3066                     is_premium = True
3067                 elif badge_label.lower() == 'unlisted':
3068                     is_unlisted = True
3069
3070         info['availability'] = self._availability(
3071             is_private=is_private,
3072             needs_premium=is_premium,
3073             needs_subscription=is_membersonly,
3074             needs_auth=info['age_limit'] >= 18,
3075             is_unlisted=None if is_private is None else is_unlisted)
3076
3077         # get xsrf for annotations or comments
3078         get_annotations = self.get_param('writeannotations', False)
3079         get_comments = self.get_param('getcomments', False)
3080         if get_annotations or get_comments:
3081             xsrf_token = None
3082             if master_ytcfg:
3083                 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
3084             if not xsrf_token:
3085                 xsrf_token = self._search_regex(
3086                     r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
3087                     webpage, 'xsrf token', group='xsrf_token', fatal=False)
3088
3089         # annotations
3090         if get_annotations:
3091             invideo_url = get_first(
3092                 player_responses,
3093                 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3094                 expected_type=str)
3095             if xsrf_token and invideo_url:
3096                 xsrf_field_name = None
3097                 if master_ytcfg:
3098                     xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3099                 if not xsrf_field_name:
3100                     xsrf_field_name = self._search_regex(
3101                         r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3102                         webpage, 'xsrf field name',
3103                         group='xsrf_field_name', default='session_token')
3104                 info['annotations'] = self._download_webpage(
3105                     self._proto_relative_url(invideo_url),
3106                     video_id, note='Downloading annotations',
3107                     errnote='Unable to download video annotations', fatal=False,
3108                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3109
3110         if get_comments:
3111             info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3112
3113         self.mark_watched(video_id, player_responses)
3114
3115         return info
3116
3117
3118 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3119     IE_DESC = 'YouTube.com tab'
3120     _VALID_URL = r'''(?x)
3121                     https?://
3122                         (?:\w+\.)?
3123                         (?:
3124                             youtube(?:kids)?\.com|
3125                             invidio\.us
3126                         )/
3127                         (?:
3128                             (?P<channel_type>channel|c|user|browse)/|
3129                             (?P<not_channel>
3130                                 feed/|hashtag/|
3131                                 (?:playlist|watch)\?.*?\blist=
3132                             )|
3133                             (?!(?:%s)\b)  # Direct URLs
3134                         )
3135                         (?P<id>[^/?\#&]+)
3136                     ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3137     IE_NAME = 'youtube:tab'
3138
3139     _TESTS = [{
3140         'note': 'playlists, multipage',
3141         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3142         'playlist_mincount': 94,
3143         'info_dict': {
3144             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3145             'title': 'Игорь Клейнер - Playlists',
3146             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3147             'uploader': 'Игорь Клейнер',
3148             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3149         },
3150     }, {
3151         'note': 'playlists, multipage, different order',
3152         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3153         'playlist_mincount': 94,
3154         'info_dict': {
3155             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3156             'title': 'Игорь Клейнер - Playlists',
3157             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3158             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3159             'uploader': 'Игорь Клейнер',
3160         },
3161     }, {
3162         'note': 'playlists, series',
3163         'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3164         'playlist_mincount': 5,
3165         'info_dict': {
3166             'id': 'UCYO_jab_esuFRV4b17AJtAw',
3167             'title': '3Blue1Brown - Playlists',
3168             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3169             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3170             'uploader': '3Blue1Brown',
3171         },
3172     }, {
3173         'note': 'playlists, singlepage',
3174         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3175         'playlist_mincount': 4,
3176         'info_dict': {
3177             'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3178             'title': 'ThirstForScience - Playlists',
3179             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3180             'uploader': 'ThirstForScience',
3181             'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3182         }
3183     }, {
3184         'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3185         'only_matching': True,
3186     }, {
3187         'note': 'basic, single video playlist',
3188         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3189         'info_dict': {
3190             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3191             'uploader': 'Sergey M.',
3192             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3193             'title': 'youtube-dl public playlist',
3194         },
3195         'playlist_count': 1,
3196     }, {
3197         'note': 'empty playlist',
3198         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3199         'info_dict': {
3200             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3201             'uploader': 'Sergey M.',
3202             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3203             'title': 'youtube-dl empty playlist',
3204         },
3205         'playlist_count': 0,
3206     }, {
3207         'note': 'Home tab',
3208         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3209         'info_dict': {
3210             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3211             'title': 'lex will - Home',
3212             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3213             'uploader': 'lex will',
3214             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3215         },
3216         'playlist_mincount': 2,
3217     }, {
3218         'note': 'Videos tab',
3219         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3220         'info_dict': {
3221             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3222             'title': 'lex will - Videos',
3223             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3224             'uploader': 'lex will',
3225             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3226         },
3227         'playlist_mincount': 975,
3228     }, {
3229         'note': 'Videos tab, sorted by popular',
3230         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3231         'info_dict': {
3232             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3233             'title': 'lex will - Videos',
3234             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3235             'uploader': 'lex will',
3236             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3237         },
3238         'playlist_mincount': 199,
3239     }, {
3240         'note': 'Playlists tab',
3241         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3242         'info_dict': {
3243             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3244             'title': 'lex will - Playlists',
3245             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3246             'uploader': 'lex will',
3247             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3248         },
3249         'playlist_mincount': 17,
3250     }, {
3251         'note': 'Community tab',
3252         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3253         'info_dict': {
3254             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3255             'title': 'lex will - Community',
3256             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3257             'uploader': 'lex will',
3258             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3259         },
3260         'playlist_mincount': 18,
3261     }, {
3262         'note': 'Channels tab',
3263         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3264         'info_dict': {
3265             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3266             'title': 'lex will - Channels',
3267             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3268             'uploader': 'lex will',
3269             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3270         },
3271         'playlist_mincount': 12,
3272     }, {
3273         'note': 'Search tab',
3274         'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3275         'playlist_mincount': 40,
3276         'info_dict': {
3277             'id': 'UCYO_jab_esuFRV4b17AJtAw',
3278             'title': '3Blue1Brown - Search - linear algebra',
3279             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3280             'uploader': '3Blue1Brown',
3281             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3282         },
3283     }, {
3284         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3285         'only_matching': True,
3286     }, {
3287         'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3288         'only_matching': True,
3289     }, {
3290         'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3291         'only_matching': True,
3292     }, {
3293         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3294         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3295         'info_dict': {
3296             'title': '29C3: Not my department',
3297             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3298             'uploader': 'Christiaan008',
3299             'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3300             'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3301         },
3302         'playlist_count': 96,
3303     }, {
3304         'note': 'Large playlist',
3305         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3306         'info_dict': {
3307             'title': 'Uploads from Cauchemar',
3308             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3309             'uploader': 'Cauchemar',
3310             'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3311         },
3312         'playlist_mincount': 1123,
3313     }, {
3314         'note': 'even larger playlist, 8832 videos',
3315         'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3316         'only_matching': True,
3317     }, {
3318         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3319         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3320         'info_dict': {
3321             'title': 'Uploads from Interstellar Movie',
3322             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3323             'uploader': 'Interstellar Movie',
3324             'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3325         },
3326         'playlist_mincount': 21,
3327     }, {
3328         'note': 'Playlist with "show unavailable videos" button',
3329         'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3330         'info_dict': {
3331             'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3332             'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3333             'uploader': 'Phim Siêu Nhân Nhật Bản',
3334             'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3335         },
3336         'playlist_mincount': 200,
3337     }, {
3338         'note': 'Playlist with unavailable videos in page 7',
3339         'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3340         'info_dict': {
3341             'title': 'Uploads from BlankTV',
3342             'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3343             'uploader': 'BlankTV',
3344             'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3345         },
3346         'playlist_mincount': 1000,
3347     }, {
3348         'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3349         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3350         'info_dict': {
3351             'title': 'Data Analysis with Dr Mike Pound',
3352             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3353             'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3354             'uploader': 'Computerphile',
3355             'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3356         },
3357         'playlist_mincount': 11,
3358     }, {
3359         'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3360         'only_matching': True,
3361     }, {
3362         'note': 'Playlist URL that does not actually serve a playlist',
3363         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3364         'info_dict': {
3365             'id': 'FqZTN594JQw',
3366             'ext': 'webm',
3367             'title': "Smiley's People 01 detective, Adventure Series, Action",
3368             'uploader': 'STREEM',
3369             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3370             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3371             'upload_date': '20150526',
3372             'license': 'Standard YouTube License',
3373             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3374             'categories': ['People & Blogs'],
3375             'tags': list,
3376             'view_count': int,
3377             'like_count': int,
3378             'dislike_count': int,
3379         },
3380         'params': {
3381             'skip_download': True,
3382         },
3383         'skip': 'This video is not available.',
3384         'add_ie': [YoutubeIE.ie_key()],
3385     }, {
3386         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3387         'only_matching': True,
3388     }, {
3389         'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3390         'only_matching': True,
3391     }, {
3392         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3393         'info_dict': {
3394             'id': 'FMtPN8yp5LU',  # This will keep changing
3395             'ext': 'mp4',
3396             'title': compat_str,
3397             'uploader': 'Sky News',
3398             'uploader_id': 'skynews',
3399             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3400             'upload_date': r're:\d{8}',
3401             'description': compat_str,
3402             'categories': ['News & Politics'],
3403             'tags': list,
3404             'like_count': int,
3405             'dislike_count': int,
3406         },
3407         'params': {
3408             'skip_download': True,
3409         },
3410         'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3411     }, {
3412         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3413         'info_dict': {
3414             'id': 'a48o2S1cPoo',
3415             'ext': 'mp4',
3416             'title': 'The Young Turks - Live Main Show',
3417             'uploader': 'The Young Turks',
3418             'uploader_id': 'TheYoungTurks',
3419             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3420             'upload_date': '20150715',
3421             'license': 'Standard YouTube License',
3422             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3423             'categories': ['News & Politics'],
3424             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3425             'like_count': int,
3426             'dislike_count': int,
3427         },
3428         'params': {
3429             'skip_download': True,
3430         },
3431         'only_matching': True,
3432     }, {
3433         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3434         'only_matching': True,
3435     }, {
3436         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3437         'only_matching': True,
3438     }, {
3439         'note': 'A channel that is not live. Should raise error',
3440         'url': 'https://www.youtube.com/user/numberphile/live',
3441         'only_matching': True,
3442     }, {
3443         'url': 'https://www.youtube.com/feed/trending',
3444         'only_matching': True,
3445     }, {
3446         'url': 'https://www.youtube.com/feed/library',
3447         'only_matching': True,
3448     }, {
3449         'url': 'https://www.youtube.com/feed/history',
3450         'only_matching': True,
3451     }, {
3452         'url': 'https://www.youtube.com/feed/subscriptions',
3453         'only_matching': True,
3454     }, {
3455         'url': 'https://www.youtube.com/feed/watch_later',
3456         'only_matching': True,
3457     }, {
3458         'note': 'Recommended - redirects to home page',
3459         'url': 'https://www.youtube.com/feed/recommended',
3460         'only_matching': True,
3461     }, {
3462         'note': 'inline playlist with not always working continuations',
3463         'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3464         'only_matching': True,
3465     }, {
3466         'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3467         'only_matching': True,
3468     }, {
3469         'url': 'https://www.youtube.com/course',
3470         'only_matching': True,
3471     }, {
3472         'url': 'https://www.youtube.com/zsecurity',
3473         'only_matching': True,
3474     }, {
3475         'url': 'http://www.youtube.com/NASAgovVideo/videos',
3476         'only_matching': True,
3477     }, {
3478         'url': 'https://www.youtube.com/TheYoungTurks/live',
3479         'only_matching': True,
3480     }, {
3481         'url': 'https://www.youtube.com/hashtag/cctv9',
3482         'info_dict': {
3483             'id': 'cctv9',
3484             'title': '#cctv9',
3485         },
3486         'playlist_mincount': 350,
3487     }, {
3488         'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3489         'only_matching': True,
3490     }, {
3491         'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3492         'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3493         'only_matching': True
3494     }, {
3495         'note': '/browse/ should redirect to /channel/',
3496         'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3497         'only_matching': True
3498     }, {
3499         'note': 'VLPL, should redirect to playlist?list=PL...',
3500         'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3501         'info_dict': {
3502             'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3503             'uploader': 'NoCopyrightSounds',
3504             'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3505             'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3506             'title': 'NCS Releases',
3507         },
3508         'playlist_mincount': 166,
3509     }, {
3510         'note': 'Topic, should redirect to playlist?list=UU...',
3511         'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3512         'info_dict': {
3513             'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3514             'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3515             'title': 'Uploads from Royalty Free Music - Topic',
3516             'uploader': 'Royalty Free Music - Topic',
3517         },
3518         'expected_warnings': [
3519             'A channel/user page was given',
3520             'The URL does not have a videos tab',
3521         ],
3522         'playlist_mincount': 101,
3523     }, {
3524         'note': 'Topic without a UU playlist',
3525         'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3526         'info_dict': {
3527             'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3528             'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3529         },
3530         'expected_warnings': [
3531             'A channel/user page was given',
3532             'The URL does not have a videos tab',
3533             'Falling back to channel URL',
3534         ],
3535         'playlist_mincount': 9,
3536     }, {
3537         'note': 'Youtube music Album',
3538         'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3539         'info_dict': {
3540             'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3541             'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3542         },
3543         'playlist_count': 50,
3544     }, {
3545         'note': 'unlisted single video playlist',
3546         'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3547         'info_dict': {
3548             'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3549             'uploader': 'colethedj',
3550             'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3551             'title': 'yt-dlp unlisted playlist test',
3552             'availability': 'unlisted'
3553         },
3554         'playlist_count': 1,
3555     }]
3556
3557     @classmethod
3558     def suitable(cls, url):
3559         return False if YoutubeIE.suitable(url) else super(
3560             YoutubeTabIE, cls).suitable(url)
3561
3562     def _extract_channel_id(self, webpage):
3563         channel_id = self._html_search_meta(
3564             'channelId', webpage, 'channel id', default=None)
3565         if channel_id:
3566             return channel_id
3567         channel_url = self._html_search_meta(
3568             ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3569              'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3570              'twitter:app:url:googleplay'), webpage, 'channel url')
3571         return self._search_regex(
3572             r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3573             channel_url, 'channel id')
3574
3575     @staticmethod
3576     def _extract_basic_item_renderer(item):
3577         # Modified from _extract_grid_item_renderer
3578         known_basic_renderers = (
3579             'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3580         )
3581         for key, renderer in item.items():
3582             if not isinstance(renderer, dict):
3583                 continue
3584             elif key in known_basic_renderers:
3585                 return renderer
3586             elif key.startswith('grid') and key.endswith('Renderer'):
3587                 return renderer
3588
3589     def _grid_entries(self, grid_renderer):
3590         for item in grid_renderer['items']:
3591             if not isinstance(item, dict):
3592                 continue
3593             renderer = self._extract_basic_item_renderer(item)
3594             if not isinstance(renderer, dict):
3595                 continue
3596             title = self._get_text(renderer, 'title')
3597
3598             # playlist
3599             playlist_id = renderer.get('playlistId')
3600             if playlist_id:
3601                 yield self.url_result(
3602                     'https://www.youtube.com/playlist?list=%s' % playlist_id,
3603                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3604                     video_title=title)
3605                 continue
3606             # video
3607             video_id = renderer.get('videoId')
3608             if video_id:
3609                 yield self._extract_video(renderer)
3610                 continue
3611             # channel
3612             channel_id = renderer.get('channelId')
3613             if channel_id:
3614                 yield self.url_result(
3615                     'https://www.youtube.com/channel/%s' % channel_id,
3616                     ie=YoutubeTabIE.ie_key(), video_title=title)
3617                 continue
3618             # generic endpoint URL support
3619             ep_url = urljoin('https://www.youtube.com/', try_get(
3620                 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3621                 compat_str))
3622             if ep_url:
3623                 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3624                     if ie.suitable(ep_url):
3625                         yield self.url_result(
3626                             ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3627                         break
3628
3629     def _shelf_entries_from_content(self, shelf_renderer):
3630         content = shelf_renderer.get('content')
3631         if not isinstance(content, dict):
3632             return
3633         renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3634         if renderer:
3635             # TODO: add support for nested playlists so each shelf is processed
3636             # as separate playlist
3637             # TODO: this includes only first N items
3638             for entry in self._grid_entries(renderer):
3639                 yield entry
3640         renderer = content.get('horizontalListRenderer')
3641         if renderer:
3642             # TODO
3643             pass
3644
3645     def _shelf_entries(self, shelf_renderer, skip_channels=False):
3646         ep = try_get(
3647             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3648             compat_str)
3649         shelf_url = urljoin('https://www.youtube.com', ep)
3650         if shelf_url:
3651             # Skipping links to another channels, note that checking for
3652             # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3653             # will not work
3654             if skip_channels and '/channels?' in shelf_url:
3655                 return
3656             title = self._get_text(shelf_renderer, 'title')
3657             yield self.url_result(shelf_url, video_title=title)
3658         # Shelf may not contain shelf URL, fallback to extraction from content
3659         for entry in self._shelf_entries_from_content(shelf_renderer):
3660             yield entry
3661
3662     def _playlist_entries(self, video_list_renderer):
3663         for content in video_list_renderer['contents']:
3664             if not isinstance(content, dict):
3665                 continue
3666             renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3667             if not isinstance(renderer, dict):
3668                 continue
3669             video_id = renderer.get('videoId')
3670             if not video_id:
3671                 continue
3672             yield self._extract_video(renderer)
3673
3674     def _rich_entries(self, rich_grid_renderer):
3675         renderer = try_get(
3676             rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3677         video_id = renderer.get('videoId')
3678         if not video_id:
3679             return
3680         yield self._extract_video(renderer)
3681
3682     def _video_entry(self, video_renderer):
3683         video_id = video_renderer.get('videoId')
3684         if video_id:
3685             return self._extract_video(video_renderer)
3686
3687     def _post_thread_entries(self, post_thread_renderer):
3688         post_renderer = try_get(
3689             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3690         if not post_renderer:
3691             return
3692         # video attachment
3693         video_renderer = try_get(
3694             post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3695         video_id = video_renderer.get('videoId')
3696         if video_id:
3697             entry = self._extract_video(video_renderer)
3698             if entry:
3699                 yield entry
3700         # playlist attachment
3701         playlist_id = try_get(
3702             post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3703         if playlist_id:
3704             yield self.url_result(
3705                 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3706                 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3707         # inline video links
3708         runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3709         for run in runs:
3710             if not isinstance(run, dict):
3711                 continue
3712             ep_url = try_get(
3713                 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3714             if not ep_url:
3715                 continue
3716             if not YoutubeIE.suitable(ep_url):
3717                 continue
3718             ep_video_id = YoutubeIE._match_id(ep_url)
3719             if video_id == ep_video_id:
3720                 continue
3721             yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3722
3723     def _post_thread_continuation_entries(self, post_thread_continuation):
3724         contents = post_thread_continuation.get('contents')
3725         if not isinstance(contents, list):
3726             return
3727         for content in contents:
3728             renderer = content.get('backstagePostThreadRenderer')
3729             if not isinstance(renderer, dict):
3730                 continue
3731             for entry in self._post_thread_entries(renderer):
3732                 yield entry
3733
3734     r''' # unused
3735     def _rich_grid_entries(self, contents):
3736         for content in contents:
3737             video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3738             if video_renderer:
3739                 entry = self._video_entry(video_renderer)
3740                 if entry:
3741                     yield entry
3742     '''
3743     def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
3744
3745         def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
3746             contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3747             for content in contents:
3748                 if not isinstance(content, dict):
3749                     continue
3750                 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3751                 if not is_renderer:
3752                     renderer = content.get('richItemRenderer')
3753                     if renderer:
3754                         for entry in self._rich_entries(renderer):
3755                             yield entry
3756                         continuation_list[0] = self._extract_continuation(parent_renderer)
3757                     continue
3758                 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3759                 for isr_content in isr_contents:
3760                     if not isinstance(isr_content, dict):
3761                         continue
3762
3763                     known_renderers = {
3764                         'playlistVideoListRenderer': self._playlist_entries,
3765                         'gridRenderer': self._grid_entries,
3766                         'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
3767                         'backstagePostThreadRenderer': self._post_thread_entries,
3768                         'videoRenderer': lambda x: [self._video_entry(x)],
3769                     }
3770                     for key, renderer in isr_content.items():
3771                         if key not in known_renderers:
3772                             continue
3773                         for entry in known_renderers[key](renderer):
3774                             if entry:
3775                                 yield entry
3776                         continuation_list[0] = self._extract_continuation(renderer)
3777                         break
3778
3779                 if not continuation_list[0]:
3780                     continuation_list[0] = self._extract_continuation(is_renderer)
3781
3782             if not continuation_list[0]:
3783                 continuation_list[0] = self._extract_continuation(parent_renderer)
3784
3785         continuation_list = [None]  # Python 2 doesnot support nonlocal
3786         tab_content = try_get(tab, lambda x: x['content'], dict)
3787         if not tab_content:
3788             return
3789         parent_renderer = (
3790             try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3791             or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
3792         for entry in extract_entries(parent_renderer):
3793             yield entry
3794         continuation = continuation_list[0]
3795         visitor_data = None
3796
3797         for page_num in itertools.count(1):
3798             if not continuation:
3799                 break
3800             headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
3801             response = self._extract_response(
3802                 item_id='%s page %s' % (item_id, page_num),
3803                 query=continuation, headers=headers, ytcfg=ytcfg,
3804                 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3805
3806             if not response:
3807                 break
3808             visitor_data = try_get(
3809                 response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
3810
3811             known_continuation_renderers = {
3812                 'playlistVideoListContinuation': self._playlist_entries,
3813                 'gridContinuation': self._grid_entries,
3814                 'itemSectionContinuation': self._post_thread_continuation_entries,
3815                 'sectionListContinuation': extract_entries,  # for feeds
3816             }
3817             continuation_contents = try_get(
3818                 response, lambda x: x['continuationContents'], dict) or {}
3819             continuation_renderer = None
3820             for key, value in continuation_contents.items():
3821                 if key not in known_continuation_renderers:
3822                     continue
3823                 continuation_renderer = value
3824                 continuation_list = [None]
3825                 for entry in known_continuation_renderers[key](continuation_renderer):
3826                     yield entry
3827                 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3828                 break
3829             if continuation_renderer:
3830                 continue
3831
3832             known_renderers = {
3833                 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3834                 'gridVideoRenderer': (self._grid_entries, 'items'),
3835                 'gridChannelRenderer': (self._grid_entries, 'items'),
3836                 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
3837                 'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
3838                 'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
3839                 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
3840             }
3841             on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3842             continuation_items = try_get(
3843                 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
3844             continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3845             video_items_renderer = None
3846             for key, value in continuation_item.items():
3847                 if key not in known_renderers:
3848                     continue
3849                 video_items_renderer = {known_renderers[key][1]: continuation_items}
3850                 continuation_list = [None]
3851                 for entry in known_renderers[key][0](video_items_renderer):
3852                     yield entry
3853                 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
3854                 break
3855             if video_items_renderer:
3856                 continue
3857             break
3858
3859     @staticmethod
3860     def _extract_selected_tab(tabs):
3861         for tab in tabs:
3862             renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3863             if renderer.get('selected') is True:
3864                 return renderer
3865         else:
3866             raise ExtractorError('Unable to find selected tab')
3867
3868     @classmethod
3869     def _extract_uploader(cls, data):
3870         uploader = {}
3871         renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3872         owner = try_get(
3873             renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3874         if owner:
3875             uploader['uploader'] = owner.get('text')
3876             uploader['uploader_id'] = try_get(
3877                 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3878             uploader['uploader_url'] = urljoin(
3879                 'https://www.youtube.com/',
3880                 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3881         return {k: v for k, v in uploader.items() if v is not None}
3882
3883     def _extract_from_tabs(self, item_id, webpage, data, tabs):
3884         playlist_id = title = description = channel_url = channel_name = channel_id = None
3885         thumbnails_list = tags = []
3886
3887         selected_tab = self._extract_selected_tab(tabs)
3888         renderer = try_get(
3889             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3890         if renderer:
3891             channel_name = renderer.get('title')
3892             channel_url = renderer.get('channelUrl')
3893             channel_id = renderer.get('externalId')
3894         else:
3895             renderer = try_get(
3896                 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3897
3898         if renderer:
3899             title = renderer.get('title')
3900             description = renderer.get('description', '')
3901             playlist_id = channel_id
3902             tags = renderer.get('keywords', '').split()
3903             thumbnails_list = (
3904                 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
3905                 or try_get(
3906                     self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3907                     lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
3908                     list)
3909                 or [])
3910
3911         thumbnails = []
3912         for t in thumbnails_list:
3913             if not isinstance(t, dict):
3914                 continue
3915             thumbnail_url = url_or_none(t.get('url'))
3916             if not thumbnail_url:
3917                 continue
3918             thumbnails.append({
3919                 'url': thumbnail_url,
3920                 'width': int_or_none(t.get('width')),
3921                 'height': int_or_none(t.get('height')),
3922             })
3923         if playlist_id is None:
3924             playlist_id = item_id
3925         if title is None:
3926             title = (
3927                 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3928                 or playlist_id)
3929         title += format_field(selected_tab, 'title', ' - %s')
3930         title += format_field(selected_tab, 'expandedText', ' - %s')
3931         metadata = {
3932             'playlist_id': playlist_id,
3933             'playlist_title': title,
3934             'playlist_description': description,
3935             'uploader': channel_name,
3936             'uploader_id': channel_id,
3937             'uploader_url': channel_url,
3938             'thumbnails': thumbnails,
3939             'tags': tags,
3940         }
3941         availability = self._extract_availability(data)
3942         if availability:
3943             metadata['availability'] = availability
3944         if not channel_id:
3945             metadata.update(self._extract_uploader(data))
3946         metadata.update({
3947             'channel': metadata['uploader'],
3948             'channel_id': metadata['uploader_id'],
3949             'channel_url': metadata['uploader_url']})
3950         ytcfg = self.extract_ytcfg(item_id, webpage)
3951         return self.playlist_result(
3952             self._entries(
3953                 selected_tab, playlist_id,
3954                 self._extract_identity_token(webpage, item_id),
3955                 self._extract_account_syncid(ytcfg, data), ytcfg),
3956             **metadata)
3957
3958     def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
3959         first_id = last_id = None
3960         ytcfg = self.extract_ytcfg(playlist_id, webpage)
3961         headers = self.generate_api_headers(
3962             ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3963             identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
3964         for page_num in itertools.count(1):
3965             videos = list(self._playlist_entries(playlist))
3966             if not videos:
3967                 return
3968             start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3969             if start >= len(videos):
3970                 return
3971             for video in videos[start:]:
3972                 if video['id'] == first_id:
3973                     self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3974                     return
3975                 yield video
3976             first_id = first_id or videos[0]['id']
3977             last_id = videos[-1]['id']
3978             watch_endpoint = try_get(
3979                 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
3980             query = {
3981                 'playlistId': playlist_id,
3982                 'videoId': watch_endpoint.get('videoId') or last_id,
3983                 'index': watch_endpoint.get('index') or len(videos),
3984                 'params': watch_endpoint.get('params') or 'OAE%3D'
3985             }
3986             response = self._extract_response(
3987                 item_id='%s page %d' % (playlist_id, page_num),
3988                 query=query, ep='next', headers=headers, ytcfg=ytcfg,
3989                 check_get_keys='contents'
3990             )
3991             playlist = try_get(
3992                 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3993
3994     def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
3995         title = playlist.get('title') or try_get(
3996             data, lambda x: x['titleText']['simpleText'], compat_str)
3997         playlist_id = playlist.get('playlistId') or item_id
3998
3999         # Delegating everything except mix playlists to regular tab-based playlist URL
4000         playlist_url = urljoin(url, try_get(
4001             playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4002             compat_str))
4003         if playlist_url and playlist_url != url:
4004             return self.url_result(
4005                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4006                 video_title=title)
4007
4008         return self.playlist_result(
4009             self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4010             playlist_id=playlist_id, playlist_title=title)
4011
4012     def _extract_availability(self, data):
4013         """
4014         Gets the availability of a given playlist/tab.
4015         Note: Unless YouTube tells us explicitly, we do not assume it is public
4016         @param data: response
4017         """
4018         is_private = is_unlisted = None
4019         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4020         badge_labels = self._extract_badges(renderer)
4021
4022         # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4023         privacy_dropdown_entries = try_get(
4024             renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4025         for renderer_dict in privacy_dropdown_entries:
4026             is_selected = try_get(
4027                 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4028             if not is_selected:
4029                 continue
4030             label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4031             if label:
4032                 badge_labels.add(label.lower())
4033                 break
4034
4035         for badge_label in badge_labels:
4036             if badge_label == 'unlisted':
4037                 is_unlisted = True
4038             elif badge_label == 'private':
4039                 is_private = True
4040             elif badge_label == 'public':
4041                 is_unlisted = is_private = False
4042         return self._availability(is_private, False, False, False, is_unlisted)
4043
4044     @staticmethod
4045     def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4046         sidebar_renderer = try_get(
4047             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4048         for item in sidebar_renderer:
4049             renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4050             if renderer:
4051                 return renderer
4052
4053     def _reload_with_unavailable_videos(self, item_id, data, webpage):
4054         """
4055         Get playlist with unavailable videos if the 'show unavailable videos' button exists.
4056         """
4057         browse_id = params = None
4058         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4059         if not renderer:
4060             return
4061         menu_renderer = try_get(
4062             renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
4063         for menu_item in menu_renderer:
4064             if not isinstance(menu_item, dict):
4065                 continue
4066             nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
4067             text = try_get(
4068                 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
4069             if not text or text.lower() != 'show unavailable videos':
4070                 continue
4071             browse_endpoint = try_get(
4072                 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
4073             browse_id = browse_endpoint.get('browseId')
4074             params = browse_endpoint.get('params')
4075             break
4076
4077         ytcfg = self.extract_ytcfg(item_id, webpage)
4078         headers = self.generate_api_headers(
4079             ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4080             identity_token=self._extract_identity_token(webpage, item_id=item_id),
4081             visitor_data=try_get(
4082                 self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
4083         query = {
4084             'params': params or 'wgYCCAA=',
4085             'browseId': browse_id or 'VL%s' % item_id
4086         }
4087         return self._extract_response(
4088             item_id=item_id, headers=headers, query=query,
4089             check_get_keys='contents', fatal=False, ytcfg=ytcfg,
4090             note='Downloading API JSON with unavailable videos')
4091
4092     def _extract_webpage(self, url, item_id):
4093         retries = self.get_param('extractor_retries', 3)
4094         count = -1
4095         last_error = 'Incomplete yt initial data recieved'
4096         while count < retries:
4097             count += 1
4098             # Sometimes youtube returns a webpage with incomplete ytInitialData
4099             # See: https://github.com/yt-dlp/yt-dlp/issues/116
4100             if count:
4101                 self.report_warning('%s. Retrying ...' % last_error)
4102             webpage = self._download_webpage(
4103                 url, item_id,
4104                 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4105             data = self.extract_yt_initial_data(item_id, webpage)
4106             if data.get('contents') or data.get('currentVideoEndpoint'):
4107                 break
4108             # Extract alerts here only when there is error
4109             self._extract_and_report_alerts(data)
4110             if count >= retries:
4111                 raise ExtractorError(last_error)
4112         return webpage, data
4113
4114     @staticmethod
4115     def _smuggle_data(entries, data):
4116         for entry in entries:
4117             if data:
4118                 entry['url'] = smuggle_url(entry['url'], data)
4119             yield entry
4120
4121     def _real_extract(self, url):
4122         url, smuggled_data = unsmuggle_url(url, {})
4123         if self.is_music_url(url):
4124             smuggled_data['is_music_url'] = True
4125         info_dict = self.__real_extract(url, smuggled_data)
4126         if info_dict.get('entries'):
4127             info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4128         return info_dict
4129
4130     _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4131
4132     def __real_extract(self, url, smuggled_data):
4133         item_id = self._match_id(url)
4134         url = compat_urlparse.urlunparse(
4135             compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
4136         compat_opts = self.get_param('compat_opts', [])
4137
4138         def get_mobj(url):
4139             mobj = self._url_re.match(url).groupdict()
4140             mobj.update((k, '') for k, v in mobj.items() if v is None)
4141             return mobj
4142
4143         mobj = get_mobj(url)
4144         # Youtube returns incomplete data if tabname is not lower case
4145         pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
4146
4147         if is_channel:
4148             if smuggled_data.get('is_music_url'):
4149                 if item_id[:2] == 'VL':
4150                     # Youtube music VL channels have an equivalent playlist
4151                     item_id = item_id[2:]
4152                     pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
4153                 elif item_id[:2] == 'MP':
4154                     # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
4155                     item_id = self._search_regex(
4156                         r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
4157                         self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
4158                         'playlist id')
4159                     pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
4160                 elif mobj['channel_type'] == 'browse':
4161                     # Youtube music /browse/ should be changed to /channel/
4162                     pre = 'https://www.youtube.com/channel/%s' % item_id
4163         if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
4164             # Home URLs should redirect to /videos/
4165             self.report_warning(
4166                 'A channel/user page was given. All the channel\'s videos will be downloaded. '
4167                 'To download only the videos in the home page, add a "/featured" to the URL')
4168             tab = '/videos'
4169
4170         url = ''.join((pre, tab, post))
4171         mobj = get_mobj(url)
4172
4173         # Handle both video/playlist URLs
4174         qs = parse_qs(url)
4175         video_id = qs.get('v', [None])[0]
4176         playlist_id = qs.get('list', [None])[0]
4177
4178         if not video_id and mobj['not_channel'].startswith('watch'):
4179             if not playlist_id:
4180                 # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
4181                 raise ExtractorError('Unable to recognize tab page')
4182             # Common mistake: https://www.youtube.com/watch?list=playlist_id
4183             self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
4184             url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
4185             mobj = get_mobj(url)
4186
4187         if video_id and playlist_id:
4188             if self.get_param('noplaylist'):
4189                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
4190                 return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
4191             self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))
4192
4193         webpage, data = self._extract_webpage(url, item_id)
4194
4195         tabs = try_get(
4196             data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4197         if tabs:
4198             selected_tab = self._extract_selected_tab(tabs)
4199             tab_name = selected_tab.get('title', '')
4200             if 'no-youtube-channel-redirect' not in compat_opts:
4201                 if mobj['tab'] == '/live':
4202                     # Live tab should have redirected to the video
4203                     raise ExtractorError('The channel is not currently live', expected=True)
4204                 if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
4205                     if not mobj['not_channel'] and item_id[:2] == 'UC':
4206                         # Topic channels don't have /videos. Use the equivalent playlist instead
4207                         self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
4208                         pl_id = 'UU%s' % item_id[2:]
4209                         pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
4210                         try:
4211                             pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
4212                             for alert_type, alert_message in self._extract_alerts(pl_data):
4213                                 if alert_type == 'error':
4214                                     raise ExtractorError('Youtube said: %s' % alert_message)
4215                             item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
4216                         except ExtractorError:
4217                             self.report_warning('The playlist gave error. Falling back to channel URL')
4218                     else:
4219                         self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))
4220
4221         self.write_debug('Final URL: %s' % url)
4222
4223         # YouTube sometimes provides a button to reload playlist with unavailable videos.
4224         if 'no-youtube-unavailable-videos' not in compat_opts:
4225             data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
4226         self._extract_and_report_alerts(data)
4227         tabs = try_get(
4228             data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
4229         if tabs:
4230             return self._extract_from_tabs(item_id, webpage, data, tabs)
4231
4232         playlist = try_get(
4233             data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4234         if playlist:
4235             return self._extract_from_playlist(item_id, url, data, playlist, webpage)
4236
4237         video_id = try_get(
4238             data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
4239             compat_str) or video_id
4240         if video_id:
4241             if mobj['tab'] != '/live':  # live tab is expected to redirect to video
4242                 self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
4243             return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
4244
4245         raise ExtractorError('Unable to recognize tab page')
4246
4247
4248 class YoutubePlaylistIE(InfoExtractor):
4249     IE_DESC = 'YouTube.com playlists'
4250     _VALID_URL = r'''(?x)(?:
4251                         (?:https?://)?
4252                         (?:\w+\.)?
4253                         (?:
4254                             (?:
4255                                 youtube(?:kids)?\.com|
4256                                 invidio\.us
4257                             )
4258                             /.*?\?.*?\blist=
4259                         )?
4260                         (?P<id>%(playlist_id)s)
4261                      )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
4262     IE_NAME = 'youtube:playlist'
4263     _TESTS = [{
4264         'note': 'issue #673',
4265         'url': 'PLBB231211A4F62143',
4266         'info_dict': {
4267             'title': '[OLD]Team Fortress 2 (Class-based LP)',
4268             'id': 'PLBB231211A4F62143',
4269             'uploader': 'Wickydoo',
4270             'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
4271             'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
4272         },
4273         'playlist_mincount': 29,
4274     }, {
4275         'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4276         'info_dict': {
4277             'title': 'YDL_safe_search',
4278             'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
4279         },
4280         'playlist_count': 2,
4281         'skip': 'This playlist is private',
4282     }, {
4283         'note': 'embedded',
4284         'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4285         'playlist_count': 4,
4286         'info_dict': {
4287             'title': 'JODA15',
4288             'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
4289             'uploader': 'milan',
4290             'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
4291         }
4292     }, {
4293         'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4294         'playlist_mincount': 654,
4295         'info_dict': {
4296             'title': '2018 Chinese New Singles (11/6 updated)',
4297             'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
4298             'uploader': 'LBK',
4299             'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
4300             'description': 'md5:da521864744d60a198e3a88af4db0d9d',
4301         }
4302     }, {
4303         'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
4304         'only_matching': True,
4305     }, {
4306         # music album playlist
4307         'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
4308         'only_matching': True,
4309     }]
4310
4311     @classmethod
4312     def suitable(cls, url):
4313         if YoutubeTabIE.suitable(url):
4314             return False
4315         # Hack for lazy extractors until more generic solution is implemented
4316         # (see #28780)
4317         from .youtube import parse_qs
4318         qs = parse_qs(url)
4319         if qs.get('v', [None])[0]:
4320             return False
4321         return super(YoutubePlaylistIE, cls).suitable(url)
4322
4323     def _real_extract(self, url):
4324         playlist_id = self._match_id(url)
4325         is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
4326         url = update_url_query(
4327             'https://www.youtube.com/playlist',
4328             parse_qs(url) or {'list': playlist_id})
4329         if is_music_url:
4330             url = smuggle_url(url, {'is_music_url': True})
4331         return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4332
4333
4334 class YoutubeYtBeIE(InfoExtractor):
4335     IE_DESC = 'youtu.be'
4336     _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
4337     _TESTS = [{
4338         'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
4339         'info_dict': {
4340             'id': 'yeWKywCrFtk',
4341             'ext': 'mp4',
4342             'title': 'Small Scale Baler and Braiding Rugs',
4343             'uploader': 'Backus-Page House Museum',
4344             'uploader_id': 'backuspagemuseum',
4345             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
4346             'upload_date': '20161008',
4347             'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
4348             'categories': ['Nonprofits & Activism'],
4349             'tags': list,
4350             'like_count': int,
4351             'dislike_count': int,
4352         },
4353         'params': {
4354             'noplaylist': True,
4355             'skip_download': True,
4356         },
4357     }, {
4358         'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
4359         'only_matching': True,
4360     }]
4361
4362     def _real_extract(self, url):
4363         mobj = re.match(self._VALID_URL, url)
4364         video_id = mobj.group('id')
4365         playlist_id = mobj.group('playlist_id')
4366         return self.url_result(
4367             update_url_query('https://www.youtube.com/watch', {
4368                 'v': video_id,
4369                 'list': playlist_id,
4370                 'feature': 'youtu.be',
4371             }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4372
4373
4374 class YoutubeYtUserIE(InfoExtractor):
4375     IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
4376     _VALID_URL = r'ytuser:(?P<id>.+)'
4377     _TESTS = [{
4378         'url': 'ytuser:phihag',
4379         'only_matching': True,
4380     }]
4381
4382     def _real_extract(self, url):
4383         user_id = self._match_id(url)
4384         return self.url_result(
4385             'https://www.youtube.com/user/%s' % user_id,
4386             ie=YoutubeTabIE.ie_key(), video_id=user_id)
4387
4388
4389 class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
4390     IE_NAME = 'youtube:favorites'
4391     IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
4392     _VALID_URL = r':ytfav(?:ou?rite)?s?'
4393     _LOGIN_REQUIRED = True
4394     _TESTS = [{
4395         'url': ':ytfav',
4396         'only_matching': True,
4397     }, {
4398         'url': ':ytfavorites',
4399         'only_matching': True,
4400     }]
4401
4402     def _real_extract(self, url):
4403         return self.url_result(
4404             'https://www.youtube.com/playlist?list=LL',
4405             ie=YoutubeTabIE.ie_key())
4406
4407
4408 class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
4409     IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
4410     # there doesn't appear to be a real limit, for example if you search for
4411     # 'python' you get more than 8.000.000 results
4412     _MAX_RESULTS = float('inf')
4413     IE_NAME = 'youtube:search'
4414     _SEARCH_KEY = 'ytsearch'
4415     _SEARCH_PARAMS = None
4416     _TESTS = []
4417
4418     def _entries(self, query, n):
4419         data = {'query': query}
4420         if self._SEARCH_PARAMS:
4421             data['params'] = self._SEARCH_PARAMS
4422         total = 0
4423         continuation = {}
4424         for page_num in itertools.count(1):
4425             data.update(continuation)
4426             search = self._extract_response(
4427                 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
4428                 check_get_keys=('contents', 'onResponseReceivedCommands')
4429             )
4430             if not search:
4431                 break
4432             slr_contents = try_get(
4433                 search,
4434                 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
4435                  lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
4436                 list)
4437             if not slr_contents:
4438                 break
4439
4440             # Youtube sometimes adds promoted content to searches,
4441             # changing the index location of videos and token.
4442             # So we search through all entries till we find them.
4443             continuation = None
4444             for slr_content in slr_contents:
4445                 if not continuation:
4446                     continuation = self._extract_continuation({'contents': [slr_content]})
4447
4448                 isr_contents = try_get(
4449                     slr_content,
4450                     lambda x: x['itemSectionRenderer']['contents'],
4451                     list)
4452                 if not isr_contents:
4453                     continue
4454                 for content in isr_contents:
4455                     if not isinstance(content, dict):
4456                         continue
4457                     video = content.get('videoRenderer')
4458                     if not isinstance(video, dict):
4459                         continue
4460                     video_id = video.get('videoId')
4461                     if not video_id:
4462                         continue
4463
4464                     yield self._extract_video(video)
4465                     total += 1
4466                     if total == n:
4467                         return
4468
4469             if not continuation:
4470                 break
4471
4472     def _get_n_results(self, query, n):
4473         """Get a specified number of results for a query"""
4474         return self.playlist_result(self._entries(query, n), query, query)
4475
4476
4477 class YoutubeSearchDateIE(YoutubeSearchIE):
4478     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
4479     _SEARCH_KEY = 'ytsearchdate'
4480     IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
4481     _SEARCH_PARAMS = 'CAI%3D'
4482
4483
4484 class YoutubeSearchURLIE(YoutubeSearchIE):
4485     IE_DESC = 'YouTube.com search URLs'
4486     IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
4487     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
4488     # _MAX_RESULTS = 100
4489     _TESTS = [{
4490         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
4491         'playlist_mincount': 5,
4492         'info_dict': {
4493             'id': 'youtube-dl test video',
4494             'title': 'youtube-dl test video',
4495         }
4496     }, {
4497         'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
4498         'only_matching': True,
4499     }]
4500
4501     @classmethod
4502     def _make_valid_url(cls):
4503         return cls._VALID_URL
4504
4505     def _real_extract(self, url):
4506         qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
4507         query = (qs.get('search_query') or qs.get('q'))[0]
4508         self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
4509         return self._get_n_results(query, self._MAX_RESULTS)
4510
4511
4512 class YoutubeFeedsInfoExtractor(YoutubeTabIE):
4513     """
4514     Base class for feed extractors
4515     Subclasses must define the _FEED_NAME property.
4516     """
4517     _LOGIN_REQUIRED = True
4518     _TESTS = []
4519
4520     @property
4521     def IE_NAME(self):
4522         return 'youtube:%s' % self._FEED_NAME
4523
4524     def _real_extract(self, url):
4525         return self.url_result(
4526             'https://www.youtube.com/feed/%s' % self._FEED_NAME,
4527             ie=YoutubeTabIE.ie_key())
4528
4529
4530 class YoutubeWatchLaterIE(InfoExtractor):
4531     IE_NAME = 'youtube:watchlater'
4532     IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
4533     _VALID_URL = r':ytwatchlater'
4534     _TESTS = [{
4535         'url': ':ytwatchlater',
4536         'only_matching': True,
4537     }]
4538
4539     def _real_extract(self, url):
4540         return self.url_result(
4541             'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4542
4543
4544 class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
4545     IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
4546     _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
4547     _FEED_NAME = 'recommended'
4548     _LOGIN_REQUIRED = False
4549     _TESTS = [{
4550         'url': ':ytrec',
4551         'only_matching': True,
4552     }, {
4553         'url': ':ytrecommended',
4554         'only_matching': True,
4555     }, {
4556         'url': 'https://youtube.com',
4557         'only_matching': True,
4558     }]
4559
4560
4561 class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
4562     IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
4563     _VALID_URL = r':ytsub(?:scription)?s?'
4564     _FEED_NAME = 'subscriptions'
4565     _TESTS = [{
4566         'url': ':ytsubs',
4567         'only_matching': True,
4568     }, {
4569         'url': ':ytsubscriptions',
4570         'only_matching': True,
4571     }]
4572
4573
4574 class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
4575     IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
4576     _VALID_URL = r':ythis(?:tory)?'
4577     _FEED_NAME = 'history'
4578     _TESTS = [{
4579         'url': ':ythistory',
4580         'only_matching': True,
4581     }]
4582
4583
4584 class YoutubeTruncatedURLIE(InfoExtractor):
4585     IE_NAME = 'youtube:truncated_url'
4586     IE_DESC = False  # Do not list
4587     _VALID_URL = r'''(?x)
4588         (?:https?://)?
4589         (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
4590         (?:watch\?(?:
4591             feature=[a-z_]+|
4592             annotation_id=annotation_[^&]+|
4593             x-yt-cl=[0-9]+|
4594             hl=[^&]*|
4595             t=[0-9]+
4596         )?
4597         |
4598             attribution_link\?a=[^&]+
4599         )
4600         $
4601     '''
4602
4603     _TESTS = [{
4604         'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
4605         'only_matching': True,
4606     }, {
4607         'url': 'https://www.youtube.com/watch?',
4608         'only_matching': True,
4609     }, {
4610         'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
4611         'only_matching': True,
4612     }, {
4613         'url': 'https://www.youtube.com/watch?feature=foo',
4614         'only_matching': True,
4615     }, {
4616         'url': 'https://www.youtube.com/watch?hl=en-GB',
4617         'only_matching': True,
4618     }, {
4619         'url': 'https://www.youtube.com/watch?t=2372',
4620         'only_matching': True,
4621     }]
4622
4623     def _real_extract(self, url):
4624         raise ExtractorError(
4625             'Did you forget to quote the URL? Remember that & is a meta '
4626             'character in most shells, so you want to put the URL in quotes, '
4627             'like  youtube-dl '
4628             '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
4629             ' or simply  youtube-dl BaW_jenozKc  .',
4630             expected=True)
4631
4632
4633 class YoutubeTruncatedIDIE(InfoExtractor):
4634     IE_NAME = 'youtube:truncated_id'
4635     IE_DESC = False  # Do not list
4636     _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
4637
4638     _TESTS = [{
4639         'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
4640         'only_matching': True,
4641     }]
4642
4643     def _real_extract(self, url):
4644         video_id = self._match_id(url)
4645         raise ExtractorError(
4646             'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
4647             expected=True)