]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[youtube] Add age-gate bypass for unverified accounts (#600)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bytes_to_intlist,
32 clean_html,
33 datetime_from_str,
34 dict_get,
35 error_to_compat_str,
36 ExtractorError,
37 float_or_none,
38 format_field,
39 int_or_none,
40 intlist_to_bytes,
41 mimetype2ext,
42 network_exceptions,
43 orderedSet,
44 parse_codecs,
45 parse_count,
46 parse_duration,
47 parse_iso8601,
48 qualities,
49 remove_start,
50 smuggle_url,
51 str_or_none,
52 str_to_int,
53 traverse_obj,
54 try_get,
55 unescapeHTML,
56 unified_strdate,
57 unsmuggle_url,
58 update_url_query,
59 url_or_none,
60 urlencode_postdata,
61 urljoin,
62 variadic,
63 )
64
65
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of lists."""
    parsed_url = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed_url.query)
68
69
# any clients starting with _ cannot be explicitly requested by the user
# Each entry maps a client name to:
#   INNERTUBE_API_KEY               - API key sent as the ?key= query parameter
#   INNERTUBE_CONTEXT               - the 'context' object sent in the request body
#   INNERTUBE_CONTEXT_CLIENT_NAME   - numeric id for the X-YouTube-Client-Name header
#   INNERTUBE_HOST (optional)       - API hostname (defaults to www.youtube.com)
# Missing keys/hosts and derived *_agegate clients are filled in by
# build_innertube_clients() below.
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'web_creator': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_CREATOR',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
    },
    'android_creator': {
        # no API key here: the default is applied by build_innertube_clients()
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 14
    },
    # ios has HLS live streams
    # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26
    },
    'ios_creator': {
        # no API key here: the default is applied by build_innertube_clients()
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_CREATOR',
                'clientVersion': '21.24.100',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 15
    },
    # mweb has 'ultralow' formats
    # See: https://github.com/yt-dlp/yt-dlp/pull/557
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
208
209
def build_innertube_clients():
    """Fill in defaults for INNERTUBE_CLIENTS and derive *_agegate variants.

    For every client: apply a default API key/host, force an English locale,
    and compute a format-selection priority. Base clients additionally get an
    '<name>_agegate' clone configured as an embedded player (used to bypass
    age gates), and embedded clients get the third-party embed context.
    """
    embed_context = {
        'embedUrl': 'https://google.com',  # Can be any valid URL
    }
    base_clients = ('android', 'web', 'ios', 'mweb')
    base_priority = qualities(base_clients[::-1])

    # iterate over a snapshot since agegate clones are inserted while looping
    for name, cfg in tuple(INNERTUBE_CLIENTS.items()):
        cfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
        cfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
        cfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
        cfg['priority'] = 10 * base_priority(name.split('_', 1)[0])

        if name in base_clients:
            agegate_cfg = copy.deepcopy(cfg)
            agegate_cfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
            agegate_cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            agegate_cfg['priority'] -= 1
            INNERTUBE_CLIENTS[f'{name}_agegate'] = agegate_cfg
        elif name.endswith('_embedded'):
            cfg['INNERTUBE_CONTEXT']['thirdParty'] = embed_context
            cfg['priority'] -= 2
        else:
            cfg['priority'] -= 3
233
234
# Populate client defaults and derived configs once, at import time
build_innertube_clients()
236
237
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # URL path components that are YouTube features, not channel/user names
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Known playlist id prefixes plus the special RDMM/WL/LL/LM aliases
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False
262
263 def _login(self):
264 """
265 Attempt to log in to YouTube.
266 True is returned if successful or skipped.
267 False is returned if login failed.
268
269 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
270 """
271
272 def warn(message):
273 self.report_warning(message)
274
275 # username+password login is broken
276 if (self._LOGIN_REQUIRED
277 and self.get_param('cookiefile') is None
278 and self.get_param('cookiesfrombrowser') is None):
279 self.raise_login_required(
280 'Login details are needed to download this content', method='cookies')
281 username, password = self._get_login_info()
282 if username:
283 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
284 return
285
286 # Everything below this is broken!
287 r'''
288 # No authentication to be performed
289 if username is None:
290 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
291 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
292 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
293 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
294 return True
295
296 login_page = self._download_webpage(
297 self._LOGIN_URL, None,
298 note='Downloading login page',
299 errnote='unable to fetch login page', fatal=False)
300 if login_page is False:
301 return
302
303 login_form = self._hidden_inputs(login_page)
304
305 def req(url, f_req, note, errnote):
306 data = login_form.copy()
307 data.update({
308 'pstMsg': 1,
309 'checkConnection': 'youtube',
310 'checkedDomains': 'youtube',
311 'hl': 'en',
312 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
313 'f.req': json.dumps(f_req),
314 'flowName': 'GlifWebSignIn',
315 'flowEntry': 'ServiceLogin',
316 # TODO: reverse actual botguard identifier generation algo
317 'bgRequest': '["identifier",""]',
318 })
319 return self._download_json(
320 url, None, note=note, errnote=errnote,
321 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
322 fatal=False,
323 data=urlencode_postdata(data), headers={
324 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
325 'Google-Accounts-XSRF': 1,
326 })
327
328 lookup_req = [
329 username,
330 None, [], None, 'US', None, None, 2, False, True,
331 [
332 None, None,
333 [2, 1, None, 1,
334 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
335 None, [], 4],
336 1, [None, None, []], None, None, None, True
337 ],
338 username,
339 ]
340
341 lookup_results = req(
342 self._LOOKUP_URL, lookup_req,
343 'Looking up account info', 'Unable to look up account info')
344
345 if lookup_results is False:
346 return False
347
348 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
349 if not user_hash:
350 warn('Unable to extract user hash')
351 return False
352
353 challenge_req = [
354 user_hash,
355 None, 1, None, [1, None, None, None, [password, None, True]],
356 [
357 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
358 1, [None, None, []], None, None, None, True
359 ]]
360
361 challenge_results = req(
362 self._CHALLENGE_URL, challenge_req,
363 'Logging in', 'Unable to log in')
364
365 if challenge_results is False:
366 return
367
368 login_res = try_get(challenge_results, lambda x: x[0][5], list)
369 if login_res:
370 login_msg = try_get(login_res, lambda x: x[5], compat_str)
371 warn(
372 'Unable to login: %s' % 'Invalid password'
373 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
374 return False
375
376 res = try_get(challenge_results, lambda x: x[0][-1], list)
377 if not res:
378 warn('Unable to extract result entry')
379 return False
380
381 login_challenge = try_get(res, lambda x: x[0][0], list)
382 if login_challenge:
383 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
384 if challenge_str == 'TWO_STEP_VERIFICATION':
385 # SEND_SUCCESS - TFA code has been successfully sent to phone
386 # QUOTA_EXCEEDED - reached the limit of TFA codes
387 status = try_get(login_challenge, lambda x: x[5], compat_str)
388 if status == 'QUOTA_EXCEEDED':
389 warn('Exceeded the limit of TFA codes, try later')
390 return False
391
392 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
393 if not tl:
394 warn('Unable to extract TL')
395 return False
396
397 tfa_code = self._get_tfa_info('2-step verification code')
398
399 if not tfa_code:
400 warn(
401 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
402 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
403 return False
404
405 tfa_code = remove_start(tfa_code, 'G-')
406
407 tfa_req = [
408 user_hash, None, 2, None,
409 [
410 9, None, None, None, None, None, None, None,
411 [None, tfa_code, True, 2]
412 ]]
413
414 tfa_results = req(
415 self._TFA_URL.format(tl), tfa_req,
416 'Submitting TFA code', 'Unable to submit TFA code')
417
418 if tfa_results is False:
419 return False
420
421 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
422 if tfa_res:
423 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
424 warn(
425 'Unable to finish TFA: %s' % 'Invalid TFA code'
426 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
427 return False
428
429 check_cookie_url = try_get(
430 tfa_results, lambda x: x[0][-1][2], compat_str)
431 else:
432 CHALLENGES = {
433 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
434 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
435 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
436 }
437 challenge = CHALLENGES.get(
438 challenge_str,
439 '%s returned error %s.' % (self.IE_NAME, challenge_str))
440 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
441 return False
442 else:
443 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
444
445 if not check_cookie_url:
446 warn('Unable to extract CheckCookie URL')
447 return False
448
449 check_cookie_results = self._download_webpage(
450 check_cookie_url, None, 'Checking cookie', fatal=False)
451
452 if check_cookie_results is False:
453 return False
454
455 if 'https://myaccount.google.com/' not in check_cookie_results:
456 warn('Unable to log in')
457 return False
458
459 return True
460 '''
461
    def _initialize_consent(self):
        """Set a CONSENT cookie so the EU consent interstitial doesn't block requests."""
        cookies = self._get_cookies('https://www.youtube.com/')
        # Logged-in sessions (__Secure-3PSID present) don't hit the consent page
        if cookies.get('__Secure-3PSID'):
            return
        consent_id = None
        consent = cookies.get('CONSENT')
        if consent:
            if 'YES' in consent.value:
                # consent already accepted
                return
            # reuse the pending consent id assigned by YouTube, if any
            consent_id = self._search_regex(
                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
        if not consent_id:
            consent_id = random.randint(100, 999)
        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
476
477 def _real_initialize(self):
478 self._initialize_consent()
479 if self._downloader is None:
480 return
481 if not self._login():
482 return
483
    # Matches the JSON blob assigned to ytInitialData in watch/browse pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Matches the JSON blob assigned to ytInitialPlayerResponse
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Tokens that terminate the above JSON objects in the page source
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
487
    def _get_default_ytcfg(self, client='web'):
        """Return a deep copy of the built-in config for *client* (see INNERTUBE_CLIENTS)."""
        return copy.deepcopy(INNERTUBE_CLIENTS[client])

    def _get_innertube_host(self, client='web'):
        """Return the innertube API hostname for *client* (e.g. www.youtube.com)."""
        return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']

    def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
        # try_get but with fallback to default ytcfg client values when present
        _func = lambda y: try_get(y, getter, expected_type)
        return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))

    def _extract_client_name(self, ytcfg, default_client='web'):
        """Extract the innertube client name from *ytcfg*, with built-in fallback."""
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
503
    @staticmethod
    def _extract_session_index(*data):
        """Return the first SESSION_INDEX found in the given ytcfg dicts, else None."""
        for ytcfg in data:
            session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
            if session_index is not None:
                return session_index

    def _extract_client_version(self, ytcfg, default_client='web'):
        """Extract the innertube client version from *ytcfg*, with built-in fallback."""
        return self._ytcfg_get_safe(
            ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
                    lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)

    def _extract_api_key(self, ytcfg=None, default_client='web'):
        """Extract the innertube API key from *ytcfg*, with built-in fallback."""
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
518
    def _extract_context(self, ytcfg=None, default_client='web'):
        """Return the INNERTUBE_CONTEXT from *ytcfg*, or rebuild it from the
        built-in defaults plus whatever parts the page config provides."""
        _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
        context = _get_context(ytcfg)
        if context:
            return context

        # No full context in the page config: start from the built-in default
        # (deep-copied, so mutating it below is safe)
        context = _get_context(self._get_default_ytcfg(default_client))
        if not ytcfg:
            return context

        # Recreate the client context (required)
        context['client'].update({
            'clientVersion': self._extract_client_version(ytcfg, default_client),
            'clientName': self._extract_client_name(ytcfg, default_client),
        })
        visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
        if visitor_data:
            context['client']['visitorData'] = visitor_data
        return context
538
    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Build the 'SAPISIDHASH <ts>_<sha1>' Authorization header for cookie
        authentication, or return None when no usable cookie is available.

        Side effect: copies __Secure-3PAPISID to a SAPISID cookie when the
        latter is missing (YouTube requires it to be present).
        """
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        yt_cookies = self._get_cookies('https://www.youtube.com')
        sapisid_cookie = dict_get(
            yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if sapisid_cookie is None or not sapisid_cookie.value:
            return
        time_now = round(time.time())
        # SAPISID cookie is required if not already present
        if not yt_cookies.get('SAPISID'):
            self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
            self._set_cookie(
                '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
        self.write_debug('Extracted SAPISID cookie', only_once=True)
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'
558
    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
                  note='Downloading API JSON', errnote='Unable to download API page',
                  context=None, api_key=None, api_hostname=None, default_client='web'):
        """POST a JSON request to the innertube endpoint *ep* (e.g. 'browse',
        'player') and return the parsed JSON response.

        *context*/*api_key*/*api_hostname* default to the values for
        *default_client* when not supplied.
        """
        data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
        data.update(query)
        real_headers = self.generate_api_headers(default_client=default_client)
        real_headers.update({'content-type': 'application/json'})
        if headers:
            real_headers.update(headers)
        return self._download_json(
            'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
            video_id=video_id, fatal=fatal, note=note, errnote=errnote,
            data=json.dumps(data).encode('utf8'), headers=real_headers,
            query={'key': api_key or self._extract_api_key()})
574
    def extract_yt_initial_data(self, video_id, webpage):
        """Parse the ytInitialData JSON object embedded in *webpage* (fatal)."""
        return self._parse_json(
            self._search_regex(
                # prefer the bounded match; fall back to the bare assignment
                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)
581
582 def _extract_identity_token(self, webpage, item_id):
583 if not webpage:
584 return None
585 ytcfg = self.extract_ytcfg(item_id, webpage)
586 if ytcfg:
587 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
588 if token:
589 return token
590 return self._search_regex(
591 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
592 'identity token', default=None)
593
    @staticmethod
    def _extract_account_syncid(*args):
        """
        Extract syncId required to download private playlists of secondary channels
        @params response and/or ytcfg
        """
        for data in args:
            # ytcfg includes channel_syncid if on secondary channel
            delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
            if delegated_sid:
                return delegated_sid
            sync_ids = (try_get(
                data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                       lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
            if len(sync_ids) >= 2 and sync_ids[1]:
                # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
                # and just "user_syncid||" for primary channel. We only want the channel_syncid
                return sync_ids[0]
612
    def extract_ytcfg(self, video_id, webpage):
        """Parse the ytcfg.set({...}) JSON config from *webpage*; {} on failure."""
        if not webpage:
            return {}
        return self._parse_json(
            self._search_regex(
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}
620
    def generate_api_headers(
            self, ytcfg=None, identity_token=None, account_syncid=None,
            visitor_data=None, api_hostname=None, default_client='web', session_index=None):
        """Build the HTTP headers (client name/version, identity, visitor id,
        SAPISIDHASH auth) for an innertube API request."""
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
            'Origin': origin
        }
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
        if session_index is None and ytcfg:
            session_index = self._extract_session_index(ytcfg)
        if account_syncid or session_index is not None:
            headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        # Authorization/X-Origin only when cookie auth material is available
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers
649
650 @staticmethod
651 def _build_api_continuation_query(continuation, ctp=None):
652 query = {
653 'continuation': continuation
654 }
655 # TODO: Inconsistency with clickTrackingParams.
656 # Currently we have a fixed ctp contained within context (from ytcfg)
657 # and a ctp in root query for continuation.
658 if ctp:
659 query['clickTracking'] = {'clickTrackingParams': ctp}
660 return query
661
    @classmethod
    def _extract_next_continuation_data(cls, renderer):
        """Build a continuation query from legacy (next/reload)ContinuationData,
        or return None when *renderer* has none."""
        next_continuation = try_get(
            renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
                       lambda x: x['continuation']['reloadContinuationData']), dict)
        if not next_continuation:
            return
        continuation = next_continuation.get('continuation')
        if not continuation:
            return
        ctp = next_continuation.get('clickTrackingParams')
        return cls._build_api_continuation_query(continuation, ctp)
674
675 @classmethod
676 def _extract_continuation_ep_data(cls, continuation_ep: dict):
677 if isinstance(continuation_ep, dict):
678 continuation = try_get(
679 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
680 if not continuation:
681 return
682 ctp = continuation_ep.get('clickTrackingParams')
683 return cls._build_api_continuation_query(continuation, ctp)
684
    @classmethod
    def _extract_continuation(cls, renderer):
        """Find a continuation query in *renderer*: legacy continuation data
        first, then continuationItemRenderer entries under 'contents'/'items'.
        Returns None when no continuation is found."""
        next_continuation = cls._extract_next_continuation_data(renderer)
        if next_continuation:
            return next_continuation

        contents = []
        for key in ('contents', 'items'):
            contents.extend(try_get(renderer, lambda x: x[key], list) or [])

        for content in contents:
            if not isinstance(content, dict):
                continue
            continuation_ep = try_get(
                content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
                          lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
                dict)
            continuation = cls._extract_continuation_ep_data(continuation_ep)
            if continuation:
                return continuation
705
    @classmethod
    def _extract_alerts(cls, data):
        """Yield (alert_type, message) pairs from a response's 'alerts' list."""
        for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
            if not isinstance(alert_dict, dict):
                continue
            # each alert_dict wraps one renderer, e.g. {'alertRenderer': {...}}
            for alert in alert_dict.values():
                alert_type = alert.get('type')
                if not alert_type:
                    continue
                message = cls._get_text(alert, 'text')
                if message:
                    yield alert_type, message
718
719 def _report_alerts(self, alerts, expected=True):
720 errors = []
721 warnings = []
722 for alert_type, alert_message in alerts:
723 if alert_type.lower() == 'error':
724 errors.append([alert_type, alert_message])
725 else:
726 warnings.append([alert_type, alert_message])
727
728 for alert_type, alert_message in (warnings + errors[:-1]):
729 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
730 if errors:
731 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
732
733 def _extract_and_report_alerts(self, data, *args, **kwargs):
734 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
735
736 def _extract_badges(self, renderer: dict):
737 badges = set()
738 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
739 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
740 if label:
741 badges.add(label.lower())
742 return badges
743
    @staticmethod
    def _get_text(data, *path_list, max_runs=None):
        """Extract display text from innertube text objects.

        Each *path* in *path_list* is traversed from *data* (no paths means
        *data* itself). For every candidate object, 'simpleText' wins; else
        the first *max_runs* (or all) 'runs' are joined. Returns the first
        non-empty text found, otherwise None.
        """
        for path in path_list or [None]:
            if path is None:
                obj = [data]
            else:
                obj = traverse_obj(data, path, default=[])
                # a path without wildcards yields a single object, not a list
                if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
                    obj = [obj]
            for item in obj:
                text = try_get(item, lambda x: x['simpleText'], compat_str)
                if text:
                    return text
                runs = try_get(item, lambda x: x['runs'], list) or []
                if not runs and isinstance(item, list):
                    # the item may already be a list of runs
                    runs = item

                runs = runs[:min(len(runs), max_runs or len(runs))]
                text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
                if text:
                    return text
765
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the innertube API with retries.

        Retries (up to the 'extractor_retries' param, default 3) on retryable
        network errors and on responses missing all of *check_get_keys*.
        Reports/raises YouTube alerts found in the response. Returns the parsed
        response, or None when not *fatal*.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
824
825 @staticmethod
826 def is_music_url(url):
827 return re.match(r'https?://music\.youtube\.com/', url) is not None
828
    def _extract_video(self, renderer):
        """Build a url-type result dict for YoutubeIE from a videoRenderer-like object."""
        video_id = renderer.get('videoId')
        title = self._get_text(renderer, 'title')
        description = self._get_text(renderer, 'descriptionSnippet')
        # duration from lengthText, else from the thumbnail time-status overlay
        duration = parse_duration(self._get_text(
            renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
        view_count_text = self._get_text(renderer, 'viewCountText') or ''
        # leading digits (with thousands separators) of e.g. '1,234 views'
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))

        uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')

        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
853
854
855 class YoutubeIE(YoutubeBaseInfoExtractor):
856 IE_DESC = 'YouTube.com'
857 _INVIDIOUS_SITES = (
858 # invidious-redirect websites
859 r'(?:www\.)?redirect\.invidious\.io',
860 r'(?:(?:www|dev)\.)?invidio\.us',
861 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
862 r'(?:www\.)?invidious\.pussthecat\.org',
863 r'(?:www\.)?invidious\.zee\.li',
864 r'(?:www\.)?invidious\.ethibox\.fr',
865 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
866 # youtube-dl invidious instances list
867 r'(?:(?:www|no)\.)?invidiou\.sh',
868 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
869 r'(?:www\.)?invidious\.kabi\.tk',
870 r'(?:www\.)?invidious\.mastodon\.host',
871 r'(?:www\.)?invidious\.zapashcanon\.fr',
872 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
873 r'(?:www\.)?invidious\.tinfoil-hat\.net',
874 r'(?:www\.)?invidious\.himiko\.cloud',
875 r'(?:www\.)?invidious\.reallyancient\.tech',
876 r'(?:www\.)?invidious\.tube',
877 r'(?:www\.)?invidiou\.site',
878 r'(?:www\.)?invidious\.site',
879 r'(?:www\.)?invidious\.xyz',
880 r'(?:www\.)?invidious\.nixnet\.xyz',
881 r'(?:www\.)?invidious\.048596\.xyz',
882 r'(?:www\.)?invidious\.drycat\.fr',
883 r'(?:www\.)?inv\.skyn3t\.in',
884 r'(?:www\.)?tube\.poal\.co',
885 r'(?:www\.)?tube\.connect\.cafe',
886 r'(?:www\.)?vid\.wxzm\.sx',
887 r'(?:www\.)?vid\.mint\.lgbt',
888 r'(?:www\.)?vid\.puffyan\.us',
889 r'(?:www\.)?yewtu\.be',
890 r'(?:www\.)?yt\.elukerio\.org',
891 r'(?:www\.)?yt\.lelux\.fi',
892 r'(?:www\.)?invidious\.ggc-project\.de',
893 r'(?:www\.)?yt\.maisputain\.ovh',
894 r'(?:www\.)?ytprivate\.com',
895 r'(?:www\.)?invidious\.13ad\.de',
896 r'(?:www\.)?invidious\.toot\.koeln',
897 r'(?:www\.)?invidious\.fdn\.fr',
898 r'(?:www\.)?watch\.nettohikari\.com',
899 r'(?:www\.)?invidious\.namazso\.eu',
900 r'(?:www\.)?invidious\.silkky\.cloud',
901 r'(?:www\.)?invidious\.exonip\.de',
902 r'(?:www\.)?invidious\.riverside\.rocks',
903 r'(?:www\.)?invidious\.blamefran\.net',
904 r'(?:www\.)?invidious\.moomoo\.de',
905 r'(?:www\.)?ytb\.trom\.tf',
906 r'(?:www\.)?yt\.cyberhost\.uk',
907 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
908 r'(?:www\.)?qklhadlycap4cnod\.onion',
909 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
910 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
911 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
912 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
913 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
914 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
915 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
916 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
917 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
918 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
919 )
    # Verbose (?x) regex matching every URL shape this extractor accepts:
    # youtube.com variants, known mirrors/proxies, the Invidious instances
    # interpolated below via %(invidious)s, short youtu.be-style hosts, or a
    # bare 11-character video ID.  Group 1 captures the optional scheme/host
    # part; the conditional (?(1).+)? only allows trailing junk when a host
    # was actually matched.
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                            (?:www\.)?deturl\.com/www\.youtube\.com|
                            (?:www\.)?pwnyoutube\.com|
                            (?:www\.)?hooktube\.com|
                            (?:www\.)?yourepeat\.com|
                            tube\.majestyc\.net|
                            %(invidious)s|
                            youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                                 v=
                             )
                         ))
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                            %(invidious)s
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     (?:\#|$)""" % {
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    # Alternative patterns (tried in order) for pulling an <id> out of a
    # player JS URL.  NOTE(review): the consumer is outside this chunk;
    # presumably the id keys a cache of per-player signature functions --
    # confirm against the extraction code.
    _PLAYER_INFO_RE = (
        # modern /s/player/<id>/... URLs
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        # older vflset / plasma-ias base.js layouts
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        # legacy "vfl..." ids embedded anywhere in a .js path
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
    )
    # Static per-itag format metadata: container ext, resolution, codecs and
    # audio bitrate for itags whose properties YouTube does not (reliably)
    # declare in its responses.  Negative 'preference' values de-prioritize
    # 3D and HLS variants.  NOTE(review): how these entries are merged into
    # the extracted format dicts happens outside this chunk.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }
    # Subtitle/caption formats this extractor will request.
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # NOTE(review): geo-bypass is disabled for this extractor; the flag is
    # consumed by the InfoExtractor base class outside this chunk -- confirm
    # its exact semantics there.
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
1070 _TESTS = [
1071 {
1072 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1073 'info_dict': {
1074 'id': 'BaW_jenozKc',
1075 'ext': 'mp4',
1076 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1077 'uploader': 'Philipp Hagemeister',
1078 'uploader_id': 'phihag',
1079 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1080 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1081 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1082 'upload_date': '20121002',
1083 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1084 'categories': ['Science & Technology'],
1085 'tags': ['youtube-dl'],
1086 'duration': 10,
1087 'view_count': int,
1088 'like_count': int,
1089 'dislike_count': int,
1090 'start_time': 1,
1091 'end_time': 9,
1092 }
1093 },
1094 {
1095 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1096 'note': 'Embed-only video (#1746)',
1097 'info_dict': {
1098 'id': 'yZIXLfi8CZQ',
1099 'ext': 'mp4',
1100 'upload_date': '20120608',
1101 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1102 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1103 'uploader': 'SET India',
1104 'uploader_id': 'setindia',
1105 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1106 'age_limit': 18,
1107 },
1108 'skip': 'Private video',
1109 },
1110 {
1111 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1112 'note': 'Use the first video ID in the URL',
1113 'info_dict': {
1114 'id': 'BaW_jenozKc',
1115 'ext': 'mp4',
1116 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1117 'uploader': 'Philipp Hagemeister',
1118 'uploader_id': 'phihag',
1119 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1120 'upload_date': '20121002',
1121 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1122 'categories': ['Science & Technology'],
1123 'tags': ['youtube-dl'],
1124 'duration': 10,
1125 'view_count': int,
1126 'like_count': int,
1127 'dislike_count': int,
1128 },
1129 'params': {
1130 'skip_download': True,
1131 },
1132 },
1133 {
1134 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1135 'note': '256k DASH audio (format 141) via DASH manifest',
1136 'info_dict': {
1137 'id': 'a9LDPn-MO4I',
1138 'ext': 'm4a',
1139 'upload_date': '20121002',
1140 'uploader_id': '8KVIDEO',
1141 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1142 'description': '',
1143 'uploader': '8KVIDEO',
1144 'title': 'UHDTV TEST 8K VIDEO.mp4'
1145 },
1146 'params': {
1147 'youtube_include_dash_manifest': True,
1148 'format': '141',
1149 },
1150 'skip': 'format 141 not served anymore',
1151 },
1152 # DASH manifest with encrypted signature
1153 {
1154 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1155 'info_dict': {
1156 'id': 'IB3lcPjvWLA',
1157 'ext': 'm4a',
1158 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1159 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1160 'duration': 244,
1161 'uploader': 'AfrojackVEVO',
1162 'uploader_id': 'AfrojackVEVO',
1163 'upload_date': '20131011',
1164 'abr': 129.495,
1165 },
1166 'params': {
1167 'youtube_include_dash_manifest': True,
1168 'format': '141/bestaudio[ext=m4a]',
1169 },
1170 },
1171 # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1172 {
1173 'note': 'Embed allowed age-gate video',
1174 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1175 'info_dict': {
1176 'id': 'HtVdAasjOgU',
1177 'ext': 'mp4',
1178 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1179 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1180 'duration': 142,
1181 'uploader': 'The Witcher',
1182 'uploader_id': 'WitcherGame',
1183 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1184 'upload_date': '20140605',
1185 'age_limit': 18,
1186 },
1187 },
1188 {
1189 'note': 'Age-gate video with embed allowed in public site',
1190 'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1191 'info_dict': {
1192 'id': 'HsUATh_Nc2U',
1193 'ext': 'mp4',
1194 'title': 'Godzilla 2 (Official Video)',
1195 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1196 'upload_date': '20200408',
1197 'uploader_id': 'FlyingKitty900',
1198 'uploader': 'FlyingKitty',
1199 'age_limit': 18,
1200 },
1201 },
1202 {
1203 'note': 'Age-gate video embedable only with clientScreen=EMBED',
1204 'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1205 'info_dict': {
1206 'id': 'Tq92D6wQ1mg',
1207 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1208 'ext': 'mp4',
1209 'upload_date': '20191227',
1210 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1211 'uploader': 'Projekt Melody',
1212 'description': 'md5:17eccca93a786d51bc67646756894066',
1213 'age_limit': 18,
1214 },
1215 },
1216 {
1217 'note': 'Non-Agegated non-embeddable video',
1218 'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1219 'info_dict': {
1220 'id': 'MeJVWBSsPAY',
1221 'ext': 'mp4',
1222 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1223 'uploader': 'Herr Lurik',
1224 'uploader_id': 'st3in234',
1225 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1226 'upload_date': '20130730',
1227 },
1228 },
1229 {
1230 'note': 'Non-bypassable age-gated video',
1231 'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1232 'only_matching': True,
1233 },
1234 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1235 # YouTube Red ad is not captured for creator
1236 {
1237 'url': '__2ABJjxzNo',
1238 'info_dict': {
1239 'id': '__2ABJjxzNo',
1240 'ext': 'mp4',
1241 'duration': 266,
1242 'upload_date': '20100430',
1243 'uploader_id': 'deadmau5',
1244 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1245 'creator': 'deadmau5',
1246 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1247 'uploader': 'deadmau5',
1248 'title': 'Deadmau5 - Some Chords (HD)',
1249 'alt_title': 'Some Chords',
1250 },
1251 'expected_warnings': [
1252 'DASH manifest missing',
1253 ]
1254 },
1255 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1256 {
1257 'url': 'lqQg6PlCWgI',
1258 'info_dict': {
1259 'id': 'lqQg6PlCWgI',
1260 'ext': 'mp4',
1261 'duration': 6085,
1262 'upload_date': '20150827',
1263 'uploader_id': 'olympic',
1264 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1265 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1266 'uploader': 'Olympics',
1267 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1268 },
1269 'params': {
1270 'skip_download': 'requires avconv',
1271 }
1272 },
1273 # Non-square pixels
1274 {
1275 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1276 'info_dict': {
1277 'id': '_b-2C3KPAM0',
1278 'ext': 'mp4',
1279 'stretched_ratio': 16 / 9.,
1280 'duration': 85,
1281 'upload_date': '20110310',
1282 'uploader_id': 'AllenMeow',
1283 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1284 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1285 'uploader': '孫ᄋᄅ',
1286 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1287 },
1288 },
1289 # url_encoded_fmt_stream_map is empty string
1290 {
1291 'url': 'qEJwOuvDf7I',
1292 'info_dict': {
1293 'id': 'qEJwOuvDf7I',
1294 'ext': 'webm',
1295 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1296 'description': '',
1297 'upload_date': '20150404',
1298 'uploader_id': 'spbelect',
1299 'uploader': 'Наблюдатели Петербурга',
1300 },
1301 'params': {
1302 'skip_download': 'requires avconv',
1303 },
1304 'skip': 'This live event has ended.',
1305 },
1306 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1307 {
1308 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1309 'info_dict': {
1310 'id': 'FIl7x6_3R5Y',
1311 'ext': 'webm',
1312 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1313 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1314 'duration': 220,
1315 'upload_date': '20150625',
1316 'uploader_id': 'dorappi2000',
1317 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1318 'uploader': 'dorappi2000',
1319 'formats': 'mincount:31',
1320 },
1321 'skip': 'not actual anymore',
1322 },
1323 # DASH manifest with segment_list
1324 {
1325 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1326 'md5': '8ce563a1d667b599d21064e982ab9e31',
1327 'info_dict': {
1328 'id': 'CsmdDsKjzN8',
1329 'ext': 'mp4',
1330 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1331 'uploader': 'Airtek',
1332 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1333 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1334 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1335 },
1336 'params': {
1337 'youtube_include_dash_manifest': True,
1338 'format': '135', # bestvideo
1339 },
1340 'skip': 'This live event has ended.',
1341 },
1342 {
1343 # Multifeed videos (multiple cameras), URL is for Main Camera
1344 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1345 'info_dict': {
1346 'id': 'jvGDaLqkpTg',
1347 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1348 'description': 'md5:e03b909557865076822aa169218d6a5d',
1349 },
1350 'playlist': [{
1351 'info_dict': {
1352 'id': 'jvGDaLqkpTg',
1353 'ext': 'mp4',
1354 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1355 'description': 'md5:e03b909557865076822aa169218d6a5d',
1356 'duration': 10643,
1357 'upload_date': '20161111',
1358 'uploader': 'Team PGP',
1359 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1360 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1361 },
1362 }, {
1363 'info_dict': {
1364 'id': '3AKt1R1aDnw',
1365 'ext': 'mp4',
1366 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1367 'description': 'md5:e03b909557865076822aa169218d6a5d',
1368 'duration': 10991,
1369 'upload_date': '20161111',
1370 'uploader': 'Team PGP',
1371 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1372 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1373 },
1374 }, {
1375 'info_dict': {
1376 'id': 'RtAMM00gpVc',
1377 'ext': 'mp4',
1378 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1379 'description': 'md5:e03b909557865076822aa169218d6a5d',
1380 'duration': 10995,
1381 'upload_date': '20161111',
1382 'uploader': 'Team PGP',
1383 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1384 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1385 },
1386 }, {
1387 'info_dict': {
1388 'id': '6N2fdlP3C5U',
1389 'ext': 'mp4',
1390 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1391 'description': 'md5:e03b909557865076822aa169218d6a5d',
1392 'duration': 10990,
1393 'upload_date': '20161111',
1394 'uploader': 'Team PGP',
1395 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1396 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1397 },
1398 }],
1399 'params': {
1400 'skip_download': True,
1401 },
1402 'skip': 'Not multifeed anymore',
1403 },
1404 {
1405 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1406 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1407 'info_dict': {
1408 'id': 'gVfLd0zydlo',
1409 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1410 },
1411 'playlist_count': 2,
1412 'skip': 'Not multifeed anymore',
1413 },
1414 {
1415 'url': 'https://vid.plus/FlRa-iH7PGw',
1416 'only_matching': True,
1417 },
1418 {
1419 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1420 'only_matching': True,
1421 },
1422 {
1423 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1424 # Also tests cut-off URL expansion in video description (see
1425 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1426 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1427 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1428 'info_dict': {
1429 'id': 'lsguqyKfVQg',
1430 'ext': 'mp4',
1431 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1432 'alt_title': 'Dark Walk',
1433 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1434 'duration': 133,
1435 'upload_date': '20151119',
1436 'uploader_id': 'IronSoulElf',
1437 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1438 'uploader': 'IronSoulElf',
1439 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1440 'track': 'Dark Walk',
1441 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1442 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1443 },
1444 'params': {
1445 'skip_download': True,
1446 },
1447 },
1448 {
1449 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1450 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1451 'only_matching': True,
1452 },
1453 {
1454 # Video with yt:stretch=17:0
1455 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1456 'info_dict': {
1457 'id': 'Q39EVAstoRM',
1458 'ext': 'mp4',
1459 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1460 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1461 'upload_date': '20151107',
1462 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1463 'uploader': 'CH GAMER DROID',
1464 },
1465 'params': {
1466 'skip_download': True,
1467 },
1468 'skip': 'This video does not exist.',
1469 },
1470 {
1471 # Video with incomplete 'yt:stretch=16:'
1472 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1473 'only_matching': True,
1474 },
1475 {
1476 # Video licensed under Creative Commons
1477 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1478 'info_dict': {
1479 'id': 'M4gD1WSo5mA',
1480 'ext': 'mp4',
1481 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1482 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1483 'duration': 721,
1484 'upload_date': '20150127',
1485 'uploader_id': 'BerkmanCenter',
1486 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1487 'uploader': 'The Berkman Klein Center for Internet & Society',
1488 'license': 'Creative Commons Attribution license (reuse allowed)',
1489 },
1490 'params': {
1491 'skip_download': True,
1492 },
1493 },
1494 {
1495 # Channel-like uploader_url
1496 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1497 'info_dict': {
1498 'id': 'eQcmzGIKrzg',
1499 'ext': 'mp4',
1500 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1501 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1502 'duration': 4060,
1503 'upload_date': '20151119',
1504 'uploader': 'Bernie Sanders',
1505 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1506 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1507 'license': 'Creative Commons Attribution license (reuse allowed)',
1508 },
1509 'params': {
1510 'skip_download': True,
1511 },
1512 },
1513 {
1514 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1515 'only_matching': True,
1516 },
1517 {
1518 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1519 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1520 'only_matching': True,
1521 },
1522 {
1523 # Rental video preview
1524 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1525 'info_dict': {
1526 'id': 'uGpuVWrhIzE',
1527 'ext': 'mp4',
1528 'title': 'Piku - Trailer',
1529 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1530 'upload_date': '20150811',
1531 'uploader': 'FlixMatrix',
1532 'uploader_id': 'FlixMatrixKaravan',
1533 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1534 'license': 'Standard YouTube License',
1535 },
1536 'params': {
1537 'skip_download': True,
1538 },
1539 'skip': 'This video is not available.',
1540 },
1541 {
1542 # YouTube Red video with episode data
1543 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1544 'info_dict': {
1545 'id': 'iqKdEhx-dD4',
1546 'ext': 'mp4',
1547 'title': 'Isolation - Mind Field (Ep 1)',
1548 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1549 'duration': 2085,
1550 'upload_date': '20170118',
1551 'uploader': 'Vsauce',
1552 'uploader_id': 'Vsauce',
1553 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1554 'series': 'Mind Field',
1555 'season_number': 1,
1556 'episode_number': 1,
1557 },
1558 'params': {
1559 'skip_download': True,
1560 },
1561 'expected_warnings': [
1562 'Skipping DASH manifest',
1563 ],
1564 },
1565 {
1566 # The following content has been identified by the YouTube community
1567 # as inappropriate or offensive to some audiences.
1568 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1569 'info_dict': {
1570 'id': '6SJNVb0GnPI',
1571 'ext': 'mp4',
1572 'title': 'Race Differences in Intelligence',
1573 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1574 'duration': 965,
1575 'upload_date': '20140124',
1576 'uploader': 'New Century Foundation',
1577 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1578 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1579 },
1580 'params': {
1581 'skip_download': True,
1582 },
1583 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1584 },
1585 {
1586 # itag 212
1587 'url': '1t24XAntNCY',
1588 'only_matching': True,
1589 },
1590 {
1591 # geo restricted to JP
1592 'url': 'sJL6WA-aGkQ',
1593 'only_matching': True,
1594 },
1595 {
1596 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1597 'only_matching': True,
1598 },
1599 {
1600 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1601 'only_matching': True,
1602 },
1603 {
1604 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1605 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1606 'only_matching': True,
1607 },
1608 {
1609 # DRM protected
1610 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1611 'only_matching': True,
1612 },
1613 {
1614 # Video with unsupported adaptive stream type formats
1615 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1616 'info_dict': {
1617 'id': 'Z4Vy8R84T1U',
1618 'ext': 'mp4',
1619 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1620 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1621 'duration': 433,
1622 'upload_date': '20130923',
1623 'uploader': 'Amelia Putri Harwita',
1624 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1625 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1626 'formats': 'maxcount:10',
1627 },
1628 'params': {
1629 'skip_download': True,
1630 'youtube_include_dash_manifest': False,
1631 },
1632 'skip': 'not actual anymore',
1633 },
1634 {
1635 # Youtube Music Auto-generated description
1636 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1637 'info_dict': {
1638 'id': 'MgNrAu2pzNs',
1639 'ext': 'mp4',
1640 'title': 'Voyeur Girl',
1641 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1642 'upload_date': '20190312',
1643 'uploader': 'Stephen - Topic',
1644 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1645 'artist': 'Stephen',
1646 'track': 'Voyeur Girl',
1647 'album': 'it\'s too much love to know my dear',
1648 'release_date': '20190313',
1649 'release_year': 2019,
1650 },
1651 'params': {
1652 'skip_download': True,
1653 },
1654 },
1655 {
1656 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1657 'only_matching': True,
1658 },
1659 {
1660 # invalid -> valid video id redirection
1661 'url': 'DJztXj2GPfl',
1662 'info_dict': {
1663 'id': 'DJztXj2GPfk',
1664 'ext': 'mp4',
1665 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1666 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1667 'upload_date': '20090125',
1668 'uploader': 'Prochorowka',
1669 'uploader_id': 'Prochorowka',
1670 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1671 'artist': 'Panjabi MC',
1672 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1673 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1674 },
1675 'params': {
1676 'skip_download': True,
1677 },
1678 'skip': 'Video unavailable',
1679 },
1680 {
1681 # empty description results in an empty string
1682 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1683 'info_dict': {
1684 'id': 'x41yOUIvK2k',
1685 'ext': 'mp4',
1686 'title': 'IMG 3456',
1687 'description': '',
1688 'upload_date': '20170613',
1689 'uploader_id': 'ElevageOrVert',
1690 'uploader': 'ElevageOrVert',
1691 },
1692 'params': {
1693 'skip_download': True,
1694 },
1695 },
1696 {
1697 # with '};' inside yt initial data (see [1])
1698 # see [2] for an example with '};' inside ytInitialPlayerResponse
1699 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1700 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1701 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1702 'info_dict': {
1703 'id': 'CHqg6qOn4no',
1704 'ext': 'mp4',
1705 'title': 'Part 77 Sort a list of simple types in c#',
1706 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1707 'upload_date': '20130831',
1708 'uploader_id': 'kudvenkat',
1709 'uploader': 'kudvenkat',
1710 },
1711 'params': {
1712 'skip_download': True,
1713 },
1714 },
1715 {
1716 # another example of '};' in ytInitialData
1717 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1718 'only_matching': True,
1719 },
1720 {
1721 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1722 'only_matching': True,
1723 },
1724 {
1725 # https://github.com/ytdl-org/youtube-dl/pull/28094
1726 'url': 'OtqTfy26tG0',
1727 'info_dict': {
1728 'id': 'OtqTfy26tG0',
1729 'ext': 'mp4',
1730 'title': 'Burn Out',
1731 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1732 'upload_date': '20141120',
1733 'uploader': 'The Cinematic Orchestra - Topic',
1734 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1735 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1736 'artist': 'The Cinematic Orchestra',
1737 'track': 'Burn Out',
1738 'album': 'Every Day',
1739 'release_data': None,
1740 'release_year': None,
1741 },
1742 'params': {
1743 'skip_download': True,
1744 },
1745 },
1746 {
1747 # controversial video, only works with bpctr when authenticated with cookies
1748 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1749 'only_matching': True,
1750 },
1751 {
1752 # controversial video, requires bpctr/contentCheckOk
1753 'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1754 'info_dict': {
1755 'id': 'SZJvDhaSDnc',
1756 'ext': 'mp4',
1757 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1758 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1759 'uploader': 'CBS This Morning',
1760 'uploader_id': 'CBSThisMorning',
1761 'upload_date': '20140716',
1762 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1763 }
1764 },
1765 {
1766 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1767 'url': 'cBvYw8_A0vQ',
1768 'info_dict': {
1769 'id': 'cBvYw8_A0vQ',
1770 'ext': 'mp4',
1771 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1772 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1773 'upload_date': '20201120',
1774 'uploader': 'Walk around Japan',
1775 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1776 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1777 },
1778 'params': {
1779 'skip_download': True,
1780 },
1781 }, {
1782 # Has multiple audio streams
1783 'url': 'WaOKSUlf4TM',
1784 'only_matching': True
1785 }, {
1786 # Requires Premium: has format 141 when requested using YTM url
1787 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1788 'only_matching': True
1789 }, {
1790 # multiple subtitles with same lang_code
1791 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1792 'only_matching': True,
1793 }, {
1794 # Force use android client fallback
1795 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1796 'info_dict': {
1797 'id': 'YOelRv7fMxY',
1798 'title': 'DIGGING A SECRET TUNNEL Part 1',
1799 'ext': '3gp',
1800 'upload_date': '20210624',
1801 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1802 'uploader': 'colinfurze',
1803 'uploader_id': 'colinfurze',
1804 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1805 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1806 },
1807 'params': {
1808 'format': '17', # 3gp format available on android
1809 'extractor_args': {'youtube': {'player_client': ['android']}},
1810 },
1811 },
1812 {
1813 # Skip download of additional client configs (remix client config in this case)
1814 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1815 'only_matching': True,
1816 'params': {
1817 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1818 },
1819 }
1820 ]
1821
1822 @classmethod
1823 def suitable(cls, url):
1824 # Hack for lazy extractors until more generic solution is implemented
1825 # (see #28780)
1826 from .youtube import parse_qs
1827 qs = parse_qs(url)
1828 if qs.get('list', [None])[0]:
1829 return False
1830 return super(YoutubeIE, cls).suitable(url)
1831
1832 def __init__(self, *args, **kwargs):
1833 super(YoutubeIE, self).__init__(*args, **kwargs)
1834 self._code_cache = {}
1835 self._player_cache = {}
1836
1837 def _extract_player_url(self, ytcfg=None, webpage=None):
1838 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1839 if not player_url and webpage:
1840 player_url = self._search_regex(
1841 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1842 webpage, 'player URL', fatal=False)
1843 if not player_url:
1844 return None
1845 if player_url.startswith('//'):
1846 player_url = 'https:' + player_url
1847 elif not re.match(r'https?://', player_url):
1848 player_url = compat_urlparse.urljoin(
1849 'https://www.youtube.com', player_url)
1850 return player_url
1851
1852 def _signature_cache_id(self, example_sig):
1853 """ Return a string representation of a signature """
1854 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1855
1856 @classmethod
1857 def _extract_player_info(cls, player_url):
1858 for player_re in cls._PLAYER_INFO_RE:
1859 id_m = re.search(player_re, player_url)
1860 if id_m:
1861 break
1862 else:
1863 raise ExtractorError('Cannot identify player %r' % player_url)
1864 return id_m.group('id')
1865
1866 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1867 player_id = self._extract_player_info(player_url)
1868 if player_id not in self._code_cache:
1869 self._code_cache[player_id] = self._download_webpage(
1870 player_url, video_id, fatal=fatal,
1871 note='Downloading player ' + player_id,
1872 errnote='Download of %s failed' % player_url)
1873 return player_id in self._code_cache
1874
1875 def _extract_signature_function(self, video_id, player_url, example_sig):
1876 player_id = self._extract_player_info(player_url)
1877
1878 # Read from filesystem cache
1879 func_id = 'js_%s_%s' % (
1880 player_id, self._signature_cache_id(example_sig))
1881 assert os.path.basename(func_id) == func_id
1882
1883 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1884 if cache_spec is not None:
1885 return lambda s: ''.join(s[i] for i in cache_spec)
1886
1887 if self._load_player(video_id, player_url):
1888 code = self._code_cache[player_id]
1889 res = self._parse_sig_js(code)
1890
1891 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1892 cache_res = res(test_string)
1893 cache_spec = [ord(c) for c in cache_res]
1894
1895 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1896 return res
1897
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Used for debugging (--youtube-print-sig-code): runs *func* on a probe
        string, recovers the index permutation it applies, and pretty-prints it
        as a sum of slice expressions over ``s``.
        """
        def gen_sig_code(idxs):
            # Compress runs of consecutive indices (stride +/-1) into slices
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: continue while the stride is unchanged
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Two adjacent indices start a new sliceable run
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Emit the final element or close the final run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe string of distinct characters reveals the permutation
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1936
    def _parse_sig_js(self, jscode):
        """Locate the signature function inside player JS and wrap it with JSInterpreter.

        The patterns are tried in order; newer player layouts come first and
        obsolete ones are kept as fallbacks, so the ordering is significant.
        Returns a callable mapping a scrambled signature string to the
        unscrambled one.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes the signature as a single argument
        return lambda s: initial_function([s])
1960
1961 def _decrypt_signature(self, s, video_id, player_url):
1962 """Turn the encrypted s field into a working signature"""
1963
1964 if player_url is None:
1965 raise ExtractorError('Cannot decrypt signature without player_url')
1966
1967 try:
1968 player_id = (player_url, self._signature_cache_id(s))
1969 if player_id not in self._player_cache:
1970 func = self._extract_signature_function(
1971 video_id, player_url, s
1972 )
1973 self._player_cache[player_id] = func
1974 func = self._player_cache[player_id]
1975 if self.get_param('youtube_print_sig_code'):
1976 self._print_sig_code(func, s)
1977 return func(s)
1978 except Exception as e:
1979 tb = traceback.format_exc()
1980 raise ExtractorError(
1981 'Signature extraction failed: ' + tb, cause=e)
1982
1983 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1984 """
1985 Extract signatureTimestamp (sts)
1986 Required to tell API what sig/player version is in use.
1987 """
1988 sts = None
1989 if isinstance(ytcfg, dict):
1990 sts = int_or_none(ytcfg.get('STS'))
1991
1992 if not sts:
1993 # Attempt to extract from player
1994 if player_url is None:
1995 error_msg = 'Cannot extract signature timestamp without player_url.'
1996 if fatal:
1997 raise ExtractorError(error_msg)
1998 self.report_warning(error_msg)
1999 return
2000 if self._load_player(video_id, player_url, fatal=fatal):
2001 player_id = self._extract_player_info(player_url)
2002 code = self._code_cache[player_id]
2003 sts = int_or_none(self._search_regex(
2004 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
2005 'JS player signature timestamp', group='sts', fatal=fatal))
2006 return sts
2007
2008 def _mark_watched(self, video_id, player_responses):
2009 playback_url = traverse_obj(
2010 player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
2011 expected_type=url_or_none, get_all=False)
2012 if not playback_url:
2013 self.report_warning('Unable to mark watched')
2014 return
2015 parsed_playback_url = compat_urlparse.urlparse(playback_url)
2016 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
2017
2018 # cpn generation algorithm is reverse engineered from base.js.
2019 # In fact it works even with dummy cpn.
2020 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
2021 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
2022
2023 qs.update({
2024 'ver': ['2'],
2025 'cpn': [cpn],
2026 })
2027 playback_url = compat_urlparse.urlunparse(
2028 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2029
2030 self._download_webpage(
2031 playback_url, video_id, 'Marking watched',
2032 'Unable to mark watched', fatal=False)
2033
2034 @staticmethod
2035 def _extract_urls(webpage):
2036 # Embedded YouTube player
2037 entries = [
2038 unescapeHTML(mobj.group('url'))
2039 for mobj in re.finditer(r'''(?x)
2040 (?:
2041 <iframe[^>]+?src=|
2042 data-video-url=|
2043 <embed[^>]+?src=|
2044 embedSWF\(?:\s*|
2045 <object[^>]+data=|
2046 new\s+SWFObject\(
2047 )
2048 (["\'])
2049 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2050 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2051 \1''', webpage)]
2052
2053 # lazyYT YouTube embed
2054 entries.extend(list(map(
2055 unescapeHTML,
2056 re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2057
2058 # Wordpress "YouTube Video Importer" plugin
2059 matches = re.findall(r'''(?x)<div[^>]+
2060 class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2061 data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2062 entries.extend(m[-1] for m in matches)
2063
2064 return entries
2065
2066 @staticmethod
2067 def _extract_url(webpage):
2068 urls = YoutubeIE._extract_urls(webpage)
2069 return urls[0] if urls else None
2070
2071 @classmethod
2072 def extract_id(cls, url):
2073 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2074 if mobj is None:
2075 raise ExtractorError('Invalid URL: %s' % url)
2076 video_id = mobj.group(2)
2077 return video_id
2078
2079 def _extract_chapters_from_json(self, data, duration):
2080 chapter_list = traverse_obj(
2081 data, (
2082 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2083 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2084 ), expected_type=list)
2085
2086 return self._extract_chapters(
2087 chapter_list,
2088 chapter_time=lambda chapter: float_or_none(
2089 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2090 chapter_title=lambda chapter: traverse_obj(
2091 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2092 duration=duration)
2093
2094 def _extract_chapters_from_engagement_panel(self, data, duration):
2095 content_list = traverse_obj(
2096 data,
2097 ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2098 expected_type=list, default=[])
2099 chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2100 chapter_title = lambda chapter: self._get_text(chapter, 'title')
2101
2102 return next((
2103 filter(None, (
2104 self._extract_chapters(
2105 traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2106 chapter_time, chapter_title, duration)
2107 for contents in content_list
2108 ))), [])
2109
2110 def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2111 chapters = []
2112 last_chapter = {'start_time': 0}
2113 for idx, chapter in enumerate(chapter_list or []):
2114 title = chapter_title(chapter)
2115 start_time = chapter_time(chapter)
2116 if start_time is None:
2117 continue
2118 last_chapter['end_time'] = start_time
2119 if start_time < last_chapter['start_time']:
2120 if idx == 1:
2121 chapters.pop()
2122 self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2123 else:
2124 self.report_warning(f'Invalid start time for chapter "{title}"')
2125 continue
2126 last_chapter = {'start_time': start_time, 'title': title}
2127 chapters.append(last_chapter)
2128 last_chapter['end_time'] = duration
2129 return chapters
2130
2131 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2132 return self._parse_json(self._search_regex(
2133 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2134 regex), webpage, name, default='{}'), video_id, fatal=False)
2135
2136 @staticmethod
2137 def parse_time_text(time_text):
2138 """
2139 Parse the comment time text
2140 time_text is in the format 'X units ago (edited)'
2141 """
2142 time_text_split = time_text.split(' ')
2143 if len(time_text_split) >= 3:
2144 try:
2145 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2146 except ValueError:
2147 return None
2148
2149 def _extract_comment(self, comment_renderer, parent=None):
2150 comment_id = comment_renderer.get('commentId')
2151 if not comment_id:
2152 return
2153
2154 text = self._get_text(comment_renderer, 'contentText')
2155
2156 # note: timestamp is an estimate calculated from the current time and time_text
2157 time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2158 time_text_dt = self.parse_time_text(time_text)
2159 if isinstance(time_text_dt, datetime.datetime):
2160 timestamp = calendar.timegm(time_text_dt.timetuple())
2161 author = self._get_text(comment_renderer, 'authorText')
2162 author_id = try_get(comment_renderer,
2163 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2164
2165 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2166 lambda x: x['likeCount']), compat_str)) or 0
2167 author_thumbnail = try_get(comment_renderer,
2168 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2169
2170 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2171 is_favorited = 'creatorHeart' in (try_get(
2172 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2173 return {
2174 'id': comment_id,
2175 'text': text,
2176 'timestamp': timestamp,
2177 'time_text': time_text,
2178 'like_count': votes,
2179 'is_favorited': is_favorited,
2180 'author': author,
2181 'author_id': author_id,
2182 'author_thumbnail': author_thumbnail,
2183 'author_is_uploader': author_is_uploader,
2184 'parent': parent or 'root'
2185 }
2186
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """Generator of comment info-dicts (and one int: the estimated total).

        Recurses once for reply threads (parent set). comment_counts is a shared
        mutable list [downloaded so far, estimated total, current thread #] that
        is threaded through the recursion for progress reporting.
        """

        def extract_header(contents):
            # Parse the comments header: total count and the continuation for
            # the requested sort order. Returns (total_comments, continuation).
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield the comments of one page, recursing into reply threads
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the /next endpoint until no continuation remains
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry the visitorData forward so subsequent pages share a session
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            # The single int yielded signals the estimated total
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2357
2358 @staticmethod
2359 def _generate_comment_continuation(video_id):
2360 """
2361 Generates initial comment section continuation token from given video id
2362 """
2363 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2364 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2365 new_continuation_intlist = list(itertools.chain.from_iterable(
2366 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2367 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2368
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction"""
        def _real_comment_extract(contents):
            # Delegate to _comment_entries for the first recognized renderer;
            # it yields comment dicts plus one int (the estimated total)
            if isinstance(contents, list):
                for entry in contents:
                    for key, renderer in entry.items():
                        if key not in known_entry_comment_renderers:
                            continue
                        yield from self._comment_entries(
                            renderer, video_id=video_id, ytcfg=ytcfg,
                            identity_token=self._extract_identity_token(webpage, item_id=video_id),
                            account_syncid=self._extract_account_syncid(ytcfg))
                        break
        comments = []
        known_entry_comment_renderers = ('itemSectionRenderer',)
        estimated_total = 0
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
        # Force English regardless of account setting to prevent parsing issues
        # See: https://github.com/yt-dlp/yt-dlp/issues/532
        # deepcopy so the caller's ytcfg is not mutated
        ytcfg = copy.deepcopy(ytcfg)
        traverse_obj(
            ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
        try:
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                if isinstance(comment, int):
                    # int values are the estimated total, not a comment
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            # Keep whatever was downloaded so far instead of aborting
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2406
2407 @staticmethod
2408 def _generate_player_context(sts=None):
2409 context = {
2410 'html5Preference': 'HTML5_PREF_WANTS',
2411 }
2412 if sts is not None:
2413 context['signatureTimestamp'] = sts
2414 return {
2415 'playbackContext': {
2416 'contentPlaybackContext': context
2417 },
2418 'contentCheckOk': True,
2419 'racyCheckOk': True
2420 }
2421
2422 @staticmethod
2423 def _is_agegated(player_response):
2424 if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2425 return True
2426
2427 reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2428 AGE_GATE_REASONS = (
2429 'confirm your age', 'age-restricted', 'inappropriate', # reason
2430 'age_verification_required', 'age_check_required', # status
2431 )
2432 return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2433
2434 @staticmethod
2435 def _is_unplayable(player_response):
2436 return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2437
2438 def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
2439
2440 session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2441 syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2442 sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
2443 headers = self.generate_api_headers(
2444 player_ytcfg, identity_token, syncid,
2445 default_client=client, session_index=session_index)
2446
2447 yt_query = {'videoId': video_id}
2448 yt_query.update(self._generate_player_context(sts))
2449 return self._extract_response(
2450 item_id=video_id, ep='player', query=yt_query,
2451 ytcfg=player_ytcfg, headers=headers, fatal=False,
2452 default_client=client,
2453 note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2454 ) or None
2455
2456 def _get_requested_clients(self, url, smuggled_data):
2457 requested_clients = []
2458 allowed_clients = sorted(
2459 [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2460 key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2461 for client in self._configuration_arg('player_client'):
2462 if client in allowed_clients:
2463 requested_clients.append(client)
2464 elif client == 'all':
2465 requested_clients.extend(allowed_clients)
2466 else:
2467 self.report_warning(f'Skipping unsupported client {client}')
2468 if not requested_clients:
2469 requested_clients = ['android', 'web']
2470
2471 if smuggled_data.get('is_music_url') or self.is_music_url(url):
2472 requested_clients.extend(
2473 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2474
2475 return orderedSet(requested_clients)
2476
2477 def _extract_player_ytcfg(self, client, video_id):
2478 url = {
2479 'web_music': 'https://music.youtube.com',
2480 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2481 }.get(client)
2482 if not url:
2483 return {}
2484 webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2485 return self.extract_ytcfg(video_id, webpage) or {}
2486
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
        """Yield player responses for each requested client, in priority order.

        Dynamically appends fallback clients (agegate / creator variants) when a
        response turns out to be age-gated or unplayable.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        original_clients = clients
        # Reverse so clients.pop() consumes in the originally requested order
        clients = clients[::-1]

        def append_client(client_name):
            # Only queue known clients that were not explicitly requested
            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
                clients.append(client_name)

        while clients:
            client = clients.pop()
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            # For 'web', reuse the response already embedded in the page
            pr = (
                initial_pr if client == 'web' and initial_pr
                else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr))
            if pr:
                yield pr

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header():
                append_client(client.replace('_agegate', '_creator'))
            elif self._is_agegated(pr):
                append_client(f'{client}_agegate')

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr and 'web' not in original_clients:
            initial_pr['streamingData'] = None
            yield initial_pr
2527
    def _extract_formats(self, streaming_data, video_id, player_url, is_live):
        """Yield yt-dlp format dicts from the ``streamingData`` of all
        player responses, then from HLS/DASH manifests (unless skipped).

        Duplicate streams across clients are dropped via itag + audio-track id.
        """
        itags, stream_ids = [], []
        # itag -> quality label and height -> quality label, used later to
        # guess a quality for manifest-derived formats
        itag_qualities, res_qualities = {}, {}
        q = qualities([
            # Normally tiny is the smallest video-only formats. But
            # audio-only formats with unknown quality may get tagged as tiny
            'tiny',
            'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
            'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
        ])
        streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])

        for fmt in streaming_formats:
            # Skip DRM'd and segmented-trailer entries outright
            if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
                continue

            itag = str_or_none(fmt.get('itag'))
            audio_track = fmt.get('audioTrack') or {}
            stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
            if stream_id in stream_ids:
                continue

            quality = fmt.get('quality')
            height = int_or_none(fmt.get('height'))
            if quality == 'tiny' or not quality:
                quality = fmt.get('audioQuality', '').lower() or quality
            # The 3gp format (17) in android client has a quality of "small",
            # but is actually worse than other formats
            if itag == '17':
                quality = 'tiny'
            if quality:
                if itag:
                    itag_qualities[itag] = quality
                if height:
                    res_qualities[height] = quality
            # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
            # (adding `&sq=0` to the URL) and parsing emsg box to determine the
            # number of fragment that would subsequently requested with (`&sq=N`)
            if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
                continue

            fmt_url = fmt.get('url')
            if not fmt_url:
                # URL is hidden inside signatureCipher; needs player JS to decrypt
                sc = compat_parse_qs(fmt.get('signatureCipher'))
                fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
                encrypted_sig = try_get(sc, lambda x: x['s'][0])
                if not (sc and fmt_url and encrypted_sig):
                    continue
                if not player_url:
                    continue
                signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
                sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
                fmt_url += '&' + sp + '=' + signature

            # Register the itag/stream only once a usable URL is guaranteed,
            # so manifest formats below can still fill in skipped itags
            if itag:
                itags.append(itag)
                stream_ids.append(stream_id)

            tbr = float_or_none(
                fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
            dct = {
                'asr': int_or_none(fmt.get('audioSampleRate')),
                'filesize': int_or_none(fmt.get('contentLength')),
                'format_id': itag,
                'format_note': ', '.join(filter(None, (
                    audio_track.get('displayName'),
                    fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
                'fps': int_or_none(fmt.get('fps')),
                'height': height,
                'quality': q(quality),
                'tbr': tbr,
                'url': fmt_url,
                'width': int_or_none(fmt.get('width')),
                # audioTrack id looks like "<lang>.<n>"; keep the language part
                'language': audio_track.get('id', '').split('.')[0],
            }
            mime_mobj = re.match(
                r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
            if mime_mobj:
                dct['ext'] = mimetype2ext(mime_mobj.group(1))
                dct.update(parse_codecs(mime_mobj.group(2)))
            no_audio = dct.get('acodec') == 'none'
            no_video = dct.get('vcodec') == 'none'
            if no_audio:
                dct['vbr'] = tbr
            if no_video:
                dct['abr'] = tbr
            if no_audio or no_video:
                dct['downloader_options'] = {
                    # Youtube throttles chunks >~10M
                    'http_chunk_size': 10485760,
                }
            if dct.get('ext'):
                dct['container'] = dct['ext'] + '_dash'
            yield dct

        skip_manifests = self._configuration_arg('skip')
        get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
        get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)

        def guess_quality(f):
            # Map a manifest format back to a known quality via itag or height
            for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
                if val in qdict:
                    return q(qdict[val])
            return -1

        for sd in streaming_data:
            hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
            if hls_manifest_url:
                for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
                    itag = self._search_regex(
                        r'/itag/(\d+)', f['url'], 'itag', default=None)
                    if itag in itags:
                        continue
                    if itag:
                        f['format_id'] = itag
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    yield f

            dash_manifest_url = get_dash and sd.get('dashManifestUrl')
            if dash_manifest_url:
                for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
                    itag = f['format_id']
                    if itag in itags:
                        continue
                    if itag:
                        itags.append(itag)
                    f['quality'] = guess_quality(f)
                    # DASH manifests carry the file size in the /clen/ path segment
                    filesize = int_or_none(self._search_regex(
                        r'/clen/(\d+)', f.get('fragment_base_url')
                        or f['url'], 'file size', default=None))
                    if filesize:
                        f['filesize'] = filesize
                    yield f
2662
2663 def _real_extract(self, url):
2664 url, smuggled_data = unsmuggle_url(url, {})
2665 video_id = self._match_id(url)
2666
2667 base_url = self.http_scheme() + '//www.youtube.com/'
2668 webpage_url = base_url + 'watch?v=' + video_id
2669 webpage = self._download_webpage(
2670 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2671
2672 master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2673 player_url = self._extract_player_url(master_ytcfg, webpage)
2674 identity_token = self._extract_identity_token(webpage, video_id)
2675
2676 player_responses = list(self._extract_player_responses(
2677 self._get_requested_clients(url, smuggled_data),
2678 video_id, webpage, master_ytcfg, player_url, identity_token))
2679
2680 get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2681
2682 playability_statuses = traverse_obj(
2683 player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2684
2685 trailer_video_id = get_first(
2686 playability_statuses,
2687 ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2688 expected_type=str)
2689 if trailer_video_id:
2690 return self.url_result(
2691 trailer_video_id, self.ie_key(), trailer_video_id)
2692
2693 search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2694 if webpage else (lambda x: None))
2695
2696 video_details = traverse_obj(
2697 player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2698 microformats = traverse_obj(
2699 player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2700 expected_type=dict, default=[])
2701 video_title = (
2702 get_first(video_details, 'title')
2703 or self._get_text(microformats, (..., 'title'))
2704 or search_meta(['og:title', 'twitter:title', 'title']))
2705 video_description = get_first(video_details, 'shortDescription')
2706
2707 if not smuggled_data.get('force_singlefeed', False):
2708 if not self.get_param('noplaylist'):
2709 multifeed_metadata_list = get_first(
2710 player_responses,
2711 ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2712 expected_type=str)
2713 if multifeed_metadata_list:
2714 entries = []
2715 feed_ids = []
2716 for feed in multifeed_metadata_list.split(','):
2717 # Unquote should take place before split on comma (,) since textual
2718 # fields may contain comma as well (see
2719 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2720 feed_data = compat_parse_qs(
2721 compat_urllib_parse_unquote_plus(feed))
2722
2723 def feed_entry(name):
2724 return try_get(
2725 feed_data, lambda x: x[name][0], compat_str)
2726
2727 feed_id = feed_entry('id')
2728 if not feed_id:
2729 continue
2730 feed_title = feed_entry('title')
2731 title = video_title
2732 if feed_title:
2733 title += ' (%s)' % feed_title
2734 entries.append({
2735 '_type': 'url_transparent',
2736 'ie_key': 'Youtube',
2737 'url': smuggle_url(
2738 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2739 {'force_singlefeed': True}),
2740 'title': title,
2741 })
2742 feed_ids.append(feed_id)
2743 self.to_screen(
2744 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2745 % (', '.join(feed_ids), video_id))
2746 return self.playlist_result(
2747 entries, video_id, video_title, video_description)
2748 else:
2749 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2750
2751 live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2752 is_live = get_first(video_details, 'isLive')
2753 if is_live is None:
2754 is_live = get_first(live_broadcast_details, 'isLiveNow')
2755
2756 streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2757 formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2758
2759 if not formats:
2760 if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2761 self.raise_no_formats(
2762 'This video is DRM protected.', expected=True)
2763 pemr = get_first(
2764 playability_statuses,
2765 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2766 reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2767 subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2768 if subreason:
2769 if subreason == 'The uploader has not made this video available in your country.':
2770 countries = get_first(microformats, 'availableCountries')
2771 if not countries:
2772 regions_allowed = search_meta('regionsAllowed')
2773 countries = regions_allowed.split(',') if regions_allowed else None
2774 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2775 reason += f'. {subreason}'
2776 if reason:
2777 self.raise_no_formats(reason, expected=True)
2778
2779 for f in formats:
2780 if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
2781 f['source_preference'] = -10
2782 # TODO: this method is not reliable
2783 f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
2784
2785 # Source is given priority since formats that throttle are given lower source_preference
2786 # When throttling issue is fully fixed, remove this
2787 self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
2788
2789 keywords = get_first(video_details, 'keywords', expected_type=list) or []
2790 if not keywords and webpage:
2791 keywords = [
2792 unescapeHTML(m.group('content'))
2793 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2794 for keyword in keywords:
2795 if keyword.startswith('yt:stretch='):
2796 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2797 if mobj:
2798 # NB: float is intentional for forcing float division
2799 w, h = (float(v) for v in mobj.groups())
2800 if w > 0 and h > 0:
2801 ratio = w / h
2802 for f in formats:
2803 if f.get('vcodec') != 'none':
2804 f['stretched_ratio'] = ratio
2805 break
2806
2807 thumbnails = []
2808 thumbnail_dicts = traverse_obj(
2809 (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2810 expected_type=dict, default=[])
2811 for thumbnail in thumbnail_dicts:
2812 thumbnail_url = thumbnail.get('url')
2813 if not thumbnail_url:
2814 continue
2815 # Sometimes youtube gives a wrong thumbnail URL. See:
2816 # https://github.com/yt-dlp/yt-dlp/issues/233
2817 # https://github.com/ytdl-org/youtube-dl/issues/28023
2818 if 'maxresdefault' in thumbnail_url:
2819 thumbnail_url = thumbnail_url.split('?')[0]
2820 thumbnails.append({
2821 'url': thumbnail_url,
2822 'height': int_or_none(thumbnail.get('height')),
2823 'width': int_or_none(thumbnail.get('width')),
2824 })
2825 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2826 if thumbnail_url:
2827 thumbnails.append({
2828 'url': thumbnail_url,
2829 })
2830 # The best resolution thumbnails sometimes does not appear in the webpage
2831 # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2832 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2833 hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2834 # TODO: Test them also? - For some videos, even these don't exist
2835 guaranteed_thumbnail_names = [
2836 'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2837 'mqdefault', 'mq1', 'mq2', 'mq3',
2838 'default', '1', '2', '3'
2839 ]
2840 thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2841 n_thumbnail_names = len(thumbnail_names)
2842
2843 thumbnails.extend({
2844 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2845 video_id=video_id, name=name, ext=ext,
2846 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2847 '_test_url': name in hq_thumbnail_names,
2848 } for name in thumbnail_names for ext in ('webp', 'jpg'))
2849 for thumb in thumbnails:
2850 i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2851 thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2852 self._remove_duplicate_formats(thumbnails)
2853
2854 category = get_first(microformats, 'category') or search_meta('genre')
2855 channel_id = str_or_none(
2856 get_first(video_details, 'channelId')
2857 or get_first(microformats, 'externalChannelId')
2858 or search_meta('channelId'))
2859 duration = int_or_none(
2860 get_first(video_details, 'lengthSeconds')
2861 or get_first(microformats, 'lengthSeconds')
2862 or parse_duration(search_meta('duration'))) or None
2863 owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2864
2865 live_content = get_first(video_details, 'isLiveContent')
2866 is_upcoming = get_first(video_details, 'isUpcoming')
2867 if is_live is None:
2868 if is_upcoming or live_content is False:
2869 is_live = False
2870 if is_upcoming is None and (live_content or is_live):
2871 is_upcoming = False
2872 live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2873 live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2874 if not duration and live_endtime and live_starttime:
2875 duration = live_endtime - live_starttime
2876
2877 info = {
2878 'id': video_id,
2879 'title': self._live_title(video_title) if is_live else video_title,
2880 'formats': formats,
2881 'thumbnails': thumbnails,
2882 'description': video_description,
2883 'upload_date': unified_strdate(
2884 get_first(microformats, 'uploadDate')
2885 or search_meta('uploadDate')),
2886 'uploader': get_first(video_details, 'author'),
2887 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2888 'uploader_url': owner_profile_url,
2889 'channel_id': channel_id,
2890 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2891 'duration': duration,
2892 'view_count': int_or_none(
2893 get_first((video_details, microformats), (..., 'viewCount'))
2894 or search_meta('interactionCount')),
2895 'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2896 'age_limit': 18 if (
2897 get_first(microformats, 'isFamilySafe') is False
2898 or search_meta('isFamilyFriendly') == 'false'
2899 or search_meta('og:restrictions:age') == '18+') else 0,
2900 'webpage_url': webpage_url,
2901 'categories': [category] if category else None,
2902 'tags': keywords,
2903 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2904 'is_live': is_live,
2905 'was_live': (False if is_live or is_upcoming or live_content is False
2906 else None if is_live is None or is_upcoming is None
2907 else live_content),
2908 'live_status': 'is_upcoming' if is_upcoming else None, # rest will be set by YoutubeDL
2909 'release_timestamp': live_starttime,
2910 }
2911
2912 pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2913 # Converted into dicts to remove duplicates
2914 captions = {
2915 sub.get('baseUrl'): sub
2916 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2917 translation_languages = {
2918 lang.get('languageCode'): lang.get('languageName')
2919 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2920 subtitles = {}
2921 if pctr:
2922 def process_language(container, base_url, lang_code, sub_name, query):
2923 lang_subs = container.setdefault(lang_code, [])
2924 for fmt in self._SUBTITLE_FORMATS:
2925 query.update({
2926 'fmt': fmt,
2927 })
2928 lang_subs.append({
2929 'ext': fmt,
2930 'url': update_url_query(base_url, query),
2931 'name': sub_name,
2932 })
2933
2934 for base_url, caption_track in captions.items():
2935 if not base_url:
2936 continue
2937 if caption_track.get('kind') != 'asr':
2938 lang_code = (
2939 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2940 or caption_track.get('languageCode'))
2941 if not lang_code:
2942 continue
2943 process_language(
2944 subtitles, base_url, lang_code,
2945 traverse_obj(caption_track, ('name', 'simpleText')),
2946 {})
2947 continue
2948 automatic_captions = {}
2949 for trans_code, trans_name in translation_languages.items():
2950 if not trans_code:
2951 continue
2952 process_language(
2953 automatic_captions, base_url, trans_code,
2954 self._get_text(trans_name, max_runs=1),
2955 {'tlang': trans_code})
2956 info['automatic_captions'] = automatic_captions
2957 info['subtitles'] = subtitles
2958
2959 parsed_url = compat_urllib_parse_urlparse(url)
2960 for component in [parsed_url.fragment, parsed_url.query]:
2961 query = compat_parse_qs(component)
2962 for k, v in query.items():
2963 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2964 d_k += '_time'
2965 if d_k not in info and k in s_ks:
2966 info[d_k] = parse_duration(query[k][0])
2967
2968 # Youtube Music Auto-generated description
2969 if video_description:
2970 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2971 if mobj:
2972 release_year = mobj.group('release_year')
2973 release_date = mobj.group('release_date')
2974 if release_date:
2975 release_date = release_date.replace('-', '')
2976 if not release_year:
2977 release_year = release_date[:4]
2978 info.update({
2979 'album': mobj.group('album'.strip()),
2980 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2981 'track': mobj.group('track').strip(),
2982 'release_date': release_date,
2983 'release_year': int_or_none(release_year),
2984 })
2985
2986 initial_data = None
2987 if webpage:
2988 initial_data = self._extract_yt_initial_variable(
2989 webpage, self._YT_INITIAL_DATA_RE, video_id,
2990 'yt initial data')
2991 if not initial_data:
2992 headers = self.generate_api_headers(
2993 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
2994 session_index=self._extract_session_index(master_ytcfg))
2995
2996 initial_data = self._extract_response(
2997 item_id=video_id, ep='next', fatal=False,
2998 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
2999 note='Downloading initial data API JSON')
3000
3001 try:
3002 # This will error if there is no livechat
3003 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
3004 info['subtitles']['live_chat'] = [{
3005 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
3006 'video_id': video_id,
3007 'ext': 'json',
3008 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
3009 }]
3010 except (KeyError, IndexError, TypeError):
3011 pass
3012
3013 if initial_data:
3014 info['chapters'] = (
3015 self._extract_chapters_from_json(initial_data, duration)
3016 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3017 or None)
3018
3019 contents = try_get(
3020 initial_data,
3021 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3022 list) or []
3023 for content in contents:
3024 vpir = content.get('videoPrimaryInfoRenderer')
3025 if vpir:
3026 stl = vpir.get('superTitleLink')
3027 if stl:
3028 stl = self._get_text(stl)
3029 if try_get(
3030 vpir,
3031 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3032 info['location'] = stl
3033 else:
3034 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3035 if mobj:
3036 info.update({
3037 'series': mobj.group(1),
3038 'season_number': int(mobj.group(2)),
3039 'episode_number': int(mobj.group(3)),
3040 })
3041 for tlb in (try_get(
3042 vpir,
3043 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3044 list) or []):
3045 tbr = tlb.get('toggleButtonRenderer') or {}
3046 for getter, regex in [(
3047 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3048 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3049 lambda x: x['accessibility'],
3050 lambda x: x['accessibilityData']['accessibilityData'],
3051 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3052 label = (try_get(tbr, getter, dict) or {}).get('label')
3053 if label:
3054 mobj = re.match(regex, label)
3055 if mobj:
3056 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3057 break
3058 sbr_tooltip = try_get(
3059 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3060 if sbr_tooltip:
3061 like_count, dislike_count = sbr_tooltip.split(' / ')
3062 info.update({
3063 'like_count': str_to_int(like_count),
3064 'dislike_count': str_to_int(dislike_count),
3065 })
3066 vsir = content.get('videoSecondaryInfoRenderer')
3067 if vsir:
3068 info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3069 rows = try_get(
3070 vsir,
3071 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3072 list) or []
3073 multiple_songs = False
3074 for row in rows:
3075 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3076 multiple_songs = True
3077 break
3078 for row in rows:
3079 mrr = row.get('metadataRowRenderer') or {}
3080 mrr_title = mrr.get('title')
3081 if not mrr_title:
3082 continue
3083 mrr_title = self._get_text(mrr, 'title')
3084 mrr_contents_text = self._get_text(mrr, ('contents', 0))
3085 if mrr_title == 'License':
3086 info['license'] = mrr_contents_text
3087 elif not multiple_songs:
3088 if mrr_title == 'Album':
3089 info['album'] = mrr_contents_text
3090 elif mrr_title == 'Artist':
3091 info['artist'] = mrr_contents_text
3092 elif mrr_title == 'Song':
3093 info['track'] = mrr_contents_text
3094
3095 fallbacks = {
3096 'channel': 'uploader',
3097 'channel_id': 'uploader_id',
3098 'channel_url': 'uploader_url',
3099 }
3100 for to, frm in fallbacks.items():
3101 if not info.get(to):
3102 info[to] = info.get(frm)
3103
3104 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3105 v = info.get(s_k)
3106 if v:
3107 info[d_k] = v
3108
3109 is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3110 is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3111 is_membersonly = None
3112 is_premium = None
3113 if initial_data and is_private is not None:
3114 is_membersonly = False
3115 is_premium = False
3116 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3117 badge_labels = set()
3118 for content in contents:
3119 if not isinstance(content, dict):
3120 continue
3121 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3122 for badge_label in badge_labels:
3123 if badge_label.lower() == 'members only':
3124 is_membersonly = True
3125 elif badge_label.lower() == 'premium':
3126 is_premium = True
3127 elif badge_label.lower() == 'unlisted':
3128 is_unlisted = True
3129
3130 info['availability'] = self._availability(
3131 is_private=is_private,
3132 needs_premium=is_premium,
3133 needs_subscription=is_membersonly,
3134 needs_auth=info['age_limit'] >= 18,
3135 is_unlisted=None if is_private is None else is_unlisted)
3136
3137 # get xsrf for annotations or comments
3138 get_annotations = self.get_param('writeannotations', False)
3139 get_comments = self.get_param('getcomments', False)
3140 if get_annotations or get_comments:
3141 xsrf_token = None
3142 if master_ytcfg:
3143 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
3144 if not xsrf_token:
3145 xsrf_token = self._search_regex(
3146 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
3147 webpage, 'xsrf token', group='xsrf_token', fatal=False)
3148
3149 # annotations
3150 if get_annotations:
3151 invideo_url = get_first(
3152 player_responses,
3153 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3154 expected_type=str)
3155 if xsrf_token and invideo_url:
3156 xsrf_field_name = None
3157 if master_ytcfg:
3158 xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3159 if not xsrf_field_name:
3160 xsrf_field_name = self._search_regex(
3161 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3162 webpage, 'xsrf field name',
3163 group='xsrf_field_name', default='session_token')
3164 info['annotations'] = self._download_webpage(
3165 self._proto_relative_url(invideo_url),
3166 video_id, note='Downloading annotations',
3167 errnote='Unable to download video annotations', fatal=False,
3168 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3169
3170 if get_comments:
3171 info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3172
3173 self.mark_watched(video_id, player_responses)
3174
3175 return info
3176
3177
3178 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3179 IE_DESC = 'YouTube.com tab'
3180 _VALID_URL = r'''(?x)
3181 https?://
3182 (?:\w+\.)?
3183 (?:
3184 youtube(?:kids)?\.com|
3185 invidio\.us
3186 )/
3187 (?:
3188 (?P<channel_type>channel|c|user|browse)/|
3189 (?P<not_channel>
3190 feed/|hashtag/|
3191 (?:playlist|watch)\?.*?\blist=
3192 )|
3193 (?!(?:%s)\b) # Direct URLs
3194 )
3195 (?P<id>[^/?\#&]+)
3196 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3197 IE_NAME = 'youtube:tab'
3198
3199 _TESTS = [{
3200 'note': 'playlists, multipage',
3201 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3202 'playlist_mincount': 94,
3203 'info_dict': {
3204 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3205 'title': 'Игорь Клейнер - Playlists',
3206 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3207 'uploader': 'Игорь Клейнер',
3208 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3209 },
3210 }, {
3211 'note': 'playlists, multipage, different order',
3212 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3213 'playlist_mincount': 94,
3214 'info_dict': {
3215 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3216 'title': 'Игорь Клейнер - Playlists',
3217 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3218 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3219 'uploader': 'Игорь Клейнер',
3220 },
3221 }, {
3222 'note': 'playlists, series',
3223 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3224 'playlist_mincount': 5,
3225 'info_dict': {
3226 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3227 'title': '3Blue1Brown - Playlists',
3228 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3229 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3230 'uploader': '3Blue1Brown',
3231 },
3232 }, {
3233 'note': 'playlists, singlepage',
3234 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3235 'playlist_mincount': 4,
3236 'info_dict': {
3237 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3238 'title': 'ThirstForScience - Playlists',
3239 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3240 'uploader': 'ThirstForScience',
3241 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3242 }
3243 }, {
3244 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3245 'only_matching': True,
3246 }, {
3247 'note': 'basic, single video playlist',
3248 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3249 'info_dict': {
3250 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3251 'uploader': 'Sergey M.',
3252 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3253 'title': 'youtube-dl public playlist',
3254 },
3255 'playlist_count': 1,
3256 }, {
3257 'note': 'empty playlist',
3258 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3259 'info_dict': {
3260 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3261 'uploader': 'Sergey M.',
3262 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3263 'title': 'youtube-dl empty playlist',
3264 },
3265 'playlist_count': 0,
3266 }, {
3267 'note': 'Home tab',
3268 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3269 'info_dict': {
3270 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3271 'title': 'lex will - Home',
3272 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3273 'uploader': 'lex will',
3274 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3275 },
3276 'playlist_mincount': 2,
3277 }, {
3278 'note': 'Videos tab',
3279 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3280 'info_dict': {
3281 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3282 'title': 'lex will - Videos',
3283 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3284 'uploader': 'lex will',
3285 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3286 },
3287 'playlist_mincount': 975,
3288 }, {
3289 'note': 'Videos tab, sorted by popular',
3290 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3291 'info_dict': {
3292 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3293 'title': 'lex will - Videos',
3294 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3295 'uploader': 'lex will',
3296 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3297 },
3298 'playlist_mincount': 199,
3299 }, {
3300 'note': 'Playlists tab',
3301 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3302 'info_dict': {
3303 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3304 'title': 'lex will - Playlists',
3305 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3306 'uploader': 'lex will',
3307 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3308 },
3309 'playlist_mincount': 17,
3310 }, {
3311 'note': 'Community tab',
3312 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3313 'info_dict': {
3314 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3315 'title': 'lex will - Community',
3316 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3317 'uploader': 'lex will',
3318 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3319 },
3320 'playlist_mincount': 18,
3321 }, {
3322 'note': 'Channels tab',
3323 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3324 'info_dict': {
3325 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3326 'title': 'lex will - Channels',
3327 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3328 'uploader': 'lex will',
3329 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3330 },
3331 'playlist_mincount': 12,
3332 }, {
3333 'note': 'Search tab',
3334 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3335 'playlist_mincount': 40,
3336 'info_dict': {
3337 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3338 'title': '3Blue1Brown - Search - linear algebra',
3339 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3340 'uploader': '3Blue1Brown',
3341 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3342 },
3343 }, {
3344 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3345 'only_matching': True,
3346 }, {
3347 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3348 'only_matching': True,
3349 }, {
3350 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3351 'only_matching': True,
3352 }, {
3353 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3354 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3355 'info_dict': {
3356 'title': '29C3: Not my department',
3357 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3358 'uploader': 'Christiaan008',
3359 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3360 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3361 },
3362 'playlist_count': 96,
3363 }, {
3364 'note': 'Large playlist',
3365 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3366 'info_dict': {
3367 'title': 'Uploads from Cauchemar',
3368 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3369 'uploader': 'Cauchemar',
3370 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3371 },
3372 'playlist_mincount': 1123,
3373 }, {
3374 'note': 'even larger playlist, 8832 videos',
3375 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3376 'only_matching': True,
3377 }, {
3378 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3379 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3380 'info_dict': {
3381 'title': 'Uploads from Interstellar Movie',
3382 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3383 'uploader': 'Interstellar Movie',
3384 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3385 },
3386 'playlist_mincount': 21,
3387 }, {
3388 'note': 'Playlist with "show unavailable videos" button',
3389 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3390 'info_dict': {
3391 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3392 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3393 'uploader': 'Phim Siêu Nhân Nhật Bản',
3394 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3395 },
3396 'playlist_mincount': 200,
3397 }, {
3398 'note': 'Playlist with unavailable videos in page 7',
3399 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3400 'info_dict': {
3401 'title': 'Uploads from BlankTV',
3402 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3403 'uploader': 'BlankTV',
3404 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3405 },
3406 'playlist_mincount': 1000,
3407 }, {
3408 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3409 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3410 'info_dict': {
3411 'title': 'Data Analysis with Dr Mike Pound',
3412 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3413 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3414 'uploader': 'Computerphile',
3415 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3416 },
3417 'playlist_mincount': 11,
3418 }, {
3419 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3420 'only_matching': True,
3421 }, {
3422 'note': 'Playlist URL that does not actually serve a playlist',
3423 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3424 'info_dict': {
3425 'id': 'FqZTN594JQw',
3426 'ext': 'webm',
3427 'title': "Smiley's People 01 detective, Adventure Series, Action",
3428 'uploader': 'STREEM',
3429 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3430 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3431 'upload_date': '20150526',
3432 'license': 'Standard YouTube License',
3433 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3434 'categories': ['People & Blogs'],
3435 'tags': list,
3436 'view_count': int,
3437 'like_count': int,
3438 'dislike_count': int,
3439 },
3440 'params': {
3441 'skip_download': True,
3442 },
3443 'skip': 'This video is not available.',
3444 'add_ie': [YoutubeIE.ie_key()],
3445 }, {
3446 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3447 'only_matching': True,
3448 }, {
3449 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3450 'only_matching': True,
3451 }, {
3452 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3453 'info_dict': {
3454 'id': 'FMtPN8yp5LU', # This will keep changing
3455 'ext': 'mp4',
3456 'title': compat_str,
3457 'uploader': 'Sky News',
3458 'uploader_id': 'skynews',
3459 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3460 'upload_date': r're:\d{8}',
3461 'description': compat_str,
3462 'categories': ['News & Politics'],
3463 'tags': list,
3464 'like_count': int,
3465 'dislike_count': int,
3466 },
3467 'params': {
3468 'skip_download': True,
3469 },
3470 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3471 }, {
3472 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3473 'info_dict': {
3474 'id': 'a48o2S1cPoo',
3475 'ext': 'mp4',
3476 'title': 'The Young Turks - Live Main Show',
3477 'uploader': 'The Young Turks',
3478 'uploader_id': 'TheYoungTurks',
3479 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3480 'upload_date': '20150715',
3481 'license': 'Standard YouTube License',
3482 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3483 'categories': ['News & Politics'],
3484 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3485 'like_count': int,
3486 'dislike_count': int,
3487 },
3488 'params': {
3489 'skip_download': True,
3490 },
3491 'only_matching': True,
3492 }, {
3493 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3494 'only_matching': True,
3495 }, {
3496 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3497 'only_matching': True,
3498 }, {
3499 'note': 'A channel that is not live. Should raise error',
3500 'url': 'https://www.youtube.com/user/numberphile/live',
3501 'only_matching': True,
3502 }, {
3503 'url': 'https://www.youtube.com/feed/trending',
3504 'only_matching': True,
3505 }, {
3506 'url': 'https://www.youtube.com/feed/library',
3507 'only_matching': True,
3508 }, {
3509 'url': 'https://www.youtube.com/feed/history',
3510 'only_matching': True,
3511 }, {
3512 'url': 'https://www.youtube.com/feed/subscriptions',
3513 'only_matching': True,
3514 }, {
3515 'url': 'https://www.youtube.com/feed/watch_later',
3516 'only_matching': True,
3517 }, {
3518 'note': 'Recommended - redirects to home page',
3519 'url': 'https://www.youtube.com/feed/recommended',
3520 'only_matching': True,
3521 }, {
3522 'note': 'inline playlist with not always working continuations',
3523 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3524 'only_matching': True,
3525 }, {
3526 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3527 'only_matching': True,
3528 }, {
3529 'url': 'https://www.youtube.com/course',
3530 'only_matching': True,
3531 }, {
3532 'url': 'https://www.youtube.com/zsecurity',
3533 'only_matching': True,
3534 }, {
3535 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3536 'only_matching': True,
3537 }, {
3538 'url': 'https://www.youtube.com/TheYoungTurks/live',
3539 'only_matching': True,
3540 }, {
3541 'url': 'https://www.youtube.com/hashtag/cctv9',
3542 'info_dict': {
3543 'id': 'cctv9',
3544 'title': '#cctv9',
3545 },
3546 'playlist_mincount': 350,
3547 }, {
3548 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3549 'only_matching': True,
3550 }, {
3551 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3552 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3553 'only_matching': True
3554 }, {
3555 'note': '/browse/ should redirect to /channel/',
3556 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3557 'only_matching': True
3558 }, {
3559 'note': 'VLPL, should redirect to playlist?list=PL...',
3560 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3561 'info_dict': {
3562 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3563 'uploader': 'NoCopyrightSounds',
3564 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3565 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3566 'title': 'NCS Releases',
3567 },
3568 'playlist_mincount': 166,
3569 }, {
3570 'note': 'Topic, should redirect to playlist?list=UU...',
3571 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3572 'info_dict': {
3573 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3574 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3575 'title': 'Uploads from Royalty Free Music - Topic',
3576 'uploader': 'Royalty Free Music - Topic',
3577 },
3578 'expected_warnings': [
3579 'A channel/user page was given',
3580 'The URL does not have a videos tab',
3581 ],
3582 'playlist_mincount': 101,
3583 }, {
3584 'note': 'Topic without a UU playlist',
3585 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3586 'info_dict': {
3587 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3588 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3589 },
3590 'expected_warnings': [
3591 'A channel/user page was given',
3592 'The URL does not have a videos tab',
3593 'Falling back to channel URL',
3594 ],
3595 'playlist_mincount': 9,
3596 }, {
3597 'note': 'Youtube music Album',
3598 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3599 'info_dict': {
3600 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3601 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3602 },
3603 'playlist_count': 50,
3604 }, {
3605 'note': 'unlisted single video playlist',
3606 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3607 'info_dict': {
3608 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3609 'uploader': 'colethedj',
3610 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3611 'title': 'yt-dlp unlisted playlist test',
3612 'availability': 'unlisted'
3613 },
3614 'playlist_count': 1,
3615 }]
3616
3617 @classmethod
3618 def suitable(cls, url):
3619 return False if YoutubeIE.suitable(url) else super(
3620 YoutubeTabIE, cls).suitable(url)
3621
3622 def _extract_channel_id(self, webpage):
3623 channel_id = self._html_search_meta(
3624 'channelId', webpage, 'channel id', default=None)
3625 if channel_id:
3626 return channel_id
3627 channel_url = self._html_search_meta(
3628 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3629 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3630 'twitter:app:url:googleplay'), webpage, 'channel url')
3631 return self._search_regex(
3632 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3633 channel_url, 'channel id')
3634
3635 @staticmethod
3636 def _extract_basic_item_renderer(item):
3637 # Modified from _extract_grid_item_renderer
3638 known_basic_renderers = (
3639 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3640 )
3641 for key, renderer in item.items():
3642 if not isinstance(renderer, dict):
3643 continue
3644 elif key in known_basic_renderers:
3645 return renderer
3646 elif key.startswith('grid') and key.endswith('Renderer'):
3647 return renderer
3648
3649 def _grid_entries(self, grid_renderer):
3650 for item in grid_renderer['items']:
3651 if not isinstance(item, dict):
3652 continue
3653 renderer = self._extract_basic_item_renderer(item)
3654 if not isinstance(renderer, dict):
3655 continue
3656 title = self._get_text(renderer, 'title')
3657
3658 # playlist
3659 playlist_id = renderer.get('playlistId')
3660 if playlist_id:
3661 yield self.url_result(
3662 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3663 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3664 video_title=title)
3665 continue
3666 # video
3667 video_id = renderer.get('videoId')
3668 if video_id:
3669 yield self._extract_video(renderer)
3670 continue
3671 # channel
3672 channel_id = renderer.get('channelId')
3673 if channel_id:
3674 yield self.url_result(
3675 'https://www.youtube.com/channel/%s' % channel_id,
3676 ie=YoutubeTabIE.ie_key(), video_title=title)
3677 continue
3678 # generic endpoint URL support
3679 ep_url = urljoin('https://www.youtube.com/', try_get(
3680 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3681 compat_str))
3682 if ep_url:
3683 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3684 if ie.suitable(ep_url):
3685 yield self.url_result(
3686 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3687 break
3688
3689 def _shelf_entries_from_content(self, shelf_renderer):
3690 content = shelf_renderer.get('content')
3691 if not isinstance(content, dict):
3692 return
3693 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3694 if renderer:
3695 # TODO: add support for nested playlists so each shelf is processed
3696 # as separate playlist
3697 # TODO: this includes only first N items
3698 for entry in self._grid_entries(renderer):
3699 yield entry
3700 renderer = content.get('horizontalListRenderer')
3701 if renderer:
3702 # TODO
3703 pass
3704
3705 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3706 ep = try_get(
3707 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3708 compat_str)
3709 shelf_url = urljoin('https://www.youtube.com', ep)
3710 if shelf_url:
3711 # Skipping links to another channels, note that checking for
3712 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3713 # will not work
3714 if skip_channels and '/channels?' in shelf_url:
3715 return
3716 title = self._get_text(shelf_renderer, 'title')
3717 yield self.url_result(shelf_url, video_title=title)
3718 # Shelf may not contain shelf URL, fallback to extraction from content
3719 for entry in self._shelf_entries_from_content(shelf_renderer):
3720 yield entry
3721
3722 def _playlist_entries(self, video_list_renderer):
3723 for content in video_list_renderer['contents']:
3724 if not isinstance(content, dict):
3725 continue
3726 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3727 if not isinstance(renderer, dict):
3728 continue
3729 video_id = renderer.get('videoId')
3730 if not video_id:
3731 continue
3732 yield self._extract_video(renderer)
3733
3734 def _rich_entries(self, rich_grid_renderer):
3735 renderer = try_get(
3736 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3737 video_id = renderer.get('videoId')
3738 if not video_id:
3739 return
3740 yield self._extract_video(renderer)
3741
3742 def _video_entry(self, video_renderer):
3743 video_id = video_renderer.get('videoId')
3744 if video_id:
3745 return self._extract_video(video_renderer)
3746
    def _post_thread_entries(self, post_thread_renderer):
        """Yield entries found in a community post thread.

        Yields, in order: an attached video (if any), an attached playlist
        (if any), and YouTube links found in the post text, skipping a link
        that duplicates the attached video.
        """
        post_renderer = try_get(
            post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
        if not post_renderer:
            return
        # video attachment
        video_renderer = try_get(
            post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
        video_id = video_renderer.get('videoId')
        if video_id:
            entry = self._extract_video(video_renderer)
            if entry:
                yield entry
        # playlist attachment
        playlist_id = try_get(
            post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
        # inline video links
        runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
        for run in runs:
            if not isinstance(run, dict):
                continue
            ep_url = try_get(
                run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
            if not ep_url:
                continue
            if not YoutubeIE.suitable(ep_url):
                continue
            ep_video_id = YoutubeIE._match_id(ep_url)
            if video_id == ep_video_id:
                # already yielded above as the video attachment
                continue
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3782
3783 def _post_thread_continuation_entries(self, post_thread_continuation):
3784 contents = post_thread_continuation.get('contents')
3785 if not isinstance(contents, list):
3786 return
3787 for content in contents:
3788 renderer = content.get('backstagePostThreadRenderer')
3789 if not isinstance(renderer, dict):
3790 continue
3791 for entry in self._post_thread_entries(renderer):
3792 yield entry
3793
3794 r''' # unused
3795 def _rich_grid_entries(self, contents):
3796 for content in contents:
3797 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3798 if video_renderer:
3799 entry = self._video_entry(video_renderer)
3800 if entry:
3801 yield entry
3802 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of the selected tab, following API continuations.

        @param tab              selected tab renderer dict
        @param item_id          channel/playlist id, used for logging only
        @param identity_token   session identity token (may be None)
        @param account_syncid   account sync id for multi-channel accounts (may be None)
        @param ytcfg            ytcfg dict extracted from the original webpage
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # not an item section: may be a rich item (e.g. home feed)
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # dispatch table: renderer key -> entry generator
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # single-element list so the nested generator above can write to it
        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        # keep requesting continuation pages until YouTube stops giving them
        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # carry visitorData over so subsequent requests stay in the same session
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # old-style continuation: response contains 'continuationContents'
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # new-style continuation: 'onResponseReceived*' with appended items;
            # the value is the item list key the handler expects
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # re-wrap the items under the key the handler expects
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3918
3919 @staticmethod
3920 def _extract_selected_tab(tabs):
3921 for tab in tabs:
3922 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3923 if renderer.get('selected') is True:
3924 return renderer
3925 else:
3926 raise ExtractorError('Unable to find selected tab')
3927
3928 @classmethod
3929 def _extract_uploader(cls, data):
3930 uploader = {}
3931 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3932 owner = try_get(
3933 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3934 if owner:
3935 uploader['uploader'] = owner.get('text')
3936 uploader['uploader_id'] = try_get(
3937 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3938 uploader['uploader_url'] = urljoin(
3939 'https://www.youtube.com/',
3940 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3941 return {k: v for k, v in uploader.items() if v is not None}
3942
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build the playlist result (entries + metadata) for a tabbed page.

        @param item_id  channel/playlist id from the URL
        @param webpage  downloaded webpage (used for ytcfg/identity token)
        @param data     ytInitialData dict
        @param tabs     list of tab renderers; exactly one must be selected
        """
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        # channel pages carry channelMetadataRenderer, playlists carry
        # playlistMetadataRenderer; try them in that order
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            # prefer the avatar thumbnails; fall back to the sidebar thumbnail
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # hashtag pages have no metadata renderer; use the hashtag header
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        # append the tab name, e.g. "channel - Videos"
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # playlist pages: fall back to the sidebar owner info
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        ytcfg = self.extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
4017
4018 def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
4019 first_id = last_id = None
4020 ytcfg = self.extract_ytcfg(playlist_id, webpage)
4021 headers = self.generate_api_headers(
4022 ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4023 identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
4024 for page_num in itertools.count(1):
4025 videos = list(self._playlist_entries(playlist))
4026 if not videos:
4027 return
4028 start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
4029 if start >= len(videos):
4030 return
4031 for video in videos[start:]:
4032 if video['id'] == first_id:
4033 self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
4034 return
4035 yield video
4036 first_id = first_id or videos[0]['id']
4037 last_id = videos[-1]['id']
4038 watch_endpoint = try_get(
4039 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
4040 query = {
4041 'playlistId': playlist_id,
4042 'videoId': watch_endpoint.get('videoId') or last_id,
4043 'index': watch_endpoint.get('index') or len(videos),
4044 'params': watch_endpoint.get('params') or 'OAE%3D'
4045 }
4046 response = self._extract_response(
4047 item_id='%s page %d' % (playlist_id, page_num),
4048 query=query, ep='next', headers=headers, ytcfg=ytcfg,
4049 check_get_keys='contents'
4050 )
4051 playlist = try_get(
4052 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4053
4054 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
4055 title = playlist.get('title') or try_get(
4056 data, lambda x: x['titleText']['simpleText'], compat_str)
4057 playlist_id = playlist.get('playlistId') or item_id
4058
4059 # Delegating everything except mix playlists to regular tab-based playlist URL
4060 playlist_url = urljoin(url, try_get(
4061 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4062 compat_str))
4063 if playlist_url and playlist_url != url:
4064 return self.url_result(
4065 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4066 video_title=title)
4067
4068 return self.playlist_result(
4069 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4070 playlist_id=playlist_id, playlist_title=title)
4071
    def _extract_availability(self, data):
        """
        Gets the availability of a given playlist/tab.
        Note: Unless YouTube tells us explicitly, we do not assume it is public
        @param data: response
        """
        is_private = is_unlisted = None
        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
        badge_labels = self._extract_badges(renderer)

        # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
        privacy_dropdown_entries = try_get(
            renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
        for renderer_dict in privacy_dropdown_entries:
            is_selected = try_get(
                renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
            if not is_selected:
                continue
            # treat the selected dropdown label exactly like a badge label
            label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
            if label:
                badge_labels.add(label.lower())
                break

        for badge_label in badge_labels:
            if badge_label == 'unlisted':
                is_unlisted = True
            elif badge_label == 'private':
                is_private = True
            elif badge_label == 'public':
                is_unlisted = is_private = False
        # playlists/tabs are never premium/needs-subscription/needs-auth here
        return self._availability(is_private, False, False, False, is_unlisted)
4103
4104 @staticmethod
4105 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4106 sidebar_renderer = try_get(
4107 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4108 for item in sidebar_renderer:
4109 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4110 if renderer:
4111 return renderer
4112
    def _reload_with_unavailable_videos(self, item_id, data, webpage):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.

        Returns the new API response, or None when the button is absent or
        the request fails (fatal=False).
        """
        browse_id = params = None
        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
        if not renderer:
            return
        # look for the 'show unavailable videos' menu entry and grab its
        # browse endpoint parameters
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

        ytcfg = self.extract_ytcfg(item_id, webpage)
        headers = self.generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=item_id),
            visitor_data=try_get(
                self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        # fall back to the generic 'show unavailable' params / VL-prefixed id
        query = {
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False, ytcfg=ytcfg,
            note='Downloading API JSON with unavailable videos')
4151
4152 def _extract_webpage(self, url, item_id):
4153 retries = self.get_param('extractor_retries', 3)
4154 count = -1
4155 last_error = 'Incomplete yt initial data recieved'
4156 while count < retries:
4157 count += 1
4158 # Sometimes youtube returns a webpage with incomplete ytInitialData
4159 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4160 if count:
4161 self.report_warning('%s. Retrying ...' % last_error)
4162 webpage = self._download_webpage(
4163 url, item_id,
4164 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4165 data = self.extract_yt_initial_data(item_id, webpage)
4166 if data.get('contents') or data.get('currentVideoEndpoint'):
4167 break
4168 # Extract alerts here only when there is error
4169 self._extract_and_report_alerts(data)
4170 if count >= retries:
4171 raise ExtractorError(last_error)
4172 return webpage, data
4173
4174 @staticmethod
4175 def _smuggle_data(entries, data):
4176 for entry in entries:
4177 if data:
4178 entry['url'] = smuggle_url(entry['url'], data)
4179 yield entry
4180
4181 def _real_extract(self, url):
4182 url, smuggled_data = unsmuggle_url(url, {})
4183 if self.is_music_url(url):
4184 smuggled_data['is_music_url'] = True
4185 info_dict = self.__real_extract(url, smuggled_data)
4186 if info_dict.get('entries'):
4187 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4188 return info_dict
4189
    # Splits a tab URL into the span matched by _VALID_URL (pre), the tab path
    # component (only present when the URL matched as a channel-type URL — note
    # the conditional group on 'channel_type') and everything after it (post)
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4191
    def __real_extract(self, url, smuggled_data):
        """Extract a tab/channel/playlist page.

        Normalizes the URL (host, tab name), applies the YouTube Music and
        channel redirections honouring compat options, then dispatches to
        tab, playlist or single-video extraction.
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Group dict of _url_re with None values replaced by ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4306
4307
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Full playlist-page URLs are handled by YoutubeTabIE instead
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        # URLs carrying a video id are watch URLs, not bare playlists
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Normalize to a canonical playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4392
4393
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    # youtu.be short video links that also carry a playlist id
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite the youtu.be link to a full watch URL and delegate to YoutubeTabIE."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        playlist_id = mobj.group('playlist_id')
        return self.url_result(
            update_url_query('https://www.youtube.com/watch', {
                'v': video_id,
                'list': playlist_id,
                'feature': 'youtu.be',
            }), ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4432
4433
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Rewrite the 'ytuser:<name>' shorthand to the canonical /user/ URL
        # and hand it off to the tab extractor
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4447
4448
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos live in the special 'LL' playlist
        liked_list_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_list_url, ie=YoutubeTabIE.ie_key())
4466
4467
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* extracted video results for *query*, paging via
        continuation tokens until the API runs out of results."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page and continuation pages have different response shapes
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token found anywhere on the page: last page
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query, query)
4535
4536
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # Search 'params' value selecting newest-first ordering (sent as-is in
    # _entries; presumably a protobuf-encoded sort flag — see IE_DESC)
    _SEARCH_PARAMS = 'CAI%3D'
4542
4543
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Search URLs are matched directly rather than via the search keyword
        return cls._VALID_URL

    def _real_extract(self, url):
        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # _VALID_URL guarantees at least one of search_query/q is present
        query = (qs.get('search_query') or qs.get('q'))[0]
        # Pass any explicit search params (sp=...) through to the API
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4570
4571
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the extractor name from the feed it serves
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Every feed is just a /feed/<name> page handled by YoutubeTabIE
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4588
4589
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The watch-later list is the special 'WL' playlist
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4602
4603
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Also matches the bare youtube.com homepage
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4619
4620
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    # Extraction is delegated to YoutubeFeedsInfoExtractor via the feed name
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4632
4633
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    # Extraction is delegated to YoutubeFeedsInfoExtractor via the feed name
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4642
4643
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch URLs whose 'v=' parameter was cut off (typically an
    unquoted '&' in the shell) and raises a helpful error."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Message updated to reference yt-dlp instead of the legacy
        # youtube-dl binary name this project was forked from
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
4691
4692
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # The regex only matches ids of 1-10 characters, i.e. shorter than a
        # real 11-character video id — so the URL must have been cut off
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)