yt_dlp/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import base64
   6 import calendar
   7 import copy
   8 import datetime
   9 import hashlib
  10 import itertools
  11 import json
  12 import os.path
  13 import random
  14 import re
  15 import time
  16 import traceback
  17
  18 from .common import InfoExtractor, SearchInfoExtractor
  19 from ..compat import (
  20     compat_chr,
  21     compat_HTTPError,
  22     compat_parse_qs,
  23     compat_str,
  24     compat_urllib_parse_unquote_plus,
  25     compat_urllib_parse_urlencode,
  26     compat_urllib_parse_urlparse,
  27     compat_urlparse,
  28 )
  29 from ..jsinterp import JSInterpreter
  30 from ..utils import (
  31     bytes_to_intlist,
  32     clean_html,
  33     datetime_from_str,
  34     dict_get,
  35     error_to_compat_str,
  36     ExtractorError,
  37     float_or_none,
  38     format_field,
  39     int_or_none,
  40     intlist_to_bytes,
  41     mimetype2ext,
  42     network_exceptions,
  43     orderedSet,
  44     parse_codecs,
  45     parse_count,
  46     parse_duration,
  47     parse_iso8601,
  48     qualities,
  49     remove_start,
  50     smuggle_url,
  51     str_or_none,
  52     str_to_int,
  53     traverse_obj,
  54     try_get,
  55     unescapeHTML,
  56     unified_strdate,
  57     unsmuggle_url,
  58     update_url_query,
  59     url_or_none,
  60     urlencode_postdata,
  61     urljoin,
  62     variadic,
  63 )
  64
  65
  66 def parse_qs(url):
  67     return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
  68
  69
# Built-in configuration for each supported Innertube client, keyed by client name.
# Every entry carries the API key, the client context (clientName/clientVersion) and
# the numeric INNERTUBE_CONTEXT_CLIENT_NAME that generate_api_headers() sends as the
# X-YouTube-Client-Name header. Entries without INNERTUBE_HOST get a default host
# from build_innertube_clients(), which also adds a 'priority' to each entry.
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
    'web': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB',
                'clientVersion': '2.20210622.10.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 1
    },
    'web_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_EMBEDDED_PLAYER',
                'clientVersion': '1.20210620.0.1',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 56
    },
    'web_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'WEB_REMIX',
                'clientVersion': '1.20210621.00.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
    },
    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
    },
    'android_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_EMBEDDED_PLAYER',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 55
    },
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '4.32',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
    },
    'ios': {
        'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS',
                'clientVersion': '16.20',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 5
    },
    'ios_embedded': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MESSAGES_EXTENSION',
                'clientVersion': '16.20',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 66
    },
    'ios_music': {
        'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
        'INNERTUBE_HOST': 'music.youtube.com',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'IOS_MUSIC',
                'clientVersion': '4.32',
            },
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 26
    },
    'mweb': {
        'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
        'INNERTUBE_CONTEXT': {
            'client': {
                'clientName': 'MWEB',
                'clientVersion': '2.20210721.07.00',
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 2
    },
}
 176
 177
 178 def build_innertube_clients():
 179     third_party = {
 180         'embedUrl': 'https://google.com',  # Can be any valid URL
 181     }
 182     base_clients = ('android', 'web', 'ios', 'mweb')
 183     priority = qualities(base_clients[::-1])
 184
 185     for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
 186         ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM4DrUqRUYnGn3llEO78bcxq8')
 187         ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
 188         ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
 189         ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
 190
 191         if client in base_clients:
 192             INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
 193             agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
 194             agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
 195             agegate_ytcfg['priority'] -= 1
 196         elif client.endswith('_embedded'):
 197             ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
 198             ytcfg['priority'] -= 2
 199         else:
 200             ytcfg['priority'] -= 3
 201
 202
# Complete INNERTUBE_CLIENTS once, at import time, before any extractor reads it
build_innertube_clients()
 204
 205
 206 class YoutubeBaseInfoExtractor(InfoExtractor):
 207     """Provide base functions for Youtube extractors"""
    # Google account sign-in endpoints
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # `{0}` is a str.format placeholder for the `TL` query value
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation of fixed path names
    # NOTE(review): presumably the first-level youtube.com paths that must not be
    # mistaken for channel names - confirm against the URL patterns using it
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Regex fragment for playlist IDs: either a known prefix followed by at least
    # 10 ID characters, or one of the bare IDs RDMM, WL, LL and LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
 225
 226     def _login(self):
 227         """
 228         Attempt to log in to YouTube.
 229         True is returned if successful or skipped.
 230         False is returned if login failed.
 231
 232         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
 233         """
 234
 235         def warn(message):
 236             self.report_warning(message)
 237
 238         # username+password login is broken
 239         if (self._LOGIN_REQUIRED
 240                 and self.get_param('cookiefile') is None
 241                 and self.get_param('cookiesfrombrowser') is None):
 242             self.raise_login_required(
 243                 'Login details are needed to download this content', method='cookies')
 244         username, password = self._get_login_info()
 245         if username:
 246             warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
 247         return
 248
 249         # Everything below this is broken!
 250         r'''
 251         # No authentication to be performed
 252         if username is None:
 253             if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
 254                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 255             # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
 256             #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
 257             return True
 258
 259         login_page = self._download_webpage(
 260             self._LOGIN_URL, None,
 261             note='Downloading login page',
 262             errnote='unable to fetch login page', fatal=False)
 263         if login_page is False:
 264             return
 265
 266         login_form = self._hidden_inputs(login_page)
 267
 268         def req(url, f_req, note, errnote):
 269             data = login_form.copy()
 270             data.update({
 271                 'pstMsg': 1,
 272                 'checkConnection': 'youtube',
 273                 'checkedDomains': 'youtube',
 274                 'hl': 'en',
 275                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 276                 'f.req': json.dumps(f_req),
 277                 'flowName': 'GlifWebSignIn',
 278                 'flowEntry': 'ServiceLogin',
 279                 # TODO: reverse actual botguard identifier generation algo
 280                 'bgRequest': '["identifier",""]',
 281             })
 282             return self._download_json(
 283                 url, None, note=note, errnote=errnote,
 284                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 285                 fatal=False,
 286                 data=urlencode_postdata(data), headers={
 287                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 288                     'Google-Accounts-XSRF': 1,
 289                 })
 290
 291         lookup_req = [
 292             username,
 293             None, [], None, 'US', None, None, 2, False, True,
 294             [
 295                 None, None,
 296                 [2, 1, None, 1,
 297                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 298                  None, [], 4],
 299                 1, [None, None, []], None, None, None, True
 300             ],
 301             username,
 302         ]
 303
 304         lookup_results = req(
 305             self._LOOKUP_URL, lookup_req,
 306             'Looking up account info', 'Unable to look up account info')
 307
 308         if lookup_results is False:
 309             return False
 310
 311         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 312         if not user_hash:
 313             warn('Unable to extract user hash')
 314             return False
 315
 316         challenge_req = [
 317             user_hash,
 318             None, 1, None, [1, None, None, None, [password, None, True]],
 319             [
 320                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 321                 1, [None, None, []], None, None, None, True
 322             ]]
 323
 324         challenge_results = req(
 325             self._CHALLENGE_URL, challenge_req,
 326             'Logging in', 'Unable to log in')
 327
 328         if challenge_results is False:
 329             return
 330
 331         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 332         if login_res:
 333             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 334             warn(
 335                 'Unable to login: %s' % 'Invalid password'
 336                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 337             return False
 338
 339         res = try_get(challenge_results, lambda x: x[0][-1], list)
 340         if not res:
 341             warn('Unable to extract result entry')
 342             return False
 343
 344         login_challenge = try_get(res, lambda x: x[0][0], list)
 345         if login_challenge:
 346             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 347             if challenge_str == 'TWO_STEP_VERIFICATION':
 348                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 349                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 350                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 351                 if status == 'QUOTA_EXCEEDED':
 352                     warn('Exceeded the limit of TFA codes, try later')
 353                     return False
 354
 355                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 356                 if not tl:
 357                     warn('Unable to extract TL')
 358                     return False
 359
 360                 tfa_code = self._get_tfa_info('2-step verification code')
 361
 362                 if not tfa_code:
 363                     warn(
 364                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 365                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 366                     return False
 367
 368                 tfa_code = remove_start(tfa_code, 'G-')
 369
 370                 tfa_req = [
 371                     user_hash, None, 2, None,
 372                     [
 373                         9, None, None, None, None, None, None, None,
 374                         [None, tfa_code, True, 2]
 375                     ]]
 376
 377                 tfa_results = req(
 378                     self._TFA_URL.format(tl), tfa_req,
 379                     'Submitting TFA code', 'Unable to submit TFA code')
 380
 381                 if tfa_results is False:
 382                     return False
 383
 384                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 385                 if tfa_res:
 386                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 387                     warn(
 388                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 389                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 390                     return False
 391
 392                 check_cookie_url = try_get(
 393                     tfa_results, lambda x: x[0][-1][2], compat_str)
 394             else:
 395                 CHALLENGES = {
 396                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 397                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 398                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 399                 }
 400                 challenge = CHALLENGES.get(
 401                     challenge_str,
 402                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 403                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 404                 return False
 405         else:
 406             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 407
 408         if not check_cookie_url:
 409             warn('Unable to extract CheckCookie URL')
 410             return False
 411
 412         check_cookie_results = self._download_webpage(
 413             check_cookie_url, None, 'Checking cookie', fatal=False)
 414
 415         if check_cookie_results is False:
 416             return False
 417
 418         if 'https://myaccount.google.com/' not in check_cookie_results:
 419             warn('Unable to log in')
 420             return False
 421
 422         return True
 423         '''
 424
 425     def _initialize_consent(self):
 426         cookies = self._get_cookies('https://www.youtube.com/')
 427         if cookies.get('__Secure-3PSID'):
 428             return
 429         consent_id = None
 430         consent = cookies.get('CONSENT')
 431         if consent:
 432             if 'YES' in consent.value:
 433                 return
 434             consent_id = self._search_regex(
 435                 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
 436         if not consent_id:
 437             consent_id = random.randint(100, 999)
 438         self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
 439
 440     def _real_initialize(self):
 441         self._initialize_consent()
 442         if self._downloader is None:
 443             return
 444         if not self._login():
 445             return
 446
    # Matches `ytInitialData = {...};` or `window["ytInitialData"] = {...};`;
    # group 1 is the (non-greedily matched) object literal
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Matches `ytInitialPlayerResponse = {...};`; group 1 is the object literal
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Text expected right after such an assignment; extract_yt_initial_data()
    # appends it to _YT_INITIAL_DATA_RE to anchor the end of the non-greedy match
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
 450
 451     def _get_default_ytcfg(self, client='web'):
 452         return copy.deepcopy(INNERTUBE_CLIENTS[client])
 453
 454     def _get_innertube_host(self, client='web'):
 455         return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
 456
 457     def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
 458         # try_get but with fallback to default ytcfg client values when present
 459         _func = lambda y: try_get(y, getter, expected_type)
 460         return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
 461
 462     def _extract_client_name(self, ytcfg, default_client='web'):
 463         return (
 464             try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str)
 465             or self._ytcfg_get_safe(
 466                 ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'], compat_str, default_client))
 467
 468     @staticmethod
 469     def _extract_session_index(*data):
 470         for ytcfg in data:
 471             session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
 472             if session_index is not None:
 473                 return session_index
 474
 475     def _extract_client_version(self, ytcfg, default_client='web'):
 476         return (
 477             try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str)
 478             or self._ytcfg_get_safe(
 479                 ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'], compat_str, default_client))
 480
 481     def _extract_api_key(self, ytcfg=None, default_client='web'):
 482         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
 483
 484     def _extract_context(self, ytcfg=None, default_client='web'):
 485         _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
 486         context = _get_context(ytcfg)
 487         if context:
 488             return context
 489
 490         context = _get_context(self._get_default_ytcfg(default_client))
 491         if not ytcfg:
 492             return context
 493
 494         # Recreate the client context (required)
 495         context['client'].update({
 496             'clientVersion': self._extract_client_version(ytcfg, default_client),
 497             'clientName': self._extract_client_name(ytcfg, default_client),
 498         })
 499         visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
 500         if visitor_data:
 501             context['client']['visitorData'] = visitor_data
 502         return context
 503
 504     def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
 505         # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
 506         # See: https://github.com/yt-dlp/yt-dlp/issues/393
 507         yt_cookies = self._get_cookies('https://www.youtube.com')
 508         sapisid_cookie = dict_get(
 509             yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
 510         if sapisid_cookie is None or not sapisid_cookie.value:
 511             return
 512         time_now = round(time.time())
 513         # SAPISID cookie is required if not already present
 514         if not yt_cookies.get('SAPISID'):
 515             self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie', only_once=True)
 516             self._set_cookie(
 517                 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
 518         self.write_debug('Extracted SAPISID cookie', only_once=True)
 519         # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
 520         sapisidhash = hashlib.sha1(
 521             f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
 522         return f'SAPISIDHASH {time_now}_{sapisidhash}'
 523
 524     def _call_api(self, ep, query, video_id, fatal=True, headers=None,
 525                   note='Downloading API JSON', errnote='Unable to download API page',
 526                   context=None, api_key=None, api_hostname=None, default_client='web'):
 527
 528         data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
 529         data.update(query)
 530         real_headers = self.generate_api_headers(default_client=default_client)
 531         real_headers.update({'content-type': 'application/json'})
 532         if headers:
 533             real_headers.update(headers)
 534         return self._download_json(
 535             'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
 536             video_id=video_id, fatal=fatal, note=note, errnote=errnote,
 537             data=json.dumps(data).encode('utf8'), headers=real_headers,
 538             query={'key': api_key or self._extract_api_key()})
 539
 540     def extract_yt_initial_data(self, video_id, webpage):
 541         return self._parse_json(
 542             self._search_regex(
 543                 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
 544                  self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
 545             video_id)
 546
 547     def _extract_identity_token(self, webpage, item_id):
 548         if not webpage:
 549             return None
 550         ytcfg = self.extract_ytcfg(item_id, webpage)
 551         if ytcfg:
 552             token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
 553             if token:
 554                 return token
 555         return self._search_regex(
 556             r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
 557             'identity token', default=None)
 558
 559     @staticmethod
 560     def _extract_account_syncid(*args):
 561         """
 562         Extract syncId required to download private playlists of secondary channels
 563         @params response and/or ytcfg
 564         """
 565         for data in args:
 566             # ytcfg includes channel_syncid if on secondary channel
 567             delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
 568             if delegated_sid:
 569                 return delegated_sid
 570             sync_ids = (try_get(
 571                 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
 572                        lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
 573             if len(sync_ids) >= 2 and sync_ids[1]:
 574                 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
 575                 # and just "user_syncid||" for primary channel. We only want the channel_syncid
 576                 return sync_ids[0]
 577
 578     def extract_ytcfg(self, video_id, webpage):
 579         if not webpage:
 580             return {}
 581         return self._parse_json(
 582             self._search_regex(
 583                 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
 584                 default='{}'), video_id, fatal=False) or {}
 585
 586     def generate_api_headers(
 587             self, ytcfg=None, identity_token=None, account_syncid=None,
 588             visitor_data=None, api_hostname=None, default_client='web', session_index=None):
 589         origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
 590         headers = {
 591             'X-YouTube-Client-Name': compat_str(
 592                 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
 593             'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
 594             'Origin': origin
 595         }
 596         if not visitor_data and ytcfg:
 597             visitor_data = try_get(
 598                 self._extract_context(ytcfg, default_client), lambda x: x['client']['visitorData'], compat_str)
 599         if identity_token:
 600             headers['X-Youtube-Identity-Token'] = identity_token
 601         if account_syncid:
 602             headers['X-Goog-PageId'] = account_syncid
 603         if session_index is None and ytcfg:
 604             session_index = self._extract_session_index(ytcfg)
 605         if account_syncid or session_index is not None:
 606             headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
 607         if visitor_data:
 608             headers['X-Goog-Visitor-Id'] = visitor_data
 609         auth = self._generate_sapisidhash_header(origin)
 610         if auth is not None:
 611             headers['Authorization'] = auth
 612             headers['X-Origin'] = origin
 613         return headers
 614
 615     @staticmethod
 616     def _build_api_continuation_query(continuation, ctp=None):
 617         query = {
 618             'continuation': continuation
 619         }
 620         # TODO: Inconsistency with clickTrackingParams.
 621         # Currently we have a fixed ctp contained within context (from ytcfg)
 622         # and a ctp in root query for continuation.
 623         if ctp:
 624             query['clickTracking'] = {'clickTrackingParams': ctp}
 625         return query
 626
 627     @classmethod
 628     def _extract_next_continuation_data(cls, renderer):
 629         next_continuation = try_get(
 630             renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
 631                        lambda x: x['continuation']['reloadContinuationData']), dict)
 632         if not next_continuation:
 633             return
 634         continuation = next_continuation.get('continuation')
 635         if not continuation:
 636             return
 637         ctp = next_continuation.get('clickTrackingParams')
 638         return cls._build_api_continuation_query(continuation, ctp)
 639
 640     @classmethod
 641     def _extract_continuation_ep_data(cls, continuation_ep: dict):
 642         if isinstance(continuation_ep, dict):
 643             continuation = try_get(
 644                 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
 645             if not continuation:
 646                 return
 647             ctp = continuation_ep.get('clickTrackingParams')
 648             return cls._build_api_continuation_query(continuation, ctp)
 649
 650     @classmethod
 651     def _extract_continuation(cls, renderer):
 652         next_continuation = cls._extract_next_continuation_data(renderer)
 653         if next_continuation:
 654             return next_continuation
 655
 656         contents = []
 657         for key in ('contents', 'items'):
 658             contents.extend(try_get(renderer, lambda x: x[key], list) or [])
 659
 660         for content in contents:
 661             if not isinstance(content, dict):
 662                 continue
 663             continuation_ep = try_get(
 664                 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
 665                           lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
 666                 dict)
 667             continuation = cls._extract_continuation_ep_data(continuation_ep)
 668             if continuation:
 669                 return continuation
 670
 671     @classmethod
 672     def _extract_alerts(cls, data):
 673         for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
 674             if not isinstance(alert_dict, dict):
 675                 continue
 676             for alert in alert_dict.values():
 677                 alert_type = alert.get('type')
 678                 if not alert_type:
 679                     continue
 680                 message = cls._get_text(alert, 'text')
 681                 if message:
 682                     yield alert_type, message
 683
 684     def _report_alerts(self, alerts, expected=True):
 685         errors = []
 686         warnings = []
 687         for alert_type, alert_message in alerts:
 688             if alert_type.lower() == 'error':
 689                 errors.append([alert_type, alert_message])
 690             else:
 691                 warnings.append([alert_type, alert_message])
 692
 693         for alert_type, alert_message in (warnings + errors[:-1]):
 694             self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
 695         if errors:
 696             raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
 697
 698     def _extract_and_report_alerts(self, data, *args, **kwargs):
 699         return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
 700
 701     def _extract_badges(self, renderer: dict):
 702         badges = set()
 703         for badge in try_get(renderer, lambda x: x['badges'], list) or []:
 704             label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
 705             if label:
 706                 badges.add(label.lower())
 707         return badges
 708
 709     @staticmethod
 710     def _get_text(data, *path_list, max_runs=None):
 711         for path in path_list or [None]:
 712             if path is None:
 713                 obj = [data]
 714             else:
 715                 obj = traverse_obj(data, path, default=[])
 716                 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
 717                     obj = [obj]
 718             for item in obj:
 719                 text = try_get(item, lambda x: x['simpleText'], compat_str)
 720                 if text:
 721                     return text
 722                 runs = try_get(item, lambda x: x['runs'], list) or []
 723                 if not runs and isinstance(item, list):
 724                     runs = item
 725
 726                 runs = runs[:min(len(runs), max_runs or len(runs))]
 727                 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
 728                 if text:
 729                     return text
 730
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the API through `_call_api`, retrying on network errors and incomplete data.

        The call is attempted once and then retried up to `extractor_retries`
        (param, default 3) more times.

        @param item_id         passed to `_call_api` as `video_id`
        @param query           passed to `_call_api` as `query`
        @param note            base note for the request; ' (retry #N)' is appended on retries
        @param headers         passed to `_call_api` unchanged
        @param ytcfg           used to extract the request context and API key
        @param check_get_keys  key(s) looked up with `dict_get`; a response for which this
                               lookup is falsy counts as incomplete and is retried.
                               No check is done when empty/None
        @param ep              API endpoint, passed to `_call_api`
        @param fatal           if True, errors are raised; otherwise they are reported
                               as warnings and None is returned
        @param api_hostname    passed to `_call_api` unchanged
        @param default_client  client used for context/API key extraction and `_call_api`

        @returns  the response from `_call_api`, or None on failure when not `fatal`
        @raises ExtractorError  on failure when `fatal`
        """
        response = None
        last_error = None
        # Starts at -1 so that the first attempt is number 0 and retries are numbered from 1
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                # fatal=True is always passed here; the `fatal` argument of this method
                # is applied in the exception handler below instead
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    # HTTP 403 and 429 are excluded from retrying and fall through to the handling below
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e)
                        if count < retries:
                            continue
                # Not retryable, or out of retries
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                # An error alert is not retried: it is raised or reported right away
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
 789
 790     @staticmethod
 791     def is_music_url(url):
 792         return re.match(r'https?://music\.youtube\.com/', url) is not None
 793
 794     def _extract_video(self, renderer):
 795         video_id = renderer.get('videoId')
 796         title = self._get_text(renderer, 'title')
 797         description = self._get_text(renderer, 'descriptionSnippet')
 798         duration = parse_duration(self._get_text(
 799             renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
 800         view_count_text = self._get_text(renderer, 'viewCountText') or ''
 801         view_count = str_to_int(self._search_regex(
 802             r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
 803             'view count', default=None))
 804
 805         uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
 806
 807         return {
 808             '_type': 'url',
 809             'ie_key': YoutubeIE.ie_key(),
 810             'id': video_id,
 811             'url': video_id,
 812             'title': title,
 813             'description': description,
 814             'duration': duration,
 815             'view_count': view_count,
 816             'uploader': uploader,
 817         }
 818
 819
 820 class YoutubeIE(YoutubeBaseInfoExtractor):
 821     IE_DESC = 'YouTube.com'
 822     _INVIDIOUS_SITES = (
 823         # invidious-redirect websites
 824         r'(?:www\.)?redirect\.invidious\.io',
 825         r'(?:(?:www|dev)\.)?invidio\.us',
 826         # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
 827         r'(?:www\.)?invidious\.pussthecat\.org',
 828         r'(?:www\.)?invidious\.zee\.li',
 829         r'(?:www\.)?invidious\.ethibox\.fr',
 830         r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
 831         # youtube-dl invidious instances list
 832         r'(?:(?:www|no)\.)?invidiou\.sh',
 833         r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
 834         r'(?:www\.)?invidious\.kabi\.tk',
 835         r'(?:www\.)?invidious\.mastodon\.host',
 836         r'(?:www\.)?invidious\.zapashcanon\.fr',
 837         r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
 838         r'(?:www\.)?invidious\.tinfoil-hat\.net',
 839         r'(?:www\.)?invidious\.himiko\.cloud',
 840         r'(?:www\.)?invidious\.reallyancient\.tech',
 841         r'(?:www\.)?invidious\.tube',
 842         r'(?:www\.)?invidiou\.site',
 843         r'(?:www\.)?invidious\.site',
 844         r'(?:www\.)?invidious\.xyz',
 845         r'(?:www\.)?invidious\.nixnet\.xyz',
 846         r'(?:www\.)?invidious\.048596\.xyz',
 847         r'(?:www\.)?invidious\.drycat\.fr',
 848         r'(?:www\.)?inv\.skyn3t\.in',
 849         r'(?:www\.)?tube\.poal\.co',
 850         r'(?:www\.)?tube\.connect\.cafe',
 851         r'(?:www\.)?vid\.wxzm\.sx',
 852         r'(?:www\.)?vid\.mint\.lgbt',
 853         r'(?:www\.)?vid\.puffyan\.us',
 854         r'(?:www\.)?yewtu\.be',
 855         r'(?:www\.)?yt\.elukerio\.org',
 856         r'(?:www\.)?yt\.lelux\.fi',
 857         r'(?:www\.)?invidious\.ggc-project\.de',
 858         r'(?:www\.)?yt\.maisputain\.ovh',
 859         r'(?:www\.)?ytprivate\.com',
 860         r'(?:www\.)?invidious\.13ad\.de',
 861         r'(?:www\.)?invidious\.toot\.koeln',
 862         r'(?:www\.)?invidious\.fdn\.fr',
 863         r'(?:www\.)?watch\.nettohikari\.com',
 864         r'(?:www\.)?invidious\.namazso\.eu',
 865         r'(?:www\.)?invidious\.silkky\.cloud',
 866         r'(?:www\.)?invidious\.exonip\.de',
 867         r'(?:www\.)?invidious\.riverside\.rocks',
 868         r'(?:www\.)?invidious\.blamefran\.net',
 869         r'(?:www\.)?invidious\.moomoo\.de',
 870         r'(?:www\.)?ytb\.trom\.tf',
 871         r'(?:www\.)?yt\.cyberhost\.uk',
 872         r'(?:www\.)?kgg2m7yk5aybusll\.onion',
 873         r'(?:www\.)?qklhadlycap4cnod\.onion',
 874         r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
 875         r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
 876         r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
 877         r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
 878         r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
 879         r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
 880         r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
 881         r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
 882         r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
 883         r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
 884     )
 885     _VALID_URL = r"""(?x)^
 886                      (
 887                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 888                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
 889                             (?:www\.)?deturl\.com/www\.youtube\.com|
 890                             (?:www\.)?pwnyoutube\.com|
 891                             (?:www\.)?hooktube\.com|
 892                             (?:www\.)?yourepeat\.com|
 893                             tube\.majestyc\.net|
 894                             %(invidious)s|
 895                             youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
 896                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 897                          (?:                                                  # the various things that can precede the ID:
 898                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 899                              |(?:                                             # or the v= param in all its forms
 900                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 901                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 902                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 903                                  v=
 904                              )
 905                          ))
 906                          |(?:
 907                             youtu\.be|                                        # just youtu.be/xxxx
 908                             vid\.plus|                                        # or vid.plus/xxxx
 909                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 910                             %(invidious)s
 911                          )/
 912                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 913                          )
 914                      )?                                                       # all until now is optional -> you can pass the naked ID
 915                      (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
 916                      (?(1).+)?                                                # if we found the ID, everything can follow
 917                      (?:\#|$)""" % {
 918         'invidious': '|'.join(_INVIDIOUS_SITES),
 919     }
 920     _PLAYER_INFO_RE = (
 921         r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
 922         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
 923         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
 924     )
 925     _formats = {
 926         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 927         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 928         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 929         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 930         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 931         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 932         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 933         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 934         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 935         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 936         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 937         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 938         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 939         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 940         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 941         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 942         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 943         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 944
 945
 946         # 3D videos
 947         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 948         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 949         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 950         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 951         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 952         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 953         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 954
 955         # Apple HTTP Live Streaming
 956         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 957         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 958         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 959         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 960         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 961         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 962         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 963         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 964
 965         # DASH mp4 video
 966         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 967         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 968         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 969         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 970         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 971         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 972         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 973         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 974         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 975         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 976         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 977         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 978
 979         # Dash mp4 audio
 980         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 981         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 982         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 983         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 984         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 985         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 986         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 987
 988         # Dash webm
 989         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 990         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 991         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 992         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 993         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 994         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 995         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 996         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 997         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 998         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 999         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1000         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1001         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1002         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1003         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1004         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
1005         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1006         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1007         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1008         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1009         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
1010         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
1011
1012         # Dash webm audio
1013         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
1014         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
1015
1016         # Dash webm audio with opus inside
1017         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
1018         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
1019         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
1020
1021         # RTMP (unnamed)
1022         '_rtmp': {'protocol': 'rtmp'},
1023
1024         # av01 video only formats sometimes served with "unknown" codecs
1025         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1026         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1027         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1028         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
1029     }
1030     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
1031
1032     _AGE_GATE_REASONS = (
1033         'Sign in to confirm your age',
1034         'This video may be inappropriate for some users.',
1035         'Sorry, this content is age-restricted.',
1036         'Please confirm your age.')
1037
1038     _AGE_GATE_STATUS_REASONS = (
1039         'AGE_VERIFICATION_REQUIRED',
1040         'AGE_CHECK_REQUIRED'
1041     )
1042
1043     _GEO_BYPASS = False
1044
1045     IE_NAME = 'youtube'
1046     _TESTS = [
1047         {
1048             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
1049             'info_dict': {
1050                 'id': 'BaW_jenozKc',
1051                 'ext': 'mp4',
1052                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1053                 'uploader': 'Philipp Hagemeister',
1054                 'uploader_id': 'phihag',
1055                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1056                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
1057                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
1058                 'upload_date': '20121002',
1059                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1060                 'categories': ['Science & Technology'],
1061                 'tags': ['youtube-dl'],
1062                 'duration': 10,
1063                 'view_count': int,
1064                 'like_count': int,
1065                 'dislike_count': int,
1066                 'start_time': 1,
1067                 'end_time': 9,
1068             }
1069         },
1070         {
1071             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1072             'note': 'Embed-only video (#1746)',
1073             'info_dict': {
1074                 'id': 'yZIXLfi8CZQ',
1075                 'ext': 'mp4',
1076                 'upload_date': '20120608',
1077                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1078                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1079                 'uploader': 'SET India',
1080                 'uploader_id': 'setindia',
1081                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1082                 'age_limit': 18,
1083             },
1084             'skip': 'Private video',
1085         },
1086         {
1087             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1088             'note': 'Use the first video ID in the URL',
1089             'info_dict': {
1090                 'id': 'BaW_jenozKc',
1091                 'ext': 'mp4',
1092                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1093                 'uploader': 'Philipp Hagemeister',
1094                 'uploader_id': 'phihag',
1095                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1096                 'upload_date': '20121002',
1097                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1098                 'categories': ['Science & Technology'],
1099                 'tags': ['youtube-dl'],
1100                 'duration': 10,
1101                 'view_count': int,
1102                 'like_count': int,
1103                 'dislike_count': int,
1104             },
1105             'params': {
1106                 'skip_download': True,
1107             },
1108         },
1109         {
1110             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1111             'note': '256k DASH audio (format 141) via DASH manifest',
1112             'info_dict': {
1113                 'id': 'a9LDPn-MO4I',
1114                 'ext': 'm4a',
1115                 'upload_date': '20121002',
1116                 'uploader_id': '8KVIDEO',
1117                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1118                 'description': '',
1119                 'uploader': '8KVIDEO',
1120                 'title': 'UHDTV TEST 8K VIDEO.mp4'
1121             },
1122             'params': {
1123                 'youtube_include_dash_manifest': True,
1124                 'format': '141',
1125             },
1126             'skip': 'format 141 not served anymore',
1127         },
1128         # DASH manifest with encrypted signature
1129         {
1130             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1131             'info_dict': {
1132                 'id': 'IB3lcPjvWLA',
1133                 'ext': 'm4a',
1134                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1135                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1136                 'duration': 244,
1137                 'uploader': 'AfrojackVEVO',
1138                 'uploader_id': 'AfrojackVEVO',
1139                 'upload_date': '20131011',
1140                 'abr': 129.495,
1141             },
1142             'params': {
1143                 'youtube_include_dash_manifest': True,
1144                 'format': '141/bestaudio[ext=m4a]',
1145             },
1146         },
1147         # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1148         {
1149             'note': 'Embed allowed age-gate video',
1150             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1151             'info_dict': {
1152                 'id': 'HtVdAasjOgU',
1153                 'ext': 'mp4',
1154                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1155                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1156                 'duration': 142,
1157                 'uploader': 'The Witcher',
1158                 'uploader_id': 'WitcherGame',
1159                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1160                 'upload_date': '20140605',
1161                 'age_limit': 18,
1162             },
1163         },
1164         {
1165             'note': 'Age-gate video with embed allowed in public site',
1166             'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1167             'info_dict': {
1168                 'id': 'HsUATh_Nc2U',
1169                 'ext': 'mp4',
1170                 'title': 'Godzilla 2 (Official Video)',
1171                 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1172                 'upload_date': '20200408',
1173                 'uploader_id': 'FlyingKitty900',
1174                 'uploader': 'FlyingKitty',
1175                 'age_limit': 18,
1176             },
1177         },
1178         {
1179             'note': 'Age-gate video embedable only with clientScreen=EMBED',
1180             'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1181             'info_dict': {
1182                 'id': 'Tq92D6wQ1mg',
1183                 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1184                 'ext': 'mp4','upload_date': '20191227',
1185                 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1186                 'uploader': 'Projekt Melody',
1187                 'description': 'md5:17eccca93a786d51bc67646756894066',
1188                 'age_limit': 18,
1189             },
1190         },
1191         {
1192             'note': 'Non-Agegated non-embeddable video',
1193             'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1194             'info_dict': {
1195                 'id': 'MeJVWBSsPAY',
1196                 'ext': 'mp4',
1197                 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1198                 'uploader': 'Herr Lurik',
1199                 'uploader_id': 'st3in234',
1200                 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1201                 'upload_date': '20130730',
1202             },
1203         },
1204         {
1205             'note': 'Non-bypassable age-gated video',
1206             'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1207             'only_matching': True,
1208         },
1209         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1210         # YouTube Red ad is not captured for creator
1211         {
1212             'url': '__2ABJjxzNo',
1213             'info_dict': {
1214                 'id': '__2ABJjxzNo',
1215                 'ext': 'mp4',
1216                 'duration': 266,
1217                 'upload_date': '20100430',
1218                 'uploader_id': 'deadmau5',
1219                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1220                 'creator': 'deadmau5',
1221                 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1222                 'uploader': 'deadmau5',
1223                 'title': 'Deadmau5 - Some Chords (HD)',
1224                 'alt_title': 'Some Chords',
1225             },
1226             'expected_warnings': [
1227                 'DASH manifest missing',
1228             ]
1229         },
1230         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1231         {
1232             'url': 'lqQg6PlCWgI',
1233             'info_dict': {
1234                 'id': 'lqQg6PlCWgI',
1235                 'ext': 'mp4',
1236                 'duration': 6085,
1237                 'upload_date': '20150827',
1238                 'uploader_id': 'olympic',
1239                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1240                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1241                 'uploader': 'Olympics',
1242                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
1243             },
1244             'params': {
1245                 'skip_download': 'requires avconv',
1246             }
1247         },
1248         # Non-square pixels
1249         {
1250             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1251             'info_dict': {
1252                 'id': '_b-2C3KPAM0',
1253                 'ext': 'mp4',
1254                 'stretched_ratio': 16 / 9.,
1255                 'duration': 85,
1256                 'upload_date': '20110310',
1257                 'uploader_id': 'AllenMeow',
1258                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1259                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1260                 'uploader': '孫ᄋᄅ',
1261                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1262             },
1263         },
1264         # url_encoded_fmt_stream_map is empty string
1265         {
1266             'url': 'qEJwOuvDf7I',
1267             'info_dict': {
1268                 'id': 'qEJwOuvDf7I',
1269                 'ext': 'webm',
1270                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1271                 'description': '',
1272                 'upload_date': '20150404',
1273                 'uploader_id': 'spbelect',
1274                 'uploader': 'Наблюдатели Петербурга',
1275             },
1276             'params': {
1277                 'skip_download': 'requires avconv',
1278             },
1279             'skip': 'This live event has ended.',
1280         },
1281         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1282         {
1283             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1284             'info_dict': {
1285                 'id': 'FIl7x6_3R5Y',
1286                 'ext': 'webm',
1287                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1288                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1289                 'duration': 220,
1290                 'upload_date': '20150625',
1291                 'uploader_id': 'dorappi2000',
1292                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1293                 'uploader': 'dorappi2000',
1294                 'formats': 'mincount:31',
1295             },
1296             'skip': 'not actual anymore',
1297         },
1298         # DASH manifest with segment_list
1299         {
1300             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1301             'md5': '8ce563a1d667b599d21064e982ab9e31',
1302             'info_dict': {
1303                 'id': 'CsmdDsKjzN8',
1304                 'ext': 'mp4',
1305                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1306                 'uploader': 'Airtek',
1307                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1308                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1309                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1310             },
1311             'params': {
1312                 'youtube_include_dash_manifest': True,
1313                 'format': '135',  # bestvideo
1314             },
1315             'skip': 'This live event has ended.',
1316         },
1317         {
1318             # Multifeed videos (multiple cameras), URL is for Main Camera
1319             'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1320             'info_dict': {
1321                 'id': 'jvGDaLqkpTg',
1322                 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1323                 'description': 'md5:e03b909557865076822aa169218d6a5d',
1324             },
1325             'playlist': [{
1326                 'info_dict': {
1327                     'id': 'jvGDaLqkpTg',
1328                     'ext': 'mp4',
1329                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1330                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1331                     'duration': 10643,
1332                     'upload_date': '20161111',
1333                     'uploader': 'Team PGP',
1334                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1335                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1336                 },
1337             }, {
1338                 'info_dict': {
1339                     'id': '3AKt1R1aDnw',
1340                     'ext': 'mp4',
1341                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1342                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1343                     'duration': 10991,
1344                     'upload_date': '20161111',
1345                     'uploader': 'Team PGP',
1346                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1347                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1348                 },
1349             }, {
1350                 'info_dict': {
1351                     'id': 'RtAMM00gpVc',
1352                     'ext': 'mp4',
1353                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1354                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1355                     'duration': 10995,
1356                     'upload_date': '20161111',
1357                     'uploader': 'Team PGP',
1358                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1359                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1360                 },
1361             }, {
1362                 'info_dict': {
1363                     'id': '6N2fdlP3C5U',
1364                     'ext': 'mp4',
1365                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1366                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1367                     'duration': 10990,
1368                     'upload_date': '20161111',
1369                     'uploader': 'Team PGP',
1370                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1371                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1372                 },
1373             }],
1374             'params': {
1375                 'skip_download': True,
1376             },
1377             'skip': 'Not multifeed anymore',
1378         },
1379         {
1380             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1381             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1382             'info_dict': {
1383                 'id': 'gVfLd0zydlo',
1384                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1385             },
1386             'playlist_count': 2,
1387             'skip': 'Not multifeed anymore',
1388         },
1389         {
1390             'url': 'https://vid.plus/FlRa-iH7PGw',
1391             'only_matching': True,
1392         },
1393         {
1394             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1395             'only_matching': True,
1396         },
1397         {
1398             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1399             # Also tests cut-off URL expansion in video description (see
1400             # https://github.com/ytdl-org/youtube-dl/issues/1892,
1401             # https://github.com/ytdl-org/youtube-dl/issues/8164)
1402             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1403             'info_dict': {
1404                 'id': 'lsguqyKfVQg',
1405                 'ext': 'mp4',
1406                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1407                 'alt_title': 'Dark Walk',
1408                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1409                 'duration': 133,
1410                 'upload_date': '20151119',
1411                 'uploader_id': 'IronSoulElf',
1412                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1413                 'uploader': 'IronSoulElf',
1414                 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1415                 'track': 'Dark Walk',
1416                 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1417                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1418             },
1419             'params': {
1420                 'skip_download': True,
1421             },
1422         },
1423         {
1424             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1425             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1426             'only_matching': True,
1427         },
1428         {
1429             # Video with yt:stretch=17:0
1430             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1431             'info_dict': {
1432                 'id': 'Q39EVAstoRM',
1433                 'ext': 'mp4',
1434                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1435                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1436                 'upload_date': '20151107',
1437                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1438                 'uploader': 'CH GAMER DROID',
1439             },
1440             'params': {
1441                 'skip_download': True,
1442             },
1443             'skip': 'This video does not exist.',
1444         },
1445         {
1446             # Video with incomplete 'yt:stretch=16:'
1447             'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1448             'only_matching': True,
1449         },
1450         {
1451             # Video licensed under Creative Commons
1452             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1453             'info_dict': {
1454                 'id': 'M4gD1WSo5mA',
1455                 'ext': 'mp4',
1456                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1457                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1458                 'duration': 721,
1459                 'upload_date': '20150127',
1460                 'uploader_id': 'BerkmanCenter',
1461                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1462                 'uploader': 'The Berkman Klein Center for Internet & Society',
1463                 'license': 'Creative Commons Attribution license (reuse allowed)',
1464             },
1465             'params': {
1466                 'skip_download': True,
1467             },
1468         },
1469         {
1470             # Channel-like uploader_url
1471             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1472             'info_dict': {
1473                 'id': 'eQcmzGIKrzg',
1474                 'ext': 'mp4',
1475                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1476                 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1477                 'duration': 4060,
1478                 'upload_date': '20151119',
1479                 'uploader': 'Bernie Sanders',
1480                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1481                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1482                 'license': 'Creative Commons Attribution license (reuse allowed)',
1483             },
1484             'params': {
1485                 'skip_download': True,
1486             },
1487         },
1488         {
1489             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1490             'only_matching': True,
1491         },
1492         {
1493             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1494             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1495             'only_matching': True,
1496         },
1497         {
1498             # Rental video preview
1499             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1500             'info_dict': {
1501                 'id': 'uGpuVWrhIzE',
1502                 'ext': 'mp4',
1503                 'title': 'Piku - Trailer',
1504                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1505                 'upload_date': '20150811',
1506                 'uploader': 'FlixMatrix',
1507                 'uploader_id': 'FlixMatrixKaravan',
1508                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1509                 'license': 'Standard YouTube License',
1510             },
1511             'params': {
1512                 'skip_download': True,
1513             },
1514             'skip': 'This video is not available.',
1515         },
1516         {
1517             # YouTube Red video with episode data
1518             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1519             'info_dict': {
1520                 'id': 'iqKdEhx-dD4',
1521                 'ext': 'mp4',
1522                 'title': 'Isolation - Mind Field (Ep 1)',
1523                 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1524                 'duration': 2085,
1525                 'upload_date': '20170118',
1526                 'uploader': 'Vsauce',
1527                 'uploader_id': 'Vsauce',
1528                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1529                 'series': 'Mind Field',
1530                 'season_number': 1,
1531                 'episode_number': 1,
1532             },
1533             'params': {
1534                 'skip_download': True,
1535             },
1536             'expected_warnings': [
1537                 'Skipping DASH manifest',
1538             ],
1539         },
1540         {
1541             # The following content has been identified by the YouTube community
1542             # as inappropriate or offensive to some audiences.
1543             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1544             'info_dict': {
1545                 'id': '6SJNVb0GnPI',
1546                 'ext': 'mp4',
1547                 'title': 'Race Differences in Intelligence',
1548                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1549                 'duration': 965,
1550                 'upload_date': '20140124',
1551                 'uploader': 'New Century Foundation',
1552                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1553                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1554             },
1555             'params': {
1556                 'skip_download': True,
1557             },
1558             'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1559         },
1560         {
1561             # itag 212
1562             'url': '1t24XAntNCY',
1563             'only_matching': True,
1564         },
1565         {
1566             # geo restricted to JP
1567             'url': 'sJL6WA-aGkQ',
1568             'only_matching': True,
1569         },
1570         {
1571             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1572             'only_matching': True,
1573         },
1574         {
1575             'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1576             'only_matching': True,
1577         },
1578         {
1579             # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1580             'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1581             'only_matching': True,
1582         },
1583         {
1584             # DRM protected
1585             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1586             'only_matching': True,
1587         },
1588         {
1589             # Video with unsupported adaptive stream type formats
1590             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1591             'info_dict': {
1592                 'id': 'Z4Vy8R84T1U',
1593                 'ext': 'mp4',
1594                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1595                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1596                 'duration': 433,
1597                 'upload_date': '20130923',
1598                 'uploader': 'Amelia Putri Harwita',
1599                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1600                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1601                 'formats': 'maxcount:10',
1602             },
1603             'params': {
1604                 'skip_download': True,
1605                 'youtube_include_dash_manifest': False,
1606             },
1607             'skip': 'not actual anymore',
1608         },
1609         {
1610             # Youtube Music Auto-generated description
1611             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1612             'info_dict': {
1613                 'id': 'MgNrAu2pzNs',
1614                 'ext': 'mp4',
1615                 'title': 'Voyeur Girl',
1616                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1617                 'upload_date': '20190312',
1618                 'uploader': 'Stephen - Topic',
1619                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1620                 'artist': 'Stephen',
1621                 'track': 'Voyeur Girl',
1622                 'album': 'it\'s too much love to know my dear',
1623                 'release_date': '20190313',
1624                 'release_year': 2019,
1625             },
1626             'params': {
1627                 'skip_download': True,
1628             },
1629         },
1630         {
1631             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1632             'only_matching': True,
1633         },
1634         {
1635             # invalid -> valid video id redirection
1636             'url': 'DJztXj2GPfl',
1637             'info_dict': {
1638                 'id': 'DJztXj2GPfk',
1639                 'ext': 'mp4',
1640                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1641                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1642                 'upload_date': '20090125',
1643                 'uploader': 'Prochorowka',
1644                 'uploader_id': 'Prochorowka',
1645                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1646                 'artist': 'Panjabi MC',
1647                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1648                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1649             },
1650             'params': {
1651                 'skip_download': True,
1652             },
1653             'skip': 'Video unavailable',
1654         },
1655         {
1656             # empty description results in an empty string
1657             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1658             'info_dict': {
1659                 'id': 'x41yOUIvK2k',
1660                 'ext': 'mp4',
1661                 'title': 'IMG 3456',
1662                 'description': '',
1663                 'upload_date': '20170613',
1664                 'uploader_id': 'ElevageOrVert',
1665                 'uploader': 'ElevageOrVert',
1666             },
1667             'params': {
1668                 'skip_download': True,
1669             },
1670         },
1671         {
1672             # with '};' inside yt initial data (see [1])
1673             # see [2] for an example with '};' inside ytInitialPlayerResponse
1674             # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1675             # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1676             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1677             'info_dict': {
1678                 'id': 'CHqg6qOn4no',
1679                 'ext': 'mp4',
1680                 'title': 'Part 77   Sort a list of simple types in c#',
1681                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1682                 'upload_date': '20130831',
1683                 'uploader_id': 'kudvenkat',
1684                 'uploader': 'kudvenkat',
1685             },
1686             'params': {
1687                 'skip_download': True,
1688             },
1689         },
1690         {
1691             # another example of '};' in ytInitialData
1692             'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1693             'only_matching': True,
1694         },
1695         {
1696             'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1697             'only_matching': True,
1698         },
1699         {
1700             # https://github.com/ytdl-org/youtube-dl/pull/28094
1701             'url': 'OtqTfy26tG0',
1702             'info_dict': {
1703                 'id': 'OtqTfy26tG0',
1704                 'ext': 'mp4',
1705                 'title': 'Burn Out',
1706                 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1707                 'upload_date': '20141120',
1708                 'uploader': 'The Cinematic Orchestra - Topic',
1709                 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1710                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1711                 'artist': 'The Cinematic Orchestra',
1712                 'track': 'Burn Out',
1713                 'album': 'Every Day',
1714                 'release_data': None,
1715                 'release_year': None,
1716             },
1717             'params': {
1718                 'skip_download': True,
1719             },
1720         },
1721         {
1722             # controversial video, only works with bpctr when authenticated with cookies
1723             'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1724             'only_matching': True,
1725         },
1726         {
1727             # controversial video, requires bpctr/contentCheckOk
1728             'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1729             'info_dict': {
1730                 'id': 'SZJvDhaSDnc',
1731                 'ext': 'mp4',
1732                 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1733                 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1734                 'uploader': 'CBS This Morning',
1735                 'uploader_id': 'CBSThisMorning',
1736                 'upload_date': '20140716',
1737                 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1738             }
1739         },
1740         {
1741             # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1742             'url': 'cBvYw8_A0vQ',
1743             'info_dict': {
1744                 'id': 'cBvYw8_A0vQ',
1745                 'ext': 'mp4',
1746                 'title': '4K Ueno Okachimachi  Street  Scenes  上野御徒町歩き',
1747                 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1748                 'upload_date': '20201120',
1749                 'uploader': 'Walk around Japan',
1750                 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1751                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1752             },
1753             'params': {
1754                 'skip_download': True,
1755             },
1756         }, {
1757             # Has multiple audio streams
1758             'url': 'WaOKSUlf4TM',
1759             'only_matching': True
1760         }, {
1761             # Requires Premium: has format 141 when requested using YTM url
1762             'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1763             'only_matching': True
1764         }, {
1765             # multiple subtitles with same lang_code
1766             'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1767             'only_matching': True,
1768         }, {
1769             # Force use android client fallback
1770             'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1771             'info_dict': {
1772                 'id': 'YOelRv7fMxY',
1773                 'title': 'DIGGING A SECRET TUNNEL Part 1',
1774                 'ext': '3gp',
1775                 'upload_date': '20210624',
1776                 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1777                 'uploader': 'colinfurze',
1778                 'uploader_id': 'colinfurze',
1779                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1780                 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1781             },
1782             'params': {
1783                 'format': '17',  # 3gp format available on android
1784                 'extractor_args': {'youtube': {'player_client': ['android']}},
1785             },
1786         },
1787         {
1788             # Skip download of additional client configs (remix client config in this case)
1789             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1790             'only_matching': True,
1791             'params': {
1792                 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1793             },
1794         }
1795     ]
1796
1797     @classmethod
1798     def suitable(cls, url):
1799         # Hack for lazy extractors until more generic solution is implemented
1800         # (see #28780)
1801         from .youtube import parse_qs
1802         qs = parse_qs(url)
1803         if qs.get('list', [None])[0]:
1804             return False
1805         return super(YoutubeIE, cls).suitable(url)
1806
1807     def __init__(self, *args, **kwargs):
1808         super(YoutubeIE, self).__init__(*args, **kwargs)
1809         self._code_cache = {}
1810         self._player_cache = {}
1811
1812     def _extract_player_url(self, ytcfg=None, webpage=None):
1813         player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1814         if not player_url and webpage:
1815             player_url = self._search_regex(
1816                 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1817                 webpage, 'player URL', fatal=False)
1818         if not player_url:
1819             return None
1820         if player_url.startswith('//'):
1821             player_url = 'https:' + player_url
1822         elif not re.match(r'https?://', player_url):
1823             player_url = compat_urlparse.urljoin(
1824                 'https://www.youtube.com', player_url)
1825         return player_url
1826
1827     def _signature_cache_id(self, example_sig):
1828         """ Return a string representation of a signature """
1829         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1830
1831     @classmethod
1832     def _extract_player_info(cls, player_url):
1833         for player_re in cls._PLAYER_INFO_RE:
1834             id_m = re.search(player_re, player_url)
1835             if id_m:
1836                 break
1837         else:
1838             raise ExtractorError('Cannot identify player %r' % player_url)
1839         return id_m.group('id')
1840
1841     def _load_player(self, video_id, player_url, fatal=True) -> bool:
1842         player_id = self._extract_player_info(player_url)
1843         if player_id not in self._code_cache:
1844             self._code_cache[player_id] = self._download_webpage(
1845                 player_url, video_id, fatal=fatal,
1846                 note='Downloading player ' + player_id,
1847                 errnote='Download of %s failed' % player_url)
1848         return player_id in self._code_cache
1849
1850     def _extract_signature_function(self, video_id, player_url, example_sig):
1851         player_id = self._extract_player_info(player_url)
1852
1853         # Read from filesystem cache
1854         func_id = 'js_%s_%s' % (
1855             player_id, self._signature_cache_id(example_sig))
1856         assert os.path.basename(func_id) == func_id
1857
1858         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1859         if cache_spec is not None:
1860             return lambda s: ''.join(s[i] for i in cache_spec)
1861
1862         if self._load_player(video_id, player_url):
1863             code = self._code_cache[player_id]
1864             res = self._parse_sig_js(code)
1865
1866             test_string = ''.join(map(compat_chr, range(len(example_sig))))
1867             cache_res = res(test_string)
1868             cache_spec = [ord(c) for c in cache_res]
1869
1870             self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1871             return res
1872
1873     def _print_sig_code(self, func, example_sig):
1874         def gen_sig_code(idxs):
1875             def _genslice(start, end, step):
1876                 starts = '' if start == 0 else str(start)
1877                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1878                 steps = '' if step == 1 else (':%d' % step)
1879                 return 's[%s%s%s]' % (starts, ends, steps)
1880
1881             step = None
1882             # Quelch pyflakes warnings - start will be set when step is set
1883             start = '(Never used)'
1884             for i, prev in zip(idxs[1:], idxs[:-1]):
1885                 if step is not None:
1886                     if i - prev == step:
1887                         continue
1888                     yield _genslice(start, prev, step)
1889                     step = None
1890                     continue
1891                 if i - prev in [-1, 1]:
1892                     step = i - prev
1893                     start = prev
1894                     continue
1895                 else:
1896                     yield 's[%d]' % prev
1897             if step is None:
1898                 yield 's[%d]' % i
1899             else:
1900                 yield _genslice(start, i, step)
1901
1902         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1903         cache_res = func(test_string)
1904         cache_spec = [ord(c) for c in cache_res]
1905         expr_code = ' + '.join(gen_sig_code(cache_spec))
1906         signature_id_tuple = '(%s)' % (
1907             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1908         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1909                 '    return %s\n') % (signature_id_tuple, expr_code)
1910         self.to_screen('Extracted signature function:\n' + code)
1911
1912     def _parse_sig_js(self, jscode):
1913         funcname = self._search_regex(
1914             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1915              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1916              r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
1917              r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
1918              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1919              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1920              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1921              # Obsolete patterns
1922              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1923              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1924              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1925              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1926              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1927              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1928              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1929              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1930             jscode, 'Initial JS player signature function name', group='sig')
1931
1932         jsi = JSInterpreter(jscode)
1933         initial_function = jsi.extract_function(funcname)
1934         return lambda s: initial_function([s])
1935
1936     def _decrypt_signature(self, s, video_id, player_url):
1937         """Turn the encrypted s field into a working signature"""
1938
1939         if player_url is None:
1940             raise ExtractorError('Cannot decrypt signature without player_url')
1941
1942         try:
1943             player_id = (player_url, self._signature_cache_id(s))
1944             if player_id not in self._player_cache:
1945                 func = self._extract_signature_function(
1946                     video_id, player_url, s
1947                 )
1948                 self._player_cache[player_id] = func
1949             func = self._player_cache[player_id]
1950             if self.get_param('youtube_print_sig_code'):
1951                 self._print_sig_code(func, s)
1952             return func(s)
1953         except Exception as e:
1954             tb = traceback.format_exc()
1955             raise ExtractorError(
1956                 'Signature extraction failed: ' + tb, cause=e)
1957
1958     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1959         """
1960         Extract signatureTimestamp (sts)
1961         Required to tell API what sig/player version is in use.
1962         """
1963         sts = None
1964         if isinstance(ytcfg, dict):
1965             sts = int_or_none(ytcfg.get('STS'))
1966
1967         if not sts:
1968             # Attempt to extract from player
1969             if player_url is None:
1970                 error_msg = 'Cannot extract signature timestamp without player_url.'
1971                 if fatal:
1972                     raise ExtractorError(error_msg)
1973                 self.report_warning(error_msg)
1974                 return
1975             if self._load_player(video_id, player_url, fatal=fatal):
1976                 player_id = self._extract_player_info(player_url)
1977                 code = self._code_cache[player_id]
1978                 sts = int_or_none(self._search_regex(
1979                     r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1980                     'JS player signature timestamp', group='sts', fatal=fatal))
1981         return sts
1982
1983     def _mark_watched(self, video_id, player_responses):
1984         playback_url = traverse_obj(
1985             player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
1986             expected_type=url_or_none, get_all=False)
1987         if not playback_url:
1988             self.report_warning('Unable to mark watched')
1989             return
1990         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1991         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1992
1993         # cpn generation algorithm is reverse engineered from base.js.
1994         # In fact it works even with dummy cpn.
1995         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1996         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1997
1998         qs.update({
1999             'ver': ['2'],
2000             'cpn': [cpn],
2001         })
2002         playback_url = compat_urlparse.urlunparse(
2003             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
2004
2005         self._download_webpage(
2006             playback_url, video_id, 'Marking watched',
2007             'Unable to mark watched', fatal=False)
2008
2009     @staticmethod
2010     def _extract_urls(webpage):
2011         # Embedded YouTube player
2012         entries = [
2013             unescapeHTML(mobj.group('url'))
2014             for mobj in re.finditer(r'''(?x)
2015             (?:
2016                 <iframe[^>]+?src=|
2017                 data-video-url=|
2018                 <embed[^>]+?src=|
2019                 embedSWF\(?:\s*|
2020                 <object[^>]+data=|
2021                 new\s+SWFObject\(
2022             )
2023             (["\'])
2024                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
2025                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
2026             \1''', webpage)]
2027
2028         # lazyYT YouTube embed
2029         entries.extend(list(map(
2030             unescapeHTML,
2031             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
2032
2033         # Wordpress "YouTube Video Importer" plugin
2034         matches = re.findall(r'''(?x)<div[^>]+
2035             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
2036             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
2037         entries.extend(m[-1] for m in matches)
2038
2039         return entries
2040
2041     @staticmethod
2042     def _extract_url(webpage):
2043         urls = YoutubeIE._extract_urls(webpage)
2044         return urls[0] if urls else None
2045
2046     @classmethod
2047     def extract_id(cls, url):
2048         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2049         if mobj is None:
2050             raise ExtractorError('Invalid URL: %s' % url)
2051         video_id = mobj.group(2)
2052         return video_id
2053
2054     def _extract_chapters_from_json(self, data, duration):
2055         chapter_list = traverse_obj(
2056             data, (
2057                 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2058                 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2059             ), expected_type=list)
2060
2061         return self._extract_chapters(
2062             chapter_list,
2063             chapter_time=lambda chapter: float_or_none(
2064                 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2065             chapter_title=lambda chapter: traverse_obj(
2066                 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2067             duration=duration)
2068
2069     def _extract_chapters_from_engagement_panel(self, data, duration):
2070         content_list = traverse_obj(
2071             data,
2072             ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2073             expected_type=list, default=[])
2074         chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2075         chapter_title = lambda chapter: self._get_text(chapter, 'title')
2076
2077         return next((
2078             filter(None, (
2079                 self._extract_chapters(
2080                     traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2081                     chapter_time, chapter_title, duration)
2082                 for contents in content_list
2083             ))), [])
2084
2085     def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2086         chapters = []
2087         last_chapter = {'start_time': 0}
2088         for idx, chapter in enumerate(chapter_list or []):
2089             title = chapter_title(chapter)
2090             start_time = chapter_time(chapter)
2091             if start_time is None:
2092                 continue
2093             last_chapter['end_time'] = start_time
2094             if start_time < last_chapter['start_time']:
2095                 if idx == 1:
2096                     chapters.pop()
2097                     self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2098                 else:
2099                     self.report_warning(f'Invalid start time for chapter "{title}"')
2100                     continue
2101             last_chapter = {'start_time': start_time, 'title': title}
2102             chapters.append(last_chapter)
2103         last_chapter['end_time'] = duration
2104         return chapters
2105
2106     def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2107         return self._parse_json(self._search_regex(
2108             (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2109              regex), webpage, name, default='{}'), video_id, fatal=False)
2110
2111     @staticmethod
2112     def parse_time_text(time_text):
2113         """
2114         Parse the comment time text
2115         time_text is in the format 'X units ago (edited)'
2116         """
2117         time_text_split = time_text.split(' ')
2118         if len(time_text_split) >= 3:
2119             try:
2120                 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2121             except ValueError:
2122                 return None
2123
2124     def _extract_comment(self, comment_renderer, parent=None):
2125         comment_id = comment_renderer.get('commentId')
2126         if not comment_id:
2127             return
2128
2129         text = self._get_text(comment_renderer, 'contentText')
2130
2131         # note: timestamp is an estimate calculated from the current time and time_text
2132         time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2133         time_text_dt = self.parse_time_text(time_text)
2134         if isinstance(time_text_dt, datetime.datetime):
2135             timestamp = calendar.timegm(time_text_dt.timetuple())
2136         author = self._get_text(comment_renderer, 'authorText')
2137         author_id = try_get(comment_renderer,
2138                             lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2139
2140         votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2141                                                        lambda x: x['likeCount']), compat_str)) or 0
2142         author_thumbnail = try_get(comment_renderer,
2143                                    lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2144
2145         author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2146         is_favorited = 'creatorHeart' in (try_get(
2147             comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2148         return {
2149             'id': comment_id,
2150             'text': text,
2151             'timestamp': timestamp,
2152             'time_text': time_text,
2153             'like_count': votes,
2154             'is_favorited': is_favorited,
2155             'author': author,
2156             'author_id': author_id,
2157             'author_thumbnail': author_thumbnail,
2158             'author_is_uploader': author_is_uploader,
2159             'parent': parent or 'root'
2160         }
2161
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """
        Generator over the comments reachable from root_continuation_data

        Yields the comment dicts produced by _extract_comment. While the
        first continuation of a top-level call (parent is None) is handled,
        it may also yield a bare int: the estimated total number of comments
        parsed from the comments header. Replies are fetched by calling this
        method recursively with parent set to the id of the comment they
        belong to.

        comment_counts is a 3-item list shared with the recursive calls:
        [comments so far, estimated total comments, current comment thread #]
        """

        def extract_header(contents):
            """Return (estimated total comments, continuation for the requested sort order)"""
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, 'countText', 'commentsCount', max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                # Anything other than 'top' (including no value) selects index 1
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    # No usable continuation in this item, try the next one
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            """Yield every comment in contents, each followed by its replies"""
            if not parent:
                # A new page of top-level comments restarts the thread counter
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                # NOTE(review): here a (lambda, dict) tuple is passed as the getter
                # argument, whereas the other try_get calls in this method pass the
                # expected type as a separate argument - confirm this is intended
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    # Recurse with this comment as the parent of its replies
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            # Replies were not requested
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        # Only a top-level call starts with the header handling below
        is_first_continuation = parent is None

        # One API request per iteration, until no continuation is left
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Keep the previous visitor data when the response has none
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            # Stays None (ending the outer loop) unless a branch below finds one
            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        # The first response is only used for the header
                        # (total count and continuation of the sort order)
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    # NOTE(review): enumerate starts at 0, so count is also 0 when
                    # exactly one entry was yielded - confirm the check below is intended
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    # NOTE(review): as above, count is also 0 after a single entry
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2332
2333     @staticmethod
2334     def _generate_comment_continuation(video_id):
2335         """
2336         Generates initial comment section continuation token from given video id
2337         """
2338         b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2339         parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2340         new_continuation_intlist = list(itertools.chain.from_iterable(
2341             [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2342         return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2343
2344     def _extract_comments(self, ytcfg, video_id, contents, webpage):
2345         """Entry for comment extraction"""
2346         def _real_comment_extract(contents):
2347             if isinstance(contents, list):
2348                 for entry in contents:
2349                     for key, renderer in entry.items():
2350                         if key not in known_entry_comment_renderers:
2351                             continue
2352                         yield from self._comment_entries(
2353                             renderer, video_id=video_id, ytcfg=ytcfg,
2354                             identity_token=self._extract_identity_token(webpage, item_id=video_id),
2355                             account_syncid=self._extract_account_syncid(ytcfg))
2356                         break
2357         comments = []
2358         known_entry_comment_renderers = ('itemSectionRenderer',)
2359         estimated_total = 0
2360         max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2361         # Force English regardless of account setting to prevent parsing issues
2362         # See: https://github.com/yt-dlp/yt-dlp/issues/532
2363         ytcfg = copy.deepcopy(ytcfg)
2364         traverse_obj(
2365             ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
2366         try:
2367             for comment in _real_comment_extract(contents):
2368                 if len(comments) >= max_comments:
2369                     break
2370                 if isinstance(comment, int):
2371                     estimated_total = comment
2372                     continue
2373                 comments.append(comment)
2374         except KeyboardInterrupt:
2375             self.to_screen('Interrupted by user')
2376         self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2377         return {
2378             'comments': comments,
2379             'comment_count': len(comments),
2380         }
2381
2382     @staticmethod
2383     def _generate_player_context(sts=None):
2384         context = {
2385             'html5Preference': 'HTML5_PREF_WANTS',
2386         }
2387         if sts is not None:
2388             context['signatureTimestamp'] = sts
2389         return {
2390             'playbackContext': {
2391                 'contentPlaybackContext': context
2392             },
2393             'contentCheckOk': True,
2394             'racyCheckOk': True
2395         }
2396
2397     def _is_agegated(self, player_response):
2398         reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2399         for reason in reasons:
2400             if reason in self._AGE_GATE_REASONS + self._AGE_GATE_STATUS_REASONS:
2401                 return True
2402         if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')) is not None:
2403             return True
2404         return False
2405
2406     def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr):
2407
2408         session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2409         syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2410         sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False)
2411         headers = self.generate_api_headers(
2412             player_ytcfg, identity_token, syncid,
2413             default_client=client, session_index=session_index)
2414
2415         yt_query = {'videoId': video_id}
2416         yt_query.update(self._generate_player_context(sts))
2417         return self._extract_response(
2418             item_id=video_id, ep='player', query=yt_query,
2419             ytcfg=player_ytcfg, headers=headers, fatal=False,
2420             default_client=client,
2421             note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2422         ) or None
2423
2424     def _get_requested_clients(self, url, smuggled_data):
2425         requested_clients = []
2426         allowed_clients = sorted(
2427             [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2428             key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2429         for client in self._configuration_arg('player_client'):
2430             if client in allowed_clients:
2431                 requested_clients.append(client)
2432             elif client == 'all':
2433                 requested_clients.extend(allowed_clients)
2434             else:
2435                 self.report_warning(f'Skipping unsupported client {client}')
2436         if not requested_clients:
2437             requested_clients = ['android', 'web']
2438
2439         if smuggled_data.get('is_music_url') or self.is_music_url(url):
2440             requested_clients.extend(
2441                 f'{client}_music' for client in requested_clients if not client.endswith('_music'))
2442
2443         return orderedSet(requested_clients)
2444
2445     def _extract_player_ytcfg(self, client, video_id):
2446         url = {
2447             'web_music': 'https://music.youtube.com',
2448             'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2449         }.get(client)
2450         if not url:
2451             return {}
2452         webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2453         return self.extract_ytcfg(video_id, webpage) or {}
2454
2455     def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, player_url, identity_token):
2456         initial_pr = None
2457         if webpage:
2458             initial_pr = self._extract_yt_initial_variable(
2459                 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2460                 video_id, 'initial player response')
2461
2462         original_clients = clients
2463         clients = clients[::-1]
2464         while clients:
2465             client = clients.pop()
2466             player_ytcfg = master_ytcfg if client == 'web' else {}
2467             if 'configs' not in self._configuration_arg('player_skip'):
2468                 player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
2469
2470             pr = (
2471                 initial_pr if client == 'web' and initial_pr
2472                 else self._extract_player_response(
2473                     client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr))
2474             if pr:
2475                 yield pr
2476
2477             if self._is_agegated(pr):
2478                 client = f'{client}_agegate'
2479                 if client in INNERTUBE_CLIENTS and client not in original_clients:
2480                     clients.append(client)
2481
2482         # Android player_response does not have microFormats which are needed for
2483         # extraction of some data. So we return the initial_pr with formats
2484         # stripped out even if not requested by the user
2485         # See: https://github.com/yt-dlp/yt-dlp/issues/501
2486         if initial_pr and 'web' not in original_clients:
2487             initial_pr['streamingData'] = None
2488             yield initial_pr
2489
2490     def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2491         itags, stream_ids = [], []
2492         itag_qualities, res_qualities = {}, {}
2493         q = qualities([
2494             # Normally tiny is the smallest video-only formats. But
2495             # audio-only formats with unknown quality may get tagged as tiny
2496             'tiny',
2497             'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
2498             'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2499         ])
2500         streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2501
2502         for fmt in streaming_formats:
2503             if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2504                 continue
2505
2506             itag = str_or_none(fmt.get('itag'))
2507             audio_track = fmt.get('audioTrack') or {}
2508             stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2509             if stream_id in stream_ids:
2510                 continue
2511
2512             quality = fmt.get('quality')
2513             height = int_or_none(fmt.get('height'))
2514             if quality == 'tiny' or not quality:
2515                 quality = fmt.get('audioQuality', '').lower() or quality
2516             # The 3gp format (17) in android client has a quality of "small",
2517             # but is actually worse than other formats
2518             if itag == '17':
2519                 quality = 'tiny'
2520             if quality:
2521                 if itag:
2522                     itag_qualities[itag] = quality
2523                 if height:
2524                     res_qualities[height] = quality
2525             # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2526             # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2527             # number of fragment that would subsequently requested with (`&sq=N`)
2528             if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2529                 continue
2530
2531             fmt_url = fmt.get('url')
2532             if not fmt_url:
2533                 sc = compat_parse_qs(fmt.get('signatureCipher'))
2534                 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2535                 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2536                 if not (sc and fmt_url and encrypted_sig):
2537                     continue
2538                 if not player_url:
2539                     continue
2540                 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2541                 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2542                 fmt_url += '&' + sp + '=' + signature
2543
2544             if itag:
2545                 itags.append(itag)
2546                 stream_ids.append(stream_id)
2547
2548             tbr = float_or_none(
2549                 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2550             dct = {
2551                 'asr': int_or_none(fmt.get('audioSampleRate')),
2552                 'filesize': int_or_none(fmt.get('contentLength')),
2553                 'format_id': itag,
2554                 'format_note': ', '.join(filter(None, (
2555                     audio_track.get('displayName'),
2556                     fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
2557                 'fps': int_or_none(fmt.get('fps')),
2558                 'height': height,
2559                 'quality': q(quality),
2560                 'tbr': tbr,
2561                 'url': fmt_url,
2562                 'width': int_or_none(fmt.get('width')),
2563                 'language': audio_track.get('id', '').split('.')[0],
2564             }
2565             mime_mobj = re.match(
2566                 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2567             if mime_mobj:
2568                 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2569                 dct.update(parse_codecs(mime_mobj.group(2)))
2570             no_audio = dct.get('acodec') == 'none'
2571             no_video = dct.get('vcodec') == 'none'
2572             if no_audio:
2573                 dct['vbr'] = tbr
2574             if no_video:
2575                 dct['abr'] = tbr
2576             if no_audio or no_video:
2577                 dct['downloader_options'] = {
2578                     # Youtube throttles chunks >~10M
2579                     'http_chunk_size': 10485760,
2580                 }
2581                 if dct.get('ext'):
2582                     dct['container'] = dct['ext'] + '_dash'
2583             yield dct
2584
2585         skip_manifests = self._configuration_arg('skip')
2586         get_dash = not is_live and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2587         get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2588
2589         def guess_quality(f):
2590             for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
2591                 if val in qdict:
2592                     return q(qdict[val])
2593             return -1
2594
2595         for sd in streaming_data:
2596             hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2597             if hls_manifest_url:
2598                 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2599                     itag = self._search_regex(
2600                         r'/itag/(\d+)', f['url'], 'itag', default=None)
2601                     if itag in itags:
2602                         continue
2603                     if itag:
2604                         f['format_id'] = itag
2605                         itags.append(itag)
2606                     f['quality'] = guess_quality(f)
2607                     yield f
2608
2609             dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2610             if dash_manifest_url:
2611                 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2612                     itag = f['format_id']
2613                     if itag in itags:
2614                         continue
2615                     if itag:
2616                         itags.append(itag)
2617                     f['quality'] = guess_quality(f)
2618                     filesize = int_or_none(self._search_regex(
2619                         r'/clen/(\d+)', f.get('fragment_base_url')
2620                         or f['url'], 'file size', default=None))
2621                     if filesize:
2622                         f['filesize'] = filesize
2623                     yield f
2624
2625     def _real_extract(self, url):
2626         url, smuggled_data = unsmuggle_url(url, {})
2627         video_id = self._match_id(url)
2628
2629         base_url = self.http_scheme() + '//www.youtube.com/'
2630         webpage_url = base_url + 'watch?v=' + video_id
2631         webpage = self._download_webpage(
2632             webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2633
2634         master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2635         player_url = self._extract_player_url(master_ytcfg, webpage)
2636         identity_token = self._extract_identity_token(webpage, video_id)
2637
2638         player_responses = list(self._extract_player_responses(
2639             self._get_requested_clients(url, smuggled_data),
2640             video_id, webpage, master_ytcfg, player_url, identity_token))
2641
2642         get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
2643
2644         playability_statuses = traverse_obj(
2645             player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2646
2647         trailer_video_id = get_first(
2648             playability_statuses,
2649             ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2650             expected_type=str)
2651         if trailer_video_id:
2652             return self.url_result(
2653                 trailer_video_id, self.ie_key(), trailer_video_id)
2654
2655         search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2656                        if webpage else (lambda x: None))
2657
2658         video_details = traverse_obj(
2659             player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2660         microformats = traverse_obj(
2661             player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2662             expected_type=dict, default=[])
2663         video_title = (
2664             get_first(video_details, 'title')
2665             or self._get_text(microformats, (..., 'title'))
2666             or search_meta(['og:title', 'twitter:title', 'title']))
2667         video_description = get_first(video_details, 'shortDescription')
2668
2669         if not smuggled_data.get('force_singlefeed', False):
2670             if not self.get_param('noplaylist'):
2671                 multifeed_metadata_list = get_first(
2672                     player_responses,
2673                     ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2674                     expected_type=str)
2675                 if multifeed_metadata_list:
2676                     entries = []
2677                     feed_ids = []
2678                     for feed in multifeed_metadata_list.split(','):
2679                         # Unquote should take place before split on comma (,) since textual
2680                         # fields may contain comma as well (see
2681                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
2682                         feed_data = compat_parse_qs(
2683                             compat_urllib_parse_unquote_plus(feed))
2684
2685                         def feed_entry(name):
2686                             return try_get(
2687                                 feed_data, lambda x: x[name][0], compat_str)
2688
2689                         feed_id = feed_entry('id')
2690                         if not feed_id:
2691                             continue
2692                         feed_title = feed_entry('title')
2693                         title = video_title
2694                         if feed_title:
2695                             title += ' (%s)' % feed_title
2696                         entries.append({
2697                             '_type': 'url_transparent',
2698                             'ie_key': 'Youtube',
2699                             'url': smuggle_url(
2700                                 '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2701                                 {'force_singlefeed': True}),
2702                             'title': title,
2703                         })
2704                         feed_ids.append(feed_id)
2705                     self.to_screen(
2706                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2707                         % (', '.join(feed_ids), video_id))
2708                     return self.playlist_result(
2709                         entries, video_id, video_title, video_description)
2710             else:
2711                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2712
2713         live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2714         is_live = get_first(video_details, 'isLive')
2715         if is_live is None:
2716             is_live = get_first(live_broadcast_details, 'isLiveNow')
2717
2718         streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2719         formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2720
2721         if not formats:
2722             if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2723                 self.raise_no_formats(
2724                     'This video is DRM protected.', expected=True)
2725             pemr = get_first(
2726                 playability_statuses,
2727                 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2728             reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2729             subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2730             if subreason:
2731                 if subreason == 'The uploader has not made this video available in your country.':
2732                     countries = get_first(microformats, 'availableCountries')
2733                     if not countries:
2734                         regions_allowed = search_meta('regionsAllowed')
2735                         countries = regions_allowed.split(',') if regions_allowed else None
2736                     self.raise_geo_restricted(subreason, countries, metadata_available=True)
2737                 reason += f'. {subreason}'
2738             if reason:
2739                 self.raise_no_formats(reason, expected=True)
2740
2741         for f in formats:
2742             if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']:  # throttled
2743                 f['source_preference'] = -10
2744                 note = f.get('format_note')
2745                 f['format_note'] = f'{note} (throttled)' if note else '(throttled)'
2746
2747         # Source is given priority since formats that throttle are given lower source_preference
2748         # When throttling issue is fully fixed, remove this
2749         self._sort_formats(formats, ('quality', 'height', 'fps', 'source'))
2750
2751         keywords = get_first(video_details, 'keywords', expected_type=list) or []
2752         if not keywords and webpage:
2753             keywords = [
2754                 unescapeHTML(m.group('content'))
2755                 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2756         for keyword in keywords:
2757             if keyword.startswith('yt:stretch='):
2758                 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2759                 if mobj:
2760                     # NB: float is intentional for forcing float division
2761                     w, h = (float(v) for v in mobj.groups())
2762                     if w > 0 and h > 0:
2763                         ratio = w / h
2764                         for f in formats:
2765                             if f.get('vcodec') != 'none':
2766                                 f['stretched_ratio'] = ratio
2767                         break
2768
2769         thumbnails = []
2770         thumbnail_dicts = traverse_obj(
2771             (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2772             expected_type=dict, default=[])
2773         for thumbnail in thumbnail_dicts:
2774             thumbnail_url = thumbnail.get('url')
2775             if not thumbnail_url:
2776                 continue
2777             # Sometimes youtube gives a wrong thumbnail URL. See:
2778             # https://github.com/yt-dlp/yt-dlp/issues/233
2779             # https://github.com/ytdl-org/youtube-dl/issues/28023
2780             if 'maxresdefault' in thumbnail_url:
2781                 thumbnail_url = thumbnail_url.split('?')[0]
2782             thumbnails.append({
2783                 'url': thumbnail_url,
2784                 'height': int_or_none(thumbnail.get('height')),
2785                 'width': int_or_none(thumbnail.get('width')),
2786             })
2787         thumbnail_url = search_meta(['og:image', 'twitter:image'])
2788         if thumbnail_url:
2789             thumbnails.append({
2790                 'url': thumbnail_url,
2791             })
2792         # The best resolution thumbnails sometimes does not appear in the webpage
2793         # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2794         # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2795         hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
2796         # TODO: Test them also? - For some videos, even these don't exist
2797         guaranteed_thumbnail_names = [
2798             'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2799             'mqdefault', 'mq1', 'mq2', 'mq3',
2800             'default', '1', '2', '3'
2801         ]
2802         thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
2803         n_thumbnail_names = len(thumbnail_names)
2804
2805         thumbnails.extend({
2806             'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2807                 video_id=video_id, name=name, ext=ext,
2808                 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2809             '_test_url': name in hq_thumbnail_names,
2810         } for name in thumbnail_names for ext in ('webp', 'jpg'))
2811         for thumb in thumbnails:
2812             i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2813             thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2814         self._remove_duplicate_formats(thumbnails)
2815
2816         category = get_first(microformats, 'category') or search_meta('genre')
2817         channel_id = str_or_none(
2818             get_first(video_details, 'channelId')
2819             or get_first(microformats, 'externalChannelId')
2820             or search_meta('channelId'))
2821         duration = int_or_none(
2822             get_first(video_details, 'lengthSeconds')
2823             or get_first(microformats, 'lengthSeconds')
2824             or parse_duration(search_meta('duration'))) or None
2825         owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2826
2827         live_content = get_first(video_details, 'isLiveContent')
2828         is_upcoming = get_first(video_details, 'isUpcoming')
2829         if is_live is None:
2830             if is_upcoming or live_content is False:
2831                 is_live = False
2832         if is_upcoming is None and (live_content or is_live):
2833             is_upcoming = False
2834         live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2835         live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2836         if not duration and live_endtime and live_starttime:
2837             duration = live_endtime - live_starttime
2838
2839         info = {
2840             'id': video_id,
2841             'title': self._live_title(video_title) if is_live else video_title,
2842             'formats': formats,
2843             'thumbnails': thumbnails,
2844             'description': video_description,
2845             'upload_date': unified_strdate(
2846                 get_first(microformats, 'uploadDate')
2847                 or search_meta('uploadDate')),
2848             'uploader': get_first(video_details, 'author'),
2849             'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2850             'uploader_url': owner_profile_url,
2851             'channel_id': channel_id,
2852             'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2853             'duration': duration,
2854             'view_count': int_or_none(
2855                 get_first((video_details, microformats), (..., 'viewCount'))
2856                 or search_meta('interactionCount')),
2857             'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2858             'age_limit': 18 if (
2859                 get_first(microformats, 'isFamilySafe') is False
2860                 or search_meta('isFamilyFriendly') == 'false'
2861                 or search_meta('og:restrictions:age') == '18+') else 0,
2862             'webpage_url': webpage_url,
2863             'categories': [category] if category else None,
2864             'tags': keywords,
2865             'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2866             'is_live': is_live,
2867             'was_live': (False if is_live or is_upcoming or live_content is False
2868                          else None if is_live is None or is_upcoming is None
2869                          else live_content),
2870             'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
2871             'release_timestamp': live_starttime,
2872         }
2873
2874         pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2875         # Converted into dicts to remove duplicates
2876         captions = {
2877             sub.get('baseUrl'): sub
2878             for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2879         translation_languages = {
2880             lang.get('languageCode'): lang.get('languageName')
2881             for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2882         subtitles = {}
2883         if pctr:
2884             def process_language(container, base_url, lang_code, sub_name, query):
2885                 lang_subs = container.setdefault(lang_code, [])
2886                 for fmt in self._SUBTITLE_FORMATS:
2887                     query.update({
2888                         'fmt': fmt,
2889                     })
2890                     lang_subs.append({
2891                         'ext': fmt,
2892                         'url': update_url_query(base_url, query),
2893                         'name': sub_name,
2894                     })
2895
2896             for base_url, caption_track in captions.items():
2897                 if not base_url:
2898                     continue
2899                 if caption_track.get('kind') != 'asr':
2900                     lang_code = (
2901                         remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2902                         or caption_track.get('languageCode'))
2903                     if not lang_code:
2904                         continue
2905                     process_language(
2906                         subtitles, base_url, lang_code,
2907                         traverse_obj(caption_track, ('name', 'simpleText')),
2908                         {})
2909                     continue
2910                 automatic_captions = {}
2911                 for trans_code, trans_name in translation_languages.items():
2912                     if not trans_code:
2913                         continue
2914                     process_language(
2915                         automatic_captions, base_url, trans_code,
2916                         self._get_text(trans_name, max_runs=1),
2917                         {'tlang': trans_code})
2918                 info['automatic_captions'] = automatic_captions
2919         info['subtitles'] = subtitles
2920
2921         parsed_url = compat_urllib_parse_urlparse(url)
2922         for component in [parsed_url.fragment, parsed_url.query]:
2923             query = compat_parse_qs(component)
2924             for k, v in query.items():
2925                 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2926                     d_k += '_time'
2927                     if d_k not in info and k in s_ks:
2928                         info[d_k] = parse_duration(query[k][0])
2929
2930         # Youtube Music Auto-generated description
2931         if video_description:
2932             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2933             if mobj:
2934                 release_year = mobj.group('release_year')
2935                 release_date = mobj.group('release_date')
2936                 if release_date:
2937                     release_date = release_date.replace('-', '')
2938                     if not release_year:
2939                         release_year = release_date[:4]
2940                 info.update({
2941                     'album': mobj.group('album'.strip()),
2942                     'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2943                     'track': mobj.group('track').strip(),
2944                     'release_date': release_date,
2945                     'release_year': int_or_none(release_year),
2946                 })
2947
2948         initial_data = None
2949         if webpage:
2950             initial_data = self._extract_yt_initial_variable(
2951                 webpage, self._YT_INITIAL_DATA_RE, video_id,
2952                 'yt initial data')
2953         if not initial_data:
2954             headers = self.generate_api_headers(
2955                 master_ytcfg, identity_token, self._extract_account_syncid(master_ytcfg),
2956                 session_index=self._extract_session_index(master_ytcfg))
2957
2958             initial_data = self._extract_response(
2959                 item_id=video_id, ep='next', fatal=False,
2960                 ytcfg=master_ytcfg, headers=headers, query={'videoId': video_id},
2961                 note='Downloading initial data API JSON')
2962
2963         try:
2964             # This will error if there is no livechat
2965             initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2966             info['subtitles']['live_chat'] = [{
2967                 'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
2968                 'video_id': video_id,
2969                 'ext': 'json',
2970                 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2971             }]
2972         except (KeyError, IndexError, TypeError):
2973             pass
2974
2975         if initial_data:
2976             info['chapters'] = (
2977                 self._extract_chapters_from_json(initial_data, duration)
2978                 or self._extract_chapters_from_engagement_panel(initial_data, duration)
2979                 or None)
2980
2981             contents = try_get(
2982                 initial_data,
2983                 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2984                 list) or []
2985             for content in contents:
2986                 vpir = content.get('videoPrimaryInfoRenderer')
2987                 if vpir:
2988                     stl = vpir.get('superTitleLink')
2989                     if stl:
2990                         stl = self._get_text(stl)
2991                         if try_get(
2992                                 vpir,
2993                                 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2994                             info['location'] = stl
2995                         else:
2996                             mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2997                             if mobj:
2998                                 info.update({
2999                                     'series': mobj.group(1),
3000                                     'season_number': int(mobj.group(2)),
3001                                     'episode_number': int(mobj.group(3)),
3002                                 })
3003                     for tlb in (try_get(
3004                             vpir,
3005                             lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3006                             list) or []):
3007                         tbr = tlb.get('toggleButtonRenderer') or {}
3008                         for getter, regex in [(
3009                                 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3010                                 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3011                                     lambda x: x['accessibility'],
3012                                     lambda x: x['accessibilityData']['accessibilityData'],
3013                                 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3014                             label = (try_get(tbr, getter, dict) or {}).get('label')
3015                             if label:
3016                                 mobj = re.match(regex, label)
3017                                 if mobj:
3018                                     info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3019                                     break
3020                     sbr_tooltip = try_get(
3021                         vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3022                     if sbr_tooltip:
3023                         like_count, dislike_count = sbr_tooltip.split(' / ')
3024                         info.update({
3025                             'like_count': str_to_int(like_count),
3026                             'dislike_count': str_to_int(dislike_count),
3027                         })
3028                 vsir = content.get('videoSecondaryInfoRenderer')
3029                 if vsir:
3030                     info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3031                     rows = try_get(
3032                         vsir,
3033                         lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3034                         list) or []
3035                     multiple_songs = False
3036                     for row in rows:
3037                         if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3038                             multiple_songs = True
3039                             break
3040                     for row in rows:
3041                         mrr = row.get('metadataRowRenderer') or {}
3042                         mrr_title = mrr.get('title')
3043                         if not mrr_title:
3044                             continue
3045                         mrr_title = self._get_text(mrr, 'title')
3046                         mrr_contents_text = self._get_text(mrr, ('contents', 0))
3047                         if mrr_title == 'License':
3048                             info['license'] = mrr_contents_text
3049                         elif not multiple_songs:
3050                             if mrr_title == 'Album':
3051                                 info['album'] = mrr_contents_text
3052                             elif mrr_title == 'Artist':
3053                                 info['artist'] = mrr_contents_text
3054                             elif mrr_title == 'Song':
3055                                 info['track'] = mrr_contents_text
3056
3057         fallbacks = {
3058             'channel': 'uploader',
3059             'channel_id': 'uploader_id',
3060             'channel_url': 'uploader_url',
3061         }
3062         for to, frm in fallbacks.items():
3063             if not info.get(to):
3064                 info[to] = info.get(frm)
3065
3066         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3067             v = info.get(s_k)
3068             if v:
3069                 info[d_k] = v
3070
3071         is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3072         is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3073         is_membersonly = None
3074         is_premium = None
3075         if initial_data and is_private is not None:
3076             is_membersonly = False
3077             is_premium = False
3078             contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3079             badge_labels = set()
3080             for content in contents:
3081                 if not isinstance(content, dict):
3082                     continue
3083                 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3084             for badge_label in badge_labels:
3085                 if badge_label.lower() == 'members only':
3086                     is_membersonly = True
3087                 elif badge_label.lower() == 'premium':
3088                     is_premium = True
3089                 elif badge_label.lower() == 'unlisted':
3090                     is_unlisted = True
3091
3092         info['availability'] = self._availability(
3093             is_private=is_private,
3094             needs_premium=is_premium,
3095             needs_subscription=is_membersonly,
3096             needs_auth=info['age_limit'] >= 18,
3097             is_unlisted=None if is_private is None else is_unlisted)
3098
3099         # get xsrf for annotations or comments
3100         get_annotations = self.get_param('writeannotations', False)
3101         get_comments = self.get_param('getcomments', False)
3102         if get_annotations or get_comments:
3103             xsrf_token = None
3104             if master_ytcfg:
3105                 xsrf_token = try_get(master_ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
3106             if not xsrf_token:
3107                 xsrf_token = self._search_regex(
3108                     r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
3109                     webpage, 'xsrf token', group='xsrf_token', fatal=False)
3110
3111         # annotations
3112         if get_annotations:
3113             invideo_url = get_first(
3114                 player_responses,
3115                 ('annotations', 0, 'playerAnnotationsUrlsRenderer', 'invideoUrl'),
3116                 expected_type=str)
3117             if xsrf_token and invideo_url:
3118                 xsrf_field_name = None
3119                 if master_ytcfg:
3120                     xsrf_field_name = try_get(master_ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3121                 if not xsrf_field_name:
3122                     xsrf_field_name = self._search_regex(
3123                         r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3124                         webpage, 'xsrf field name',
3125                         group='xsrf_field_name', default='session_token')
3126                 info['annotations'] = self._download_webpage(
3127                     self._proto_relative_url(invideo_url),
3128                     video_id, note='Downloading annotations',
3129                     errnote='Unable to download video annotations', fatal=False,
3130                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3131
3132         if get_comments:
3133             info['__post_extractor'] = lambda: self._extract_comments(master_ytcfg, video_id, contents, webpage)
3134
3135         self.mark_watched(video_id, player_responses)
3136
3137         return info
3138
3139
3140 class YoutubeTabIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com tab'
    # URL pattern handled by this extractor (verbose regex). It accepts
    # youtube.com, youtubekids.com or invidio.us, each optionally preceded by
    # one subdomain label, followed by one of:
    #   * a channel-style prefix (channel/, c/, user/ or browse/), captured
    #     as `channel_type`;
    #   * feed/, hashtag/, or a playlist/watch URL carrying a `list=` query
    #     parameter, captured as `not_channel`;
    #   * nothing at all (a "direct" URL), provided the next path segment is
    #     not matched by YoutubeBaseInfoExtractor._RESERVED_NAMES.
    # Whatever follows, up to the next `/`, `?`, `#` or `&`, is captured as `id`.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:\w+\.)?
                        (?:
                            youtube(?:kids)?\.com|
                            invidio\.us
                        )/
                        (?:
                            (?P<channel_type>channel|c|user|browse)/|
                            (?P<not_channel>
                                feed/|hashtag/|
                                (?:playlist|watch)\?.*?\blist=
                            )|
                            (?!(?:%s)\b)  # Direct URLs
                        )
                        (?P<id>[^/?\#&]+)
                    ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
    IE_NAME = 'youtube:tab'
3160
3161     _TESTS = [{
3162         'note': 'playlists, multipage',
3163         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3164         'playlist_mincount': 94,
3165         'info_dict': {
3166             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3167             'title': 'Игорь Клейнер - Playlists',
3168             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3169             'uploader': 'Игорь Клейнер',
3170             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3171         },
3172     }, {
3173         'note': 'playlists, multipage, different order',
3174         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3175         'playlist_mincount': 94,
3176         'info_dict': {
3177             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3178             'title': 'Игорь Клейнер - Playlists',
3179             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3180             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3181             'uploader': 'Игорь Клейнер',
3182         },
3183     }, {
3184         'note': 'playlists, series',
3185         'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3186         'playlist_mincount': 5,
3187         'info_dict': {
3188             'id': 'UCYO_jab_esuFRV4b17AJtAw',
3189             'title': '3Blue1Brown - Playlists',
3190             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3191             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3192             'uploader': '3Blue1Brown',
3193         },
3194     }, {
3195         'note': 'playlists, singlepage',
3196         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3197         'playlist_mincount': 4,
3198         'info_dict': {
3199             'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3200             'title': 'ThirstForScience - Playlists',
3201             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3202             'uploader': 'ThirstForScience',
3203             'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3204         }
3205     }, {
3206         'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3207         'only_matching': True,
3208     }, {
3209         'note': 'basic, single video playlist',
3210         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3211         'info_dict': {
3212             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3213             'uploader': 'Sergey M.',
3214             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3215             'title': 'youtube-dl public playlist',
3216         },
3217         'playlist_count': 1,
3218     }, {
3219         'note': 'empty playlist',
3220         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3221         'info_dict': {
3222             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3223             'uploader': 'Sergey M.',
3224             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3225             'title': 'youtube-dl empty playlist',
3226         },
3227         'playlist_count': 0,
3228     }, {
3229         'note': 'Home tab',
3230         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3231         'info_dict': {
3232             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3233             'title': 'lex will - Home',
3234             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3235             'uploader': 'lex will',
3236             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3237         },
3238         'playlist_mincount': 2,
3239     }, {
3240         'note': 'Videos tab',
3241         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3242         'info_dict': {
3243             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3244             'title': 'lex will - Videos',
3245             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3246             'uploader': 'lex will',
3247             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3248         },
3249         'playlist_mincount': 975,
3250     }, {
3251         'note': 'Videos tab, sorted by popular',
3252         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3253         'info_dict': {
3254             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3255             'title': 'lex will - Videos',
3256             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3257             'uploader': 'lex will',
3258             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3259         },
3260         'playlist_mincount': 199,
3261     }, {
3262         'note': 'Playlists tab',
3263         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3264         'info_dict': {
3265             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3266             'title': 'lex will - Playlists',
3267             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3268             'uploader': 'lex will',
3269             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3270         },
3271         'playlist_mincount': 17,
3272     }, {
3273         'note': 'Community tab',
3274         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3275         'info_dict': {
3276             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3277             'title': 'lex will - Community',
3278             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3279             'uploader': 'lex will',
3280             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3281         },
3282         'playlist_mincount': 18,
3283     }, {
3284         'note': 'Channels tab',
3285         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3286         'info_dict': {
3287             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3288             'title': 'lex will - Channels',
3289             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3290             'uploader': 'lex will',
3291             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3292         },
3293         'playlist_mincount': 12,
3294     }, {
3295         'note': 'Search tab',
3296         'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3297         'playlist_mincount': 40,
3298         'info_dict': {
3299             'id': 'UCYO_jab_esuFRV4b17AJtAw',
3300             'title': '3Blue1Brown - Search - linear algebra',
3301             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3302             'uploader': '3Blue1Brown',
3303             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3304         },
3305     }, {
3306         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3307         'only_matching': True,
3308     }, {
3309         'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3310         'only_matching': True,
3311     }, {
3312         'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3313         'only_matching': True,
3314     }, {
3315         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3316         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3317         'info_dict': {
3318             'title': '29C3: Not my department',
3319             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3320             'uploader': 'Christiaan008',
3321             'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3322             'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3323         },
3324         'playlist_count': 96,
3325     }, {
3326         'note': 'Large playlist',
3327         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3328         'info_dict': {
3329             'title': 'Uploads from Cauchemar',
3330             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3331             'uploader': 'Cauchemar',
3332             'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3333         },
3334         'playlist_mincount': 1123,
3335     }, {
3336         'note': 'even larger playlist, 8832 videos',
3337         'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3338         'only_matching': True,
3339     }, {
3340         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3341         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3342         'info_dict': {
3343             'title': 'Uploads from Interstellar Movie',
3344             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3345             'uploader': 'Interstellar Movie',
3346             'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3347         },
3348         'playlist_mincount': 21,
3349     }, {
3350         'note': 'Playlist with "show unavailable videos" button',
3351         'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3352         'info_dict': {
3353             'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3354             'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3355             'uploader': 'Phim Siêu Nhân Nhật Bản',
3356             'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3357         },
3358         'playlist_mincount': 200,
3359     }, {
3360         'note': 'Playlist with unavailable videos in page 7',
3361         'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3362         'info_dict': {
3363             'title': 'Uploads from BlankTV',
3364             'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3365             'uploader': 'BlankTV',
3366             'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3367         },
3368         'playlist_mincount': 1000,
3369     }, {
3370         'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3371         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3372         'info_dict': {
3373             'title': 'Data Analysis with Dr Mike Pound',
3374             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3375             'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3376             'uploader': 'Computerphile',
3377             'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3378         },
3379         'playlist_mincount': 11,
3380     }, {
3381         'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3382         'only_matching': True,
3383     }, {
3384         'note': 'Playlist URL that does not actually serve a playlist',
3385         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3386         'info_dict': {
3387             'id': 'FqZTN594JQw',
3388             'ext': 'webm',
3389             'title': "Smiley's People 01 detective, Adventure Series, Action",
3390             'uploader': 'STREEM',
3391             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3392             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3393             'upload_date': '20150526',
3394             'license': 'Standard YouTube License',
3395             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3396             'categories': ['People & Blogs'],
3397             'tags': list,
3398             'view_count': int,
3399             'like_count': int,
3400             'dislike_count': int,
3401         },
3402         'params': {
3403             'skip_download': True,
3404         },
3405         'skip': 'This video is not available.',
3406         'add_ie': [YoutubeIE.ie_key()],
3407     }, {
3408         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3409         'only_matching': True,
3410     }, {
3411         'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3412         'only_matching': True,
3413     }, {
3414         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3415         'info_dict': {
3416             'id': 'FMtPN8yp5LU',  # This will keep changing
3417             'ext': 'mp4',
3418             'title': compat_str,
3419             'uploader': 'Sky News',
3420             'uploader_id': 'skynews',
3421             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3422             'upload_date': r're:\d{8}',
3423             'description': compat_str,
3424             'categories': ['News & Politics'],
3425             'tags': list,
3426             'like_count': int,
3427             'dislike_count': int,
3428         },
3429         'params': {
3430             'skip_download': True,
3431         },
3432         'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3433     }, {
3434         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3435         'info_dict': {
3436             'id': 'a48o2S1cPoo',
3437             'ext': 'mp4',
3438             'title': 'The Young Turks - Live Main Show',
3439             'uploader': 'The Young Turks',
3440             'uploader_id': 'TheYoungTurks',
3441             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3442             'upload_date': '20150715',
3443             'license': 'Standard YouTube License',
3444             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3445             'categories': ['News & Politics'],
3446             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3447             'like_count': int,
3448             'dislike_count': int,
3449         },
3450         'params': {
3451             'skip_download': True,
3452         },
3453         'only_matching': True,
3454     }, {
3455         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3456         'only_matching': True,
3457     }, {
3458         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3459         'only_matching': True,
3460     }, {
3461         'note': 'A channel that is not live. Should raise error',
3462         'url': 'https://www.youtube.com/user/numberphile/live',
3463         'only_matching': True,
3464     }, {
3465         'url': 'https://www.youtube.com/feed/trending',
3466         'only_matching': True,
3467     }, {
3468         'url': 'https://www.youtube.com/feed/library',
3469         'only_matching': True,
3470     }, {
3471         'url': 'https://www.youtube.com/feed/history',
3472         'only_matching': True,
3473     }, {
3474         'url': 'https://www.youtube.com/feed/subscriptions',
3475         'only_matching': True,
3476     }, {
3477         'url': 'https://www.youtube.com/feed/watch_later',
3478         'only_matching': True,
3479     }, {
3480         'note': 'Recommended - redirects to home page',
3481         'url': 'https://www.youtube.com/feed/recommended',
3482         'only_matching': True,
3483     }, {
3484         'note': 'inline playlist with not always working continuations',
3485         'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3486         'only_matching': True,
3487     }, {
3488         'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3489         'only_matching': True,
3490     }, {
3491         'url': 'https://www.youtube.com/course',
3492         'only_matching': True,
3493     }, {
3494         'url': 'https://www.youtube.com/zsecurity',
3495         'only_matching': True,
3496     }, {
3497         'url': 'http://www.youtube.com/NASAgovVideo/videos',
3498         'only_matching': True,
3499     }, {
3500         'url': 'https://www.youtube.com/TheYoungTurks/live',
3501         'only_matching': True,
3502     }, {
3503         'url': 'https://www.youtube.com/hashtag/cctv9',
3504         'info_dict': {
3505             'id': 'cctv9',
3506             'title': '#cctv9',
3507         },
3508         'playlist_mincount': 350,
3509     }, {
3510         'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3511         'only_matching': True,
3512     }, {
3513         'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3514         'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3515         'only_matching': True
3516     }, {
3517         'note': '/browse/ should redirect to /channel/',
3518         'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3519         'only_matching': True
3520     }, {
3521         'note': 'VLPL, should redirect to playlist?list=PL...',
3522         'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3523         'info_dict': {
3524             'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3525             'uploader': 'NoCopyrightSounds',
3526             'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3527             'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3528             'title': 'NCS Releases',
3529         },
3530         'playlist_mincount': 166,
3531     }, {
3532         'note': 'Topic, should redirect to playlist?list=UU...',
3533         'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3534         'info_dict': {
3535             'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3536             'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3537             'title': 'Uploads from Royalty Free Music - Topic',
3538             'uploader': 'Royalty Free Music - Topic',
3539         },
3540         'expected_warnings': [
3541             'A channel/user page was given',
3542             'The URL does not have a videos tab',
3543         ],
3544         'playlist_mincount': 101,
3545     }, {
3546         'note': 'Topic without a UU playlist',
3547         'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3548         'info_dict': {
3549             'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3550             'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3551         },
3552         'expected_warnings': [
3553             'A channel/user page was given',
3554             'The URL does not have a videos tab',
3555             'Falling back to channel URL',
3556         ],
3557         'playlist_mincount': 9,
3558     }, {
3559         'note': 'Youtube music Album',
3560         'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3561         'info_dict': {
3562             'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3563             'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3564         },
3565         'playlist_count': 50,
3566     }, {
3567         'note': 'unlisted single video playlist',
3568         'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3569         'info_dict': {
3570             'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3571             'uploader': 'colethedj',
3572             'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3573             'title': 'yt-dlp unlisted playlist test',
3574             'availability': 'unlisted'
3575         },
3576         'playlist_count': 1,
3577     }]
3578
3579     @classmethod
3580     def suitable(cls, url):
3581         return False if YoutubeIE.suitable(url) else super(
3582             YoutubeTabIE, cls).suitable(url)
3583
3584     def _extract_channel_id(self, webpage):
3585         channel_id = self._html_search_meta(
3586             'channelId', webpage, 'channel id', default=None)
3587         if channel_id:
3588             return channel_id
3589         channel_url = self._html_search_meta(
3590             ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3591              'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3592              'twitter:app:url:googleplay'), webpage, 'channel url')
3593         return self._search_regex(
3594             r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3595             channel_url, 'channel id')
3596
3597     @staticmethod
3598     def _extract_basic_item_renderer(item):
3599         # Modified from _extract_grid_item_renderer
3600         known_basic_renderers = (
3601             'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3602         )
3603         for key, renderer in item.items():
3604             if not isinstance(renderer, dict):
3605                 continue
3606             elif key in known_basic_renderers:
3607                 return renderer
3608             elif key.startswith('grid') and key.endswith('Renderer'):
3609                 return renderer
3610
3611     def _grid_entries(self, grid_renderer):
3612         for item in grid_renderer['items']:
3613             if not isinstance(item, dict):
3614                 continue
3615             renderer = self._extract_basic_item_renderer(item)
3616             if not isinstance(renderer, dict):
3617                 continue
3618             title = self._get_text(renderer, 'title')
3619
3620             # playlist
3621             playlist_id = renderer.get('playlistId')
3622             if playlist_id:
3623                 yield self.url_result(
3624                     'https://www.youtube.com/playlist?list=%s' % playlist_id,
3625                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3626                     video_title=title)
3627                 continue
3628             # video
3629             video_id = renderer.get('videoId')
3630             if video_id:
3631                 yield self._extract_video(renderer)
3632                 continue
3633             # channel
3634             channel_id = renderer.get('channelId')
3635             if channel_id:
3636                 yield self.url_result(
3637                     'https://www.youtube.com/channel/%s' % channel_id,
3638                     ie=YoutubeTabIE.ie_key(), video_title=title)
3639                 continue
3640             # generic endpoint URL support
3641             ep_url = urljoin('https://www.youtube.com/', try_get(
3642                 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3643                 compat_str))
3644             if ep_url:
3645                 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3646                     if ie.suitable(ep_url):
3647                         yield self.url_result(
3648                             ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3649                         break
3650
3651     def _shelf_entries_from_content(self, shelf_renderer):
3652         content = shelf_renderer.get('content')
3653         if not isinstance(content, dict):
3654             return
3655         renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3656         if renderer:
3657             # TODO: add support for nested playlists so each shelf is processed
3658             # as separate playlist
3659             # TODO: this includes only first N items
3660             for entry in self._grid_entries(renderer):
3661                 yield entry
3662         renderer = content.get('horizontalListRenderer')
3663         if renderer:
3664             # TODO
3665             pass
3666
3667     def _shelf_entries(self, shelf_renderer, skip_channels=False):
3668         ep = try_get(
3669             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3670             compat_str)
3671         shelf_url = urljoin('https://www.youtube.com', ep)
3672         if shelf_url:
3673             # Skipping links to another channels, note that checking for
3674             # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3675             # will not work
3676             if skip_channels and '/channels?' in shelf_url:
3677                 return
3678             title = self._get_text(shelf_renderer, 'title')
3679             yield self.url_result(shelf_url, video_title=title)
3680         # Shelf may not contain shelf URL, fallback to extraction from content
3681         for entry in self._shelf_entries_from_content(shelf_renderer):
3682             yield entry
3683
3684     def _playlist_entries(self, video_list_renderer):
3685         for content in video_list_renderer['contents']:
3686             if not isinstance(content, dict):
3687                 continue
3688             renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3689             if not isinstance(renderer, dict):
3690                 continue
3691             video_id = renderer.get('videoId')
3692             if not video_id:
3693                 continue
3694             yield self._extract_video(renderer)
3695
3696     def _rich_entries(self, rich_grid_renderer):
3697         renderer = try_get(
3698             rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3699         video_id = renderer.get('videoId')
3700         if not video_id:
3701             return
3702         yield self._extract_video(renderer)
3703
3704     def _video_entry(self, video_renderer):
3705         video_id = video_renderer.get('videoId')
3706         if video_id:
3707             return self._extract_video(video_renderer)
3708
3709     def _post_thread_entries(self, post_thread_renderer):
3710         post_renderer = try_get(
3711             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3712         if not post_renderer:
3713             return
3714         # video attachment
3715         video_renderer = try_get(
3716             post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3717         video_id = video_renderer.get('videoId')
3718         if video_id:
3719             entry = self._extract_video(video_renderer)
3720             if entry:
3721                 yield entry
3722         # playlist attachment
3723         playlist_id = try_get(
3724             post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3725         if playlist_id:
3726             yield self.url_result(
3727                 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3728                 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3729         # inline video links
3730         runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3731         for run in runs:
3732             if not isinstance(run, dict):
3733                 continue
3734             ep_url = try_get(
3735                 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3736             if not ep_url:
3737                 continue
3738             if not YoutubeIE.suitable(ep_url):
3739                 continue
3740             ep_video_id = YoutubeIE._match_id(ep_url)
3741             if video_id == ep_video_id:
3742                 continue
3743             yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3744
3745     def _post_thread_continuation_entries(self, post_thread_continuation):
3746         contents = post_thread_continuation.get('contents')
3747         if not isinstance(contents, list):
3748             return
3749         for content in contents:
3750             renderer = content.get('backstagePostThreadRenderer')
3751             if not isinstance(renderer, dict):
3752                 continue
3753             for entry in self._post_thread_entries(renderer):
3754                 yield entry
3755
3756     r''' # unused
3757     def _rich_grid_entries(self, contents):
3758         for content in contents:
3759             video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3760             if video_renderer:
3761                 entry = self._video_entry(video_renderer)
3762                 if entry:
3763                     yield entry
3764     '''
3765     def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
3766
3767         def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
3768             contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
3769             for content in contents:
3770                 if not isinstance(content, dict):
3771                     continue
3772                 is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
3773                 if not is_renderer:
3774                     renderer = content.get('richItemRenderer')
3775                     if renderer:
3776                         for entry in self._rich_entries(renderer):
3777                             yield entry
3778                         continuation_list[0] = self._extract_continuation(parent_renderer)
3779                     continue
3780                 isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
3781                 for isr_content in isr_contents:
3782                     if not isinstance(isr_content, dict):
3783                         continue
3784
3785                     known_renderers = {
3786                         'playlistVideoListRenderer': self._playlist_entries,
3787                         'gridRenderer': self._grid_entries,
3788                         'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
3789                         'backstagePostThreadRenderer': self._post_thread_entries,
3790                         'videoRenderer': lambda x: [self._video_entry(x)],
3791                     }
3792                     for key, renderer in isr_content.items():
3793                         if key not in known_renderers:
3794                             continue
3795                         for entry in known_renderers[key](renderer):
3796                             if entry:
3797                                 yield entry
3798                         continuation_list[0] = self._extract_continuation(renderer)
3799                         break
3800
3801                 if not continuation_list[0]:
3802                     continuation_list[0] = self._extract_continuation(is_renderer)
3803
3804             if not continuation_list[0]:
3805                 continuation_list[0] = self._extract_continuation(parent_renderer)
3806
3807         continuation_list = [None]  # Python 2 doesnot support nonlocal
3808         tab_content = try_get(tab, lambda x: x['content'], dict)
3809         if not tab_content:
3810             return
3811         parent_renderer = (
3812             try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
3813             or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
3814         for entry in extract_entries(parent_renderer):
3815             yield entry
3816         continuation = continuation_list[0]
3817         visitor_data = None
3818
3819         for page_num in itertools.count(1):
3820             if not continuation:
3821                 break
3822             headers = self.generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
3823             response = self._extract_response(
3824                 item_id='%s page %s' % (item_id, page_num),
3825                 query=continuation, headers=headers, ytcfg=ytcfg,
3826                 check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3827
3828             if not response:
3829                 break
3830             visitor_data = try_get(
3831                 response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data
3832
3833             known_continuation_renderers = {
3834                 'playlistVideoListContinuation': self._playlist_entries,
3835                 'gridContinuation': self._grid_entries,
3836                 'itemSectionContinuation': self._post_thread_continuation_entries,
3837                 'sectionListContinuation': extract_entries,  # for feeds
3838             }
3839             continuation_contents = try_get(
3840                 response, lambda x: x['continuationContents'], dict) or {}
3841             continuation_renderer = None
3842             for key, value in continuation_contents.items():
3843                 if key not in known_continuation_renderers:
3844                     continue
3845                 continuation_renderer = value
3846                 continuation_list = [None]
3847                 for entry in known_continuation_renderers[key](continuation_renderer):
3848                     yield entry
3849                 continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
3850                 break
3851             if continuation_renderer:
3852                 continue
3853
3854             known_renderers = {
3855                 'gridPlaylistRenderer': (self._grid_entries, 'items'),
3856                 'gridVideoRenderer': (self._grid_entries, 'items'),
3857                 'gridChannelRenderer': (self._grid_entries, 'items'),
3858                 'playlistVideoRenderer': (self._playlist_entries, 'contents'),
3859                 'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
3860                 'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
3861                 'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
3862             }
3863             on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
3864             continuation_items = try_get(
3865                 on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
3866             continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
3867             video_items_renderer = None
3868             for key, value in continuation_item.items():
3869                 if key not in known_renderers:
3870                     continue
3871                 video_items_renderer = {known_renderers[key][1]: continuation_items}
3872                 continuation_list = [None]
3873                 for entry in known_renderers[key][0](video_items_renderer):
3874                     yield entry
3875                 continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
3876                 break
3877             if video_items_renderer:
3878                 continue
3879             break
3880
3881     @staticmethod
3882     def _extract_selected_tab(tabs):
3883         for tab in tabs:
3884             renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3885             if renderer.get('selected') is True:
3886                 return renderer
3887         else:
3888             raise ExtractorError('Unable to find selected tab')
3889
3890     @classmethod
3891     def _extract_uploader(cls, data):
3892         uploader = {}
3893         renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3894         owner = try_get(
3895             renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3896         if owner:
3897             uploader['uploader'] = owner.get('text')
3898             uploader['uploader_id'] = try_get(
3899                 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3900             uploader['uploader_url'] = urljoin(
3901                 'https://www.youtube.com/',
3902                 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3903         return {k: v for k, v in uploader.items() if v is not None}
3904
3905     def _extract_from_tabs(self, item_id, webpage, data, tabs):
3906         playlist_id = title = description = channel_url = channel_name = channel_id = None
3907         thumbnails_list = tags = []
3908
3909         selected_tab = self._extract_selected_tab(tabs)
3910         renderer = try_get(
3911             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3912         if renderer:
3913             channel_name = renderer.get('title')
3914             channel_url = renderer.get('channelUrl')
3915             channel_id = renderer.get('externalId')
3916         else:
3917             renderer = try_get(
3918                 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3919
3920         if renderer:
3921             title = renderer.get('title')
3922             description = renderer.get('description', '')
3923             playlist_id = channel_id
3924             tags = renderer.get('keywords', '').split()
3925             thumbnails_list = (
3926                 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
3927                 or try_get(
3928                     self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3929                     lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
3930                     list)
3931                 or [])
3932
3933         thumbnails = []
3934         for t in thumbnails_list:
3935             if not isinstance(t, dict):
3936                 continue
3937             thumbnail_url = url_or_none(t.get('url'))
3938             if not thumbnail_url:
3939                 continue
3940             thumbnails.append({
3941                 'url': thumbnail_url,
3942                 'width': int_or_none(t.get('width')),
3943                 'height': int_or_none(t.get('height')),
3944             })
3945         if playlist_id is None:
3946             playlist_id = item_id
3947         if title is None:
3948             title = (
3949                 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3950                 or playlist_id)
3951         title += format_field(selected_tab, 'title', ' - %s')
3952         title += format_field(selected_tab, 'expandedText', ' - %s')
3953         metadata = {
3954             'playlist_id': playlist_id,
3955             'playlist_title': title,
3956             'playlist_description': description,
3957             'uploader': channel_name,
3958             'uploader_id': channel_id,
3959             'uploader_url': channel_url,
3960             'thumbnails': thumbnails,
3961             'tags': tags,
3962         }
3963         availability = self._extract_availability(data)
3964         if availability:
3965             metadata['availability'] = availability
3966         if not channel_id:
3967             metadata.update(self._extract_uploader(data))
3968         metadata.update({
3969             'channel': metadata['uploader'],
3970             'channel_id': metadata['uploader_id'],
3971             'channel_url': metadata['uploader_url']})
3972         ytcfg = self.extract_ytcfg(item_id, webpage)
3973         return self.playlist_result(
3974             self._entries(
3975                 selected_tab, playlist_id,
3976                 self._extract_identity_token(webpage, item_id),
3977                 self._extract_account_syncid(ytcfg, data), ytcfg),
3978             **metadata)
3979
3980     def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
3981         first_id = last_id = None
3982         ytcfg = self.extract_ytcfg(playlist_id, webpage)
3983         headers = self.generate_api_headers(
3984             ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3985             identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
3986         for page_num in itertools.count(1):
3987             videos = list(self._playlist_entries(playlist))
3988             if not videos:
3989                 return
3990             start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3991             if start >= len(videos):
3992                 return
3993             for video in videos[start:]:
3994                 if video['id'] == first_id:
3995                     self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3996                     return
3997                 yield video
3998             first_id = first_id or videos[0]['id']
3999             last_id = videos[-1]['id']
4000             watch_endpoint = try_get(
4001                 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
4002             query = {
4003                 'playlistId': playlist_id,
4004                 'videoId': watch_endpoint.get('videoId') or last_id,
4005                 'index': watch_endpoint.get('index') or len(videos),
4006                 'params': watch_endpoint.get('params') or 'OAE%3D'
4007             }
4008             response = self._extract_response(
4009                 item_id='%s page %d' % (playlist_id, page_num),
4010                 query=query, ep='next', headers=headers, ytcfg=ytcfg,
4011                 check_get_keys='contents'
4012             )
4013             playlist = try_get(
4014                 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
4015
4016     def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
4017         title = playlist.get('title') or try_get(
4018             data, lambda x: x['titleText']['simpleText'], compat_str)
4019         playlist_id = playlist.get('playlistId') or item_id
4020
4021         # Delegating everything except mix playlists to regular tab-based playlist URL
4022         playlist_url = urljoin(url, try_get(
4023             playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
4024             compat_str))
4025         if playlist_url and playlist_url != url:
4026             return self.url_result(
4027                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
4028                 video_title=title)
4029
4030         return self.playlist_result(
4031             self._extract_mix_playlist(playlist, playlist_id, data, webpage),
4032             playlist_id=playlist_id, playlist_title=title)
4033
4034     def _extract_availability(self, data):
4035         """
4036         Gets the availability of a given playlist/tab.
4037         Note: Unless YouTube tells us explicitly, we do not assume it is public
4038         @param data: response
4039         """
4040         is_private = is_unlisted = None
4041         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
4042         badge_labels = self._extract_badges(renderer)
4043
4044         # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
4045         privacy_dropdown_entries = try_get(
4046             renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
4047         for renderer_dict in privacy_dropdown_entries:
4048             is_selected = try_get(
4049                 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
4050             if not is_selected:
4051                 continue
4052             label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
4053             if label:
4054                 badge_labels.add(label.lower())
4055                 break
4056
4057         for badge_label in badge_labels:
4058             if badge_label == 'unlisted':
4059                 is_unlisted = True
4060             elif badge_label == 'private':
4061                 is_private = True
4062             elif badge_label == 'public':
4063                 is_unlisted = is_private = False
4064         return self._availability(is_private, False, False, False, is_unlisted)
4065
4066     @staticmethod
4067     def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
4068         sidebar_renderer = try_get(
4069             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
4070         for item in sidebar_renderer:
4071             renderer = try_get(item, lambda x: x[info_renderer], expected_type)
4072             if renderer:
4073                 return renderer
4074
4075     def _reload_with_unavailable_videos(self, item_id, data, webpage):
4076         """
4077         Get playlist with unavailable videos if the 'show unavailable videos' button exists.
4078         """
4079         browse_id = params = None
4080         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
4081         if not renderer:
4082             return
4083         menu_renderer = try_get(
4084             renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
4085         for menu_item in menu_renderer:
4086             if not isinstance(menu_item, dict):
4087                 continue
4088             nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
4089             text = try_get(
4090                 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
4091             if not text or text.lower() != 'show unavailable videos':
4092                 continue
4093             browse_endpoint = try_get(
4094                 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
4095             browse_id = browse_endpoint.get('browseId')
4096             params = browse_endpoint.get('params')
4097             break
4098
4099         ytcfg = self.extract_ytcfg(item_id, webpage)
4100         headers = self.generate_api_headers(
4101             ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
4102             identity_token=self._extract_identity_token(webpage, item_id=item_id),
4103             visitor_data=try_get(
4104                 self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
4105         query = {
4106             'params': params or 'wgYCCAA=',
4107             'browseId': browse_id or 'VL%s' % item_id
4108         }
4109         return self._extract_response(
4110             item_id=item_id, headers=headers, query=query,
4111             check_get_keys='contents', fatal=False, ytcfg=ytcfg,
4112             note='Downloading API JSON with unavailable videos')
4113
4114     def _extract_webpage(self, url, item_id):
4115         retries = self.get_param('extractor_retries', 3)
4116         count = -1
4117         last_error = 'Incomplete yt initial data recieved'
4118         while count < retries:
4119             count += 1
4120             # Sometimes youtube returns a webpage with incomplete ytInitialData
4121             # See: https://github.com/yt-dlp/yt-dlp/issues/116
4122             if count:
4123                 self.report_warning('%s. Retrying ...' % last_error)
4124             webpage = self._download_webpage(
4125                 url, item_id,
4126                 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4127             data = self.extract_yt_initial_data(item_id, webpage)
4128             if data.get('contents') or data.get('currentVideoEndpoint'):
4129                 break
4130             # Extract alerts here only when there is error
4131             self._extract_and_report_alerts(data)
4132             if count >= retries:
4133                 raise ExtractorError(last_error)
4134         return webpage, data
4135
4136     @staticmethod
4137     def _smuggle_data(entries, data):
4138         for entry in entries:
4139             if data:
4140                 entry['url'] = smuggle_url(entry['url'], data)
4141             yield entry
4142
4143     def _real_extract(self, url):
4144         url, smuggled_data = unsmuggle_url(url, {})
4145         if self.is_music_url(url):
4146             smuggled_data['is_music_url'] = True
4147         info_dict = self.__real_extract(url, smuggled_data)
4148         if info_dict.get('entries'):
4149             info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4150         return info_dict
4151
    # Splits a URL into 'pre' (the part matched by _VALID_URL), 'tab' (a
    # following '/word' component, only attempted when the 'channel_type'
    # group took part in the match) and 'post' (everything after that).
    # NOTE(review): 'channel_type' is presumably a named group of _VALID_URL - confirm
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4153
    def __real_extract(self, url, smuggled_data):
        """Normalise the URL, download the page and dispatch on what it contains.

        The host is forced to www.youtube.com and the URL is split into
        pre/tab/post with _url_re. Depending on the smuggled data, the compat
        options and the query string, the URL may be rewritten to a playlist
        or a "/videos" tab before anything is downloaded.

        @param url:           URL without smuggled data
        @param smuggled_data: dict of smuggled data; 'is_music_url' is read here
        @returns the result of _extract_from_tabs or _extract_from_playlist,
                 or a url_result for YoutubeIE when only a video is identified
        Raises ExtractorError when the page cannot be recognized, or when a
        "/live" tab was requested but a tabbed page was returned.
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Named groups of the match, with unmatched groups as '' instead of None
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Rebuild the URL from the (possibly rewritten) parts and split it again
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither a video nor a playlist id, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            # Switch to the playlist only after it loaded without an error alert
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        # data may have been replaced above, so the tabs are looked up again
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        # Neither tabs nor a playlist: fall back to a single video if one is known
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4268
4269
class YoutubePlaylistIE(InfoExtractor):
    """Matches playlist ids and "list=" URLs and hands them over to YoutubeTabIE."""

    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
            'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 654,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
            'description': 'md5:da521864744d60a198e3a88af4db0d9d',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE accepts or that carry a video id ("v")."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        video_ids = parse_qs(url).get('v', [None])
        if video_ids[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Redirect to the /playlist URL, keeping the query of the original URL.

        When the original URL has no query, only "list" is sent. Music URLs
        are marked through smuggled data.
        """
        playlist_id = self._match_id(url)
        from_music = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url)
        if not query:
            query = {'list': playlist_id}
        tab_url = update_url_query('https://www.youtube.com/playlist', query)
        if from_music:
            tab_url = smuggle_url(tab_url, {'is_music_url': True})
        return self.url_result(tab_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4354
4355
class YoutubeYtBeIE(InfoExtractor):
    """Turns youtu.be links that carry a playlist into the equivalent watch URL."""

    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the video and playlist ids over to YoutubeTabIE as a watch URL."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4394
4395
class YoutubeYtUserIE(InfoExtractor):
    """Resolves the "ytuser:<name>" shorthand to the user's page."""

    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /user/ URL of the given name."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4409
4410
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Resolves the ":ytfav" shorthand to the "LL" playlist."""

    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the URL of the "LL" playlist."""
        liked_videos_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_videos_url, ie=YoutubeTabIE.ie_key())
4428
4429
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Search extractor behind the ``ytsearch`` keyword.

    Result pages are requested from the ``search`` endpoint one at a time;
    the continuation data found on a page is merged into the request for the
    following page.
    """

    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _SEARCH_KEY = 'ytsearch'
    # Sent as "params" with every search request when set to a truthy value
    _SEARCH_PARAMS = None
    # There does not seem to be an actual upper bound: a search for 'python',
    # for instance, reports more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _TESTS = []

    def _entries(self, query, n):
        """Lazily yield the video results for ``query``, at most ``n`` of them.

        The generator ends when ``n`` videos were yielded, when a page cannot
        be fetched or contains no result sections, or when a page offers no
        continuation.
        """
        request_data = {'query': query}
        if self._SEARCH_PARAMS:
            request_data['params'] = self._SEARCH_PARAMS

        # Where the result sections live on the first page and on
        # continuation pages respectively
        section_getters = (
            lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
            lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems'],
        )

        yielded = 0
        continuation = {}
        page_num = 0
        while True:
            page_num += 1
            request_data.update(continuation)
            response = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num),
                ep='search', query=request_data,
                check_get_keys=('contents', 'onResponseReceivedCommands'))
            if not response:
                return

            sections = try_get(response, section_getters, list)
            if not sections:
                return

            # Promoted content that YouTube sometimes mixes into the results
            # shifts the position of both the videos and the continuation
            # token, so every section is inspected instead of fixed indices.
            continuation = None
            for section in sections:
                continuation = continuation or self._extract_continuation(
                    {'contents': [section]})

                items = try_get(
                    section, lambda x: x['itemSectionRenderer']['contents'], list)
                for item in items or ():
                    renderer = item.get('videoRenderer') if isinstance(item, dict) else None
                    # Only video renderers carrying a video ID are results
                    if not (isinstance(renderer, dict) and renderer.get('videoId')):
                        continue

                    yield self._extract_video(renderer)
                    yielded += 1
                    if yielded == n:
                        return

            if not continuation:
                return

    def _get_n_results(self, query, n):
        """Return a playlist of up to ``n`` results, with ``query`` as its ID and title"""
        results = self._entries(query, n)
        return self.playlist_result(results, query, query)
4497
4498
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search variant selected with the ``ytsearchdate`` keyword."""

    IE_NAME = '%s:date' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    # Passed along as "params" by YoutubeSearchIE._entries
    _SEARCH_PARAMS = 'CAI%3D'
4504
4505
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extract the results of a ``youtube.com/results`` search URL."""

    IE_NAME = '%s_url' % YoutubeSearchIE.IE_NAME
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Return ``_VALID_URL`` unchanged."""
        # NOTE(review): presumably overrides a keyword-based pattern built by
        # SearchInfoExtractor - confirm against the base class
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run the search described by the URL's query string.

        The search terms come from ``search_query`` (or ``q``); the first
        ``sp`` value, or an empty string when absent, is stored on the
        instance as ``_SEARCH_PARAMS`` before the results are collected.
        """
        url_params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = url_params.get('search_query') or url_params.get('q')
        query = search_terms[0]
        sp_values = url_params.get('sp', ('',))
        self._SEARCH_PARAMS = sp_values[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4532
4533
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base for the feed extractors
    Every subclass has to provide the _FEED_NAME attribute.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name"""
        feed = self._FEED_NAME
        return 'youtube:%s' % feed

    def _real_extract(self, url):
        """Return a URL result for the feed page, handled by YoutubeTabIE"""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4550
4551
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ``:ytwatchlater`` shortcut onto the ``WL`` playlist URL."""

    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a URL result for the ``WL`` playlist, handled by YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4564
4565
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``recommended`` feed.

    Matches the bare youtube.com front page as well as the ``:ytrec`` /
    ``:ytrecommended`` shortcuts.
    """

    _FEED_NAME = 'recommended'
    # NOTE(review): IE_DESC advertises "(requires authentication)" although
    # _LOGIN_REQUIRED is switched off for this feed - confirm which is intended
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {'url': candidate, 'only_matching': True}
        for candidate in (
            ':ytrec',
            ':ytrecommended',
            'https://youtube.com',
        )
    ]
4581
4582
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``subscriptions`` feed."""

    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    # Matches :ytsub and :ytsubscription, each with an optional "s"
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {'url': shortcut, 'only_matching': True}
        for shortcut in (':ytsubs', ':ytsubscriptions')
    ]
4594
4595
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed."""

    _FEED_NAME = 'history'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    # Matches both :ythis and :ythistory
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4604
4605
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs that lack a video and explain why.

    The pattern only matches URLs that end right after ``watch?``, after a
    single one of the listed query parameters, or after the ``a`` parameter
    of an ``attribution_link``; extraction always fails with a hint about
    shell quoting.
    """

    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [
        {'url': truncated_url, 'only_matching': True}
        for truncated_url in (
            'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
            'https://www.youtube.com/watch?',
            'https://www.youtube.com/watch?x-yt-cl=84503534',
            'https://www.youtube.com/watch?feature=foo',
            'https://www.youtube.com/watch?hl=en-GB',
            'https://www.youtube.com/watch?t=2372',
        )
    ]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError with a quoting hint."""
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  youtube-dl BaW_jenozKc  .')
        raise ExtractorError(hint, expected=True)
4653
4654
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose ``v`` parameter holds only 1 to 10 ID characters."""

    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise an expected ExtractorError naming the partial ID."""
        partial_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (partial_id, url)
        raise ExtractorError(message, expected=True)