]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[crunchyroll:playlist] Force http
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import datetime
9 import hashlib
10 import itertools
11 import json
12 import os.path
13 import random
14 import re
15 import time
16 import traceback
17
18 from .common import InfoExtractor, SearchInfoExtractor
19 from ..compat import (
20 compat_chr,
21 compat_HTTPError,
22 compat_parse_qs,
23 compat_str,
24 compat_urllib_parse_unquote_plus,
25 compat_urllib_parse_urlencode,
26 compat_urllib_parse_urlparse,
27 compat_urlparse,
28 )
29 from ..jsinterp import JSInterpreter
30 from ..utils import (
31 bool_or_none,
32 bytes_to_intlist,
33 clean_html,
34 dict_get,
35 datetime_from_str,
36 error_to_compat_str,
37 ExtractorError,
38 format_field,
39 float_or_none,
40 int_or_none,
41 intlist_to_bytes,
42 mimetype2ext,
43 parse_codecs,
44 parse_count,
45 parse_duration,
46 qualities,
47 remove_start,
48 smuggle_url,
49 str_or_none,
50 str_to_int,
51 try_get,
52 unescapeHTML,
53 unified_strdate,
54 unsmuggle_url,
55 update_url_query,
56 url_or_none,
57 urlencode_postdata,
58 urljoin,
59 variadic
60 )
61
62
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    parsed_url = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed_url.query)
65
66
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Legacy Google account sign-in endpoints. Username/password login is
    # broken (see _login); these are retained for reference only.
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Path segments on youtube.com that can never be a channel/user name
    # (used to disambiguate URLs in subclass _VALID_URLs).
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Playlist IDs: a known prefix followed by >= 10 id characters, or one of
    # the special list names (mix, watch later, liked, ...).
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
86
87 def _login(self):
88 """
89 Attempt to log in to YouTube.
90 True is returned if successful or skipped.
91 False is returned if login failed.
92
93 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
94 """
95
96 def warn(message):
97 self.report_warning(message)
98
99 # username+password login is broken
100 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
101 self.raise_login_required(
102 'Login details are needed to download this content', method='cookies')
103 username, password = self._get_login_info()
104 if username:
105 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
106 return
107
108 # Everything below this is broken!
109 r'''
110 # No authentication to be performed
111 if username is None:
112 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
113 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
114 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
115 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
116 return True
117
118 login_page = self._download_webpage(
119 self._LOGIN_URL, None,
120 note='Downloading login page',
121 errnote='unable to fetch login page', fatal=False)
122 if login_page is False:
123 return
124
125 login_form = self._hidden_inputs(login_page)
126
127 def req(url, f_req, note, errnote):
128 data = login_form.copy()
129 data.update({
130 'pstMsg': 1,
131 'checkConnection': 'youtube',
132 'checkedDomains': 'youtube',
133 'hl': 'en',
134 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
135 'f.req': json.dumps(f_req),
136 'flowName': 'GlifWebSignIn',
137 'flowEntry': 'ServiceLogin',
138 # TODO: reverse actual botguard identifier generation algo
139 'bgRequest': '["identifier",""]',
140 })
141 return self._download_json(
142 url, None, note=note, errnote=errnote,
143 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
144 fatal=False,
145 data=urlencode_postdata(data), headers={
146 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
147 'Google-Accounts-XSRF': 1,
148 })
149
150 lookup_req = [
151 username,
152 None, [], None, 'US', None, None, 2, False, True,
153 [
154 None, None,
155 [2, 1, None, 1,
156 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
157 None, [], 4],
158 1, [None, None, []], None, None, None, True
159 ],
160 username,
161 ]
162
163 lookup_results = req(
164 self._LOOKUP_URL, lookup_req,
165 'Looking up account info', 'Unable to look up account info')
166
167 if lookup_results is False:
168 return False
169
170 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
171 if not user_hash:
172 warn('Unable to extract user hash')
173 return False
174
175 challenge_req = [
176 user_hash,
177 None, 1, None, [1, None, None, None, [password, None, True]],
178 [
179 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
180 1, [None, None, []], None, None, None, True
181 ]]
182
183 challenge_results = req(
184 self._CHALLENGE_URL, challenge_req,
185 'Logging in', 'Unable to log in')
186
187 if challenge_results is False:
188 return
189
190 login_res = try_get(challenge_results, lambda x: x[0][5], list)
191 if login_res:
192 login_msg = try_get(login_res, lambda x: x[5], compat_str)
193 warn(
194 'Unable to login: %s' % 'Invalid password'
195 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
196 return False
197
198 res = try_get(challenge_results, lambda x: x[0][-1], list)
199 if not res:
200 warn('Unable to extract result entry')
201 return False
202
203 login_challenge = try_get(res, lambda x: x[0][0], list)
204 if login_challenge:
205 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
206 if challenge_str == 'TWO_STEP_VERIFICATION':
207 # SEND_SUCCESS - TFA code has been successfully sent to phone
208 # QUOTA_EXCEEDED - reached the limit of TFA codes
209 status = try_get(login_challenge, lambda x: x[5], compat_str)
210 if status == 'QUOTA_EXCEEDED':
211 warn('Exceeded the limit of TFA codes, try later')
212 return False
213
214 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
215 if not tl:
216 warn('Unable to extract TL')
217 return False
218
219 tfa_code = self._get_tfa_info('2-step verification code')
220
221 if not tfa_code:
222 warn(
223 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
224 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
225 return False
226
227 tfa_code = remove_start(tfa_code, 'G-')
228
229 tfa_req = [
230 user_hash, None, 2, None,
231 [
232 9, None, None, None, None, None, None, None,
233 [None, tfa_code, True, 2]
234 ]]
235
236 tfa_results = req(
237 self._TFA_URL.format(tl), tfa_req,
238 'Submitting TFA code', 'Unable to submit TFA code')
239
240 if tfa_results is False:
241 return False
242
243 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
244 if tfa_res:
245 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
246 warn(
247 'Unable to finish TFA: %s' % 'Invalid TFA code'
248 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
249 return False
250
251 check_cookie_url = try_get(
252 tfa_results, lambda x: x[0][-1][2], compat_str)
253 else:
254 CHALLENGES = {
255 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
256 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
257 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
258 }
259 challenge = CHALLENGES.get(
260 challenge_str,
261 '%s returned error %s.' % (self.IE_NAME, challenge_str))
262 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
263 return False
264 else:
265 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
266
267 if not check_cookie_url:
268 warn('Unable to extract CheckCookie URL')
269 return False
270
271 check_cookie_results = self._download_webpage(
272 check_cookie_url, None, 'Checking cookie', fatal=False)
273
274 if check_cookie_results is False:
275 return False
276
277 if 'https://myaccount.google.com/' not in check_cookie_results:
278 warn('Unable to log in')
279 return False
280
281 return True
282 '''
283
284 def _initialize_consent(self):
285 cookies = self._get_cookies('https://www.youtube.com/')
286 if cookies.get('__Secure-3PSID'):
287 return
288 consent_id = None
289 consent = cookies.get('CONSENT')
290 if consent:
291 if 'YES' in consent.value:
292 return
293 consent_id = self._search_regex(
294 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
295 if not consent_id:
296 consent_id = random.randint(100, 999)
297 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
298
299 def _real_initialize(self):
300 self._initialize_consent()
301 if self._downloader is None:
302 return
303 if not self._login():
304 return
305
    # Regexes locating the JSON blobs YouTube embeds in watch/browse pages.
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Tokens that terminate the embedded JSON blob in the page source.
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Built-in innertube client configurations, used as fallbacks when values
    # cannot be extracted from the page's own ytcfg (see _get_default_ytcfg).
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 3
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 55
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 21
        }
    }

    # API hostname per innertube client; clients not listed use the WEB host
    # (see _get_innertube_host).
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }
403
404 def _get_default_ytcfg(self, client='WEB'):
405 if client in self._YT_DEFAULT_YTCFGS:
406 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
407 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
408 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
409
410 def _get_innertube_host(self, client='WEB'):
411 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
412
413 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
414 # try_get but with fallback to default ytcfg client values when present
415 _func = lambda y: try_get(y, getter, expected_type)
416 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
417
418 def _extract_client_name(self, ytcfg, default_client='WEB'):
419 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
420
421 @staticmethod
422 def _extract_session_index(ytcfg):
423 return int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
424
425 def _extract_client_version(self, ytcfg, default_client='WEB'):
426 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
427
428 def _extract_api_key(self, ytcfg=None, default_client='WEB'):
429 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
430
431 def _extract_context(self, ytcfg=None, default_client='WEB'):
432 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
433 context = _get_context(ytcfg)
434 if context:
435 return context
436
437 context = _get_context(self._get_default_ytcfg(default_client))
438 if not ytcfg:
439 return context
440
441 # Recreate the client context (required)
442 context['client'].update({
443 'clientVersion': self._extract_client_version(ytcfg, default_client),
444 'clientName': self._extract_client_name(ytcfg, default_client),
445 })
446 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
447 if visitor_data:
448 context['client']['visitorData'] = visitor_data
449 return context
450
451 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
452 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
453 # See: https://github.com/yt-dlp/yt-dlp/issues/393
454 yt_cookies = self._get_cookies('https://www.youtube.com')
455 sapisid_cookie = dict_get(
456 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
457 if sapisid_cookie is None:
458 return
459 time_now = round(time.time())
460 # SAPISID cookie is required if not already present
461 if not yt_cookies.get('SAPISID'):
462 self._set_cookie(
463 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
464 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
465 sapisidhash = hashlib.sha1(
466 f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
467 return f'SAPISIDHASH {time_now}_{sapisidhash}'
468
469 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
470 note='Downloading API JSON', errnote='Unable to download API page',
471 context=None, api_key=None, api_hostname=None, default_client='WEB'):
472
473 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
474 data.update(query)
475 real_headers = self._generate_api_headers(client=default_client)
476 real_headers.update({'content-type': 'application/json'})
477 if headers:
478 real_headers.update(headers)
479 return self._download_json(
480 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
481 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
482 data=json.dumps(data).encode('utf8'), headers=real_headers,
483 query={'key': api_key or self._extract_api_key()})
484
485 def _extract_yt_initial_data(self, video_id, webpage):
486 return self._parse_json(
487 self._search_regex(
488 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
489 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
490 video_id)
491
492 def _extract_identity_token(self, webpage, item_id):
493 ytcfg = self._extract_ytcfg(item_id, webpage)
494 if ytcfg:
495 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
496 if token:
497 return token
498 return self._search_regex(
499 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
500 'identity token', default=None)
501
502 @staticmethod
503 def _extract_account_syncid(*args):
504 """
505 Extract syncId required to download private playlists of secondary channels
506 @params response and/or ytcfg
507 """
508 for data in args:
509 # ytcfg includes channel_syncid if on secondary channel
510 delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
511 if delegated_sid:
512 return delegated_sid
513 sync_ids = (try_get(
514 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
515 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
516 if len(sync_ids) >= 2 and sync_ids[1]:
517 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
518 # and just "user_syncid||" for primary channel. We only want the channel_syncid
519 return sync_ids[0]
520
521 def _extract_ytcfg(self, video_id, webpage):
522 if not webpage:
523 return {}
524 return self._parse_json(
525 self._search_regex(
526 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
527 default='{}'), video_id, fatal=False) or {}
528
529 def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
530 visitor_data=None, api_hostname=None, client='WEB', session_index=None):
531 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(client))
532 headers = {
533 'X-YouTube-Client-Name': compat_str(
534 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)),
535 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
536 'Origin': origin
537 }
538 if not visitor_data and ytcfg:
539 visitor_data = try_get(
540 self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
541 if identity_token:
542 headers['X-Youtube-Identity-Token'] = identity_token
543 if account_syncid:
544 headers['X-Goog-PageId'] = account_syncid
545 if session_index is None and ytcfg:
546 session_index = self._extract_session_index(ytcfg)
547 if account_syncid or session_index is not None:
548 headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
549 if visitor_data:
550 headers['X-Goog-Visitor-Id'] = visitor_data
551 auth = self._generate_sapisidhash_header(origin)
552 if auth is not None:
553 headers['Authorization'] = auth
554 headers['X-Origin'] = origin
555 return headers
556
557 @staticmethod
558 def _build_api_continuation_query(continuation, ctp=None):
559 query = {
560 'continuation': continuation
561 }
562 # TODO: Inconsistency with clickTrackingParams.
563 # Currently we have a fixed ctp contained within context (from ytcfg)
564 # and a ctp in root query for continuation.
565 if ctp:
566 query['clickTracking'] = {'clickTrackingParams': ctp}
567 return query
568
569 @classmethod
570 def _extract_next_continuation_data(cls, renderer):
571 next_continuation = try_get(
572 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
573 lambda x: x['continuation']['reloadContinuationData']), dict)
574 if not next_continuation:
575 return
576 continuation = next_continuation.get('continuation')
577 if not continuation:
578 return
579 ctp = next_continuation.get('clickTrackingParams')
580 return cls._build_api_continuation_query(continuation, ctp)
581
582 @classmethod
583 def _extract_continuation_ep_data(cls, continuation_ep: dict):
584 if isinstance(continuation_ep, dict):
585 continuation = try_get(
586 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
587 if not continuation:
588 return
589 ctp = continuation_ep.get('clickTrackingParams')
590 return cls._build_api_continuation_query(continuation, ctp)
591
592 @classmethod
593 def _extract_continuation(cls, renderer):
594 next_continuation = cls._extract_next_continuation_data(renderer)
595 if next_continuation:
596 return next_continuation
597
598 contents = []
599 for key in ('contents', 'items'):
600 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
601
602 for content in contents:
603 if not isinstance(content, dict):
604 continue
605 continuation_ep = try_get(
606 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
607 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
608 dict)
609 continuation = cls._extract_continuation_ep_data(continuation_ep)
610 if continuation:
611 return continuation
612
613 @classmethod
614 def _extract_alerts(cls, data):
615 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
616 if not isinstance(alert_dict, dict):
617 continue
618 for alert in alert_dict.values():
619 alert_type = alert.get('type')
620 if not alert_type:
621 continue
622 message = cls._get_text(alert.get('text'))
623 if message:
624 yield alert_type, message
625
626 def _report_alerts(self, alerts, expected=True):
627 errors = []
628 warnings = []
629 for alert_type, alert_message in alerts:
630 if alert_type.lower() == 'error':
631 errors.append([alert_type, alert_message])
632 else:
633 warnings.append([alert_type, alert_message])
634
635 for alert_type, alert_message in (warnings + errors[:-1]):
636 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
637 if errors:
638 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
639
640 def _extract_and_report_alerts(self, data, *args, **kwargs):
641 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
642
643 def _extract_badges(self, renderer: dict):
644 badges = set()
645 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
646 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
647 if label:
648 badges.add(label.lower())
649 return badges
650
651 @staticmethod
652 def _get_text(data, getter=None, max_runs=None):
653 for get in variadic(getter):
654 d = try_get(data, get) if get is not None else data
655 text = try_get(d, lambda x: x['simpleText'], compat_str)
656 if text:
657 return text
658 runs = try_get(d, lambda x: x['runs'], list) or []
659 if not runs and isinstance(d, list):
660 runs = d
661
662 def get_runs(runs):
663 for run in runs[:min(len(runs), max_runs or len(runs))]:
664 yield try_get(run, lambda x: x['text'], compat_str) or ''
665
666 text = ''.join(get_runs(runs))
667 if text:
668 return text
669
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """
        Call the innertube API via _call_api, retrying on intermittent HTTP
        errors (500/503/404) and on responses missing all of *check_get_keys*.

        Returns the parsed JSON response, or None when a non-fatal failure
        exhausted the retries ('extractor_retries' param, default 3).
        Raises ExtractorError when fatal and retries are exhausted, or when
        the response carries an error alert.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                # Non-retryable error, or retries exhausted
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                # Accept the response once any of the expected keys is present
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
725
726 @staticmethod
727 def is_music_url(url):
728 return re.match(r'https?://music\.youtube\.com/', url) is not None
729
730 def _extract_video(self, renderer):
731 video_id = renderer.get('videoId')
732 title = self._get_text(renderer.get('title'))
733 description = self._get_text(renderer.get('descriptionSnippet'))
734 duration = parse_duration(self._get_text(renderer.get('lengthText')))
735 view_count_text = self._get_text(renderer.get('viewCountText')) or ''
736 view_count = str_to_int(self._search_regex(
737 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
738 'view count', default=None))
739
740 uploader = self._get_text(renderer, (lambda x: x['ownerText'], lambda x: x['shortBylineText']))
741
742 return {
743 '_type': 'url',
744 'ie_key': YoutubeIE.ie_key(),
745 'id': video_id,
746 'url': video_id,
747 'title': title,
748 'description': description,
749 'duration': duration,
750 'view_count': view_count,
751 'uploader': uploader,
752 }
753
754
class YoutubeIE(YoutubeBaseInfoExtractor):
    IE_DESC = 'YouTube.com'
    # Hostname regexes for Invidious (alternative YouTube frontend) instances,
    # interpolated into _VALID_URL below.
    _INVIDIOUS_SITES = (
        # invidious-redirect websites
        r'(?:www\.)?redirect\.invidious\.io',
        r'(?:(?:www|dev)\.)?invidio\.us',
        # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
        r'(?:www\.)?invidious\.pussthecat\.org',
        r'(?:www\.)?invidious\.zee\.li',
        r'(?:www\.)?invidious\.ethibox\.fr',
        r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
        # youtube-dl invidious instances list
        r'(?:(?:www|no)\.)?invidiou\.sh',
        r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
        r'(?:www\.)?invidious\.kabi\.tk',
        r'(?:www\.)?invidious\.mastodon\.host',
        r'(?:www\.)?invidious\.zapashcanon\.fr',
        r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
        r'(?:www\.)?invidious\.tinfoil-hat\.net',
        r'(?:www\.)?invidious\.himiko\.cloud',
        r'(?:www\.)?invidious\.reallyancient\.tech',
        r'(?:www\.)?invidious\.tube',
        r'(?:www\.)?invidiou\.site',
        r'(?:www\.)?invidious\.site',
        r'(?:www\.)?invidious\.xyz',
        r'(?:www\.)?invidious\.nixnet\.xyz',
        r'(?:www\.)?invidious\.048596\.xyz',
        r'(?:www\.)?invidious\.drycat\.fr',
        r'(?:www\.)?inv\.skyn3t\.in',
        r'(?:www\.)?tube\.poal\.co',
        r'(?:www\.)?tube\.connect\.cafe',
        r'(?:www\.)?vid\.wxzm\.sx',
        r'(?:www\.)?vid\.mint\.lgbt',
        r'(?:www\.)?vid\.puffyan\.us',
        r'(?:www\.)?yewtu\.be',
        r'(?:www\.)?yt\.elukerio\.org',
        r'(?:www\.)?yt\.lelux\.fi',
        r'(?:www\.)?invidious\.ggc-project\.de',
        r'(?:www\.)?yt\.maisputain\.ovh',
        r'(?:www\.)?ytprivate\.com',
        r'(?:www\.)?invidious\.13ad\.de',
        r'(?:www\.)?invidious\.toot\.koeln',
        r'(?:www\.)?invidious\.fdn\.fr',
        r'(?:www\.)?watch\.nettohikari\.com',
        r'(?:www\.)?invidious\.namazso\.eu',
        r'(?:www\.)?invidious\.silkky\.cloud',
        r'(?:www\.)?invidious\.exonip\.de',
        r'(?:www\.)?invidious\.riverside\.rocks',
        r'(?:www\.)?invidious\.blamefran\.net',
        r'(?:www\.)?invidious\.moomoo\.de',
        r'(?:www\.)?ytb\.trom\.tf',
        r'(?:www\.)?yt\.cyberhost\.uk',
        r'(?:www\.)?kgg2m7yk5aybusll\.onion',
        r'(?:www\.)?qklhadlycap4cnod\.onion',
        r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
        r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
        r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
        r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
        r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
        r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
        r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
        r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
        r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
        r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
    )
    # Accepts full watch/embed/short URLs on youtube.com and many mirrors, or
    # a bare 11-character video ID; group 'id' captures the video ID.
    _VALID_URL = r"""(?x)^
                     (
                         (?:https?://|//)                                    # http(s):// or protocol-independent URL
                         (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
                            (?:www\.)?deturl\.com/www\.youtube\.com|
                            (?:www\.)?pwnyoutube\.com|
                            (?:www\.)?hooktube\.com|
                            (?:www\.)?yourepeat\.com|
                            tube\.majestyc\.net|
                            %(invidious)s|
                            youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
                         (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
                         (?:                                                  # the various things that can precede the ID:
                             (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
                             |(?:                                             # or the v= param in all its forms
                                 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                 (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                 (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
                                 v=
                             )
                         ))
                         |(?:
                            youtu\.be|                                        # just youtu.be/xxxx
                            vid\.plus|                                        # or vid.plus/xxxx
                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                            %(invidious)s
                         )/
                         |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                         )
                     )?                                                       # all until now is optional -> you can pass the naked ID
                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
                     (?(1).+)?                                                # if we found the ID, everything can follow
                     (?:\#|$)""" % {
        'invidious': '|'.join(_INVIDIOUS_SITES),
    }
    # Patterns extracting the player-JS id from a player URL (used for
    # signature-function caching); group 'id' captures the player id.
    _PLAYER_INFO_RE = (
        r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
    )
    # Static metadata for known itags, keyed by the itag number as a string.
    # Used to supplement/override values parsed from the player response
    # (container, resolution, codecs, bitrate). '_rtmp' is a pseudo-entry
    # applied to RTMP streams rather than a real itag.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},

        # 3D videos (deprioritized via negative 'preference')
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }
    # Subtitle serialization formats requested from the timedtext endpoint
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Player-response reason strings that indicate an age-gated video
    _AGE_GATE_REASONS = (
        'Sign in to confirm your age',
        'This video may be inappropriate for some users.',
        'Sorry, this content is age-restricted.')

    # Do not attempt X-Forwarded-For based geo bypass for single videos
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
975 _TESTS = [
976 {
977 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
978 'info_dict': {
979 'id': 'BaW_jenozKc',
980 'ext': 'mp4',
981 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
982 'uploader': 'Philipp Hagemeister',
983 'uploader_id': 'phihag',
984 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
985 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
986 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
987 'upload_date': '20121002',
988 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
989 'categories': ['Science & Technology'],
990 'tags': ['youtube-dl'],
991 'duration': 10,
992 'view_count': int,
993 'like_count': int,
994 'dislike_count': int,
995 'start_time': 1,
996 'end_time': 9,
997 }
998 },
999 {
1000 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1001 'note': 'Embed-only video (#1746)',
1002 'info_dict': {
1003 'id': 'yZIXLfi8CZQ',
1004 'ext': 'mp4',
1005 'upload_date': '20120608',
1006 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1007 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1008 'uploader': 'SET India',
1009 'uploader_id': 'setindia',
1010 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1011 'age_limit': 18,
1012 },
1013 'skip': 'Private video',
1014 },
1015 {
1016 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1017 'note': 'Use the first video ID in the URL',
1018 'info_dict': {
1019 'id': 'BaW_jenozKc',
1020 'ext': 'mp4',
1021 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1022 'uploader': 'Philipp Hagemeister',
1023 'uploader_id': 'phihag',
1024 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1025 'upload_date': '20121002',
1026 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1027 'categories': ['Science & Technology'],
1028 'tags': ['youtube-dl'],
1029 'duration': 10,
1030 'view_count': int,
1031 'like_count': int,
1032 'dislike_count': int,
1033 },
1034 'params': {
1035 'skip_download': True,
1036 },
1037 },
1038 {
1039 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1040 'note': '256k DASH audio (format 141) via DASH manifest',
1041 'info_dict': {
1042 'id': 'a9LDPn-MO4I',
1043 'ext': 'm4a',
1044 'upload_date': '20121002',
1045 'uploader_id': '8KVIDEO',
1046 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1047 'description': '',
1048 'uploader': '8KVIDEO',
1049 'title': 'UHDTV TEST 8K VIDEO.mp4'
1050 },
1051 'params': {
1052 'youtube_include_dash_manifest': True,
1053 'format': '141',
1054 },
1055 'skip': 'format 141 not served anymore',
1056 },
1057 # DASH manifest with encrypted signature
1058 {
1059 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1060 'info_dict': {
1061 'id': 'IB3lcPjvWLA',
1062 'ext': 'm4a',
1063 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1064 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1065 'duration': 244,
1066 'uploader': 'AfrojackVEVO',
1067 'uploader_id': 'AfrojackVEVO',
1068 'upload_date': '20131011',
1069 'abr': 129.495,
1070 },
1071 'params': {
1072 'youtube_include_dash_manifest': True,
1073 'format': '141/bestaudio[ext=m4a]',
1074 },
1075 },
1076 # Controversy video
1077 {
1078 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1079 'info_dict': {
1080 'id': 'T4XJQO3qol8',
1081 'ext': 'mp4',
1082 'duration': 219,
1083 'upload_date': '20100909',
1084 'uploader': 'Amazing Atheist',
1085 'uploader_id': 'TheAmazingAtheist',
1086 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
1087 'title': 'Burning Everyone\'s Koran',
1088 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
1089 }
1090 },
1091 # Normal age-gate video (embed allowed)
1092 {
1093 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1094 'info_dict': {
1095 'id': 'HtVdAasjOgU',
1096 'ext': 'mp4',
1097 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1098 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1099 'duration': 142,
1100 'uploader': 'The Witcher',
1101 'uploader_id': 'WitcherGame',
1102 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1103 'upload_date': '20140605',
1104 'age_limit': 18,
1105 },
1106 },
1107 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1108 # YouTube Red ad is not captured for creator
1109 {
1110 'url': '__2ABJjxzNo',
1111 'info_dict': {
1112 'id': '__2ABJjxzNo',
1113 'ext': 'mp4',
1114 'duration': 266,
1115 'upload_date': '20100430',
1116 'uploader_id': 'deadmau5',
1117 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1118 'creator': 'deadmau5',
1119 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1120 'uploader': 'deadmau5',
1121 'title': 'Deadmau5 - Some Chords (HD)',
1122 'alt_title': 'Some Chords',
1123 },
1124 'expected_warnings': [
1125 'DASH manifest missing',
1126 ]
1127 },
1128 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1129 {
1130 'url': 'lqQg6PlCWgI',
1131 'info_dict': {
1132 'id': 'lqQg6PlCWgI',
1133 'ext': 'mp4',
1134 'duration': 6085,
1135 'upload_date': '20150827',
1136 'uploader_id': 'olympic',
1137 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1138 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1139 'uploader': 'Olympic',
1140 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1141 },
1142 'params': {
1143 'skip_download': 'requires avconv',
1144 }
1145 },
1146 # Non-square pixels
1147 {
1148 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1149 'info_dict': {
1150 'id': '_b-2C3KPAM0',
1151 'ext': 'mp4',
1152 'stretched_ratio': 16 / 9.,
1153 'duration': 85,
1154 'upload_date': '20110310',
1155 'uploader_id': 'AllenMeow',
1156 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1157 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1158 'uploader': '孫ᄋᄅ',
1159 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1160 },
1161 },
1162 # url_encoded_fmt_stream_map is empty string
1163 {
1164 'url': 'qEJwOuvDf7I',
1165 'info_dict': {
1166 'id': 'qEJwOuvDf7I',
1167 'ext': 'webm',
1168 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1169 'description': '',
1170 'upload_date': '20150404',
1171 'uploader_id': 'spbelect',
1172 'uploader': 'Наблюдатели Петербурга',
1173 },
1174 'params': {
1175 'skip_download': 'requires avconv',
1176 },
1177 'skip': 'This live event has ended.',
1178 },
1179 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1180 {
1181 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1182 'info_dict': {
1183 'id': 'FIl7x6_3R5Y',
1184 'ext': 'webm',
1185 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1186 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1187 'duration': 220,
1188 'upload_date': '20150625',
1189 'uploader_id': 'dorappi2000',
1190 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1191 'uploader': 'dorappi2000',
1192 'formats': 'mincount:31',
1193 },
1194 'skip': 'not actual anymore',
1195 },
1196 # DASH manifest with segment_list
1197 {
1198 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1199 'md5': '8ce563a1d667b599d21064e982ab9e31',
1200 'info_dict': {
1201 'id': 'CsmdDsKjzN8',
1202 'ext': 'mp4',
1203 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1204 'uploader': 'Airtek',
1205 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1206 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1207 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1208 },
1209 'params': {
1210 'youtube_include_dash_manifest': True,
1211 'format': '135', # bestvideo
1212 },
1213 'skip': 'This live event has ended.',
1214 },
1215 {
1216 # Multifeed videos (multiple cameras), URL is for Main Camera
1217 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1218 'info_dict': {
1219 'id': 'jvGDaLqkpTg',
1220 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1221 'description': 'md5:e03b909557865076822aa169218d6a5d',
1222 },
1223 'playlist': [{
1224 'info_dict': {
1225 'id': 'jvGDaLqkpTg',
1226 'ext': 'mp4',
1227 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1228 'description': 'md5:e03b909557865076822aa169218d6a5d',
1229 'duration': 10643,
1230 'upload_date': '20161111',
1231 'uploader': 'Team PGP',
1232 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1233 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1234 },
1235 }, {
1236 'info_dict': {
1237 'id': '3AKt1R1aDnw',
1238 'ext': 'mp4',
1239 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1240 'description': 'md5:e03b909557865076822aa169218d6a5d',
1241 'duration': 10991,
1242 'upload_date': '20161111',
1243 'uploader': 'Team PGP',
1244 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1245 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1246 },
1247 }, {
1248 'info_dict': {
1249 'id': 'RtAMM00gpVc',
1250 'ext': 'mp4',
1251 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1252 'description': 'md5:e03b909557865076822aa169218d6a5d',
1253 'duration': 10995,
1254 'upload_date': '20161111',
1255 'uploader': 'Team PGP',
1256 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1257 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1258 },
1259 }, {
1260 'info_dict': {
1261 'id': '6N2fdlP3C5U',
1262 'ext': 'mp4',
1263 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1264 'description': 'md5:e03b909557865076822aa169218d6a5d',
1265 'duration': 10990,
1266 'upload_date': '20161111',
1267 'uploader': 'Team PGP',
1268 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1269 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1270 },
1271 }],
1272 'params': {
1273 'skip_download': True,
1274 },
1275 },
1276 {
1277 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1278 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1279 'info_dict': {
1280 'id': 'gVfLd0zydlo',
1281 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1282 },
1283 'playlist_count': 2,
1284 'skip': 'Not multifeed anymore',
1285 },
1286 {
1287 'url': 'https://vid.plus/FlRa-iH7PGw',
1288 'only_matching': True,
1289 },
1290 {
1291 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1292 'only_matching': True,
1293 },
1294 {
1295 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1296 # Also tests cut-off URL expansion in video description (see
1297 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1298 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1299 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1300 'info_dict': {
1301 'id': 'lsguqyKfVQg',
1302 'ext': 'mp4',
1303 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1304 'alt_title': 'Dark Walk - Position Music',
1305 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1306 'duration': 133,
1307 'upload_date': '20151119',
1308 'uploader_id': 'IronSoulElf',
1309 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1310 'uploader': 'IronSoulElf',
1311 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1312 'track': 'Dark Walk - Position Music',
1313 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1314 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1315 },
1316 'params': {
1317 'skip_download': True,
1318 },
1319 },
1320 {
1321 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1322 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1323 'only_matching': True,
1324 },
1325 {
1326 # Video with yt:stretch=17:0
1327 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1328 'info_dict': {
1329 'id': 'Q39EVAstoRM',
1330 'ext': 'mp4',
1331 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1332 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1333 'upload_date': '20151107',
1334 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1335 'uploader': 'CH GAMER DROID',
1336 },
1337 'params': {
1338 'skip_download': True,
1339 },
1340 'skip': 'This video does not exist.',
1341 },
1342 {
1343 # Video with incomplete 'yt:stretch=16:'
1344 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1345 'only_matching': True,
1346 },
1347 {
1348 # Video licensed under Creative Commons
1349 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1350 'info_dict': {
1351 'id': 'M4gD1WSo5mA',
1352 'ext': 'mp4',
1353 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1354 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1355 'duration': 721,
1356 'upload_date': '20150127',
1357 'uploader_id': 'BerkmanCenter',
1358 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1359 'uploader': 'The Berkman Klein Center for Internet & Society',
1360 'license': 'Creative Commons Attribution license (reuse allowed)',
1361 },
1362 'params': {
1363 'skip_download': True,
1364 },
1365 },
1366 {
1367 # Channel-like uploader_url
1368 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1369 'info_dict': {
1370 'id': 'eQcmzGIKrzg',
1371 'ext': 'mp4',
1372 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1373 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1374 'duration': 4060,
1375 'upload_date': '20151119',
1376 'uploader': 'Bernie Sanders',
1377 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1378 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1379 'license': 'Creative Commons Attribution license (reuse allowed)',
1380 },
1381 'params': {
1382 'skip_download': True,
1383 },
1384 },
1385 {
1386 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1387 'only_matching': True,
1388 },
1389 {
1390 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1391 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1392 'only_matching': True,
1393 },
1394 {
1395 # Rental video preview
1396 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1397 'info_dict': {
1398 'id': 'uGpuVWrhIzE',
1399 'ext': 'mp4',
1400 'title': 'Piku - Trailer',
1401 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1402 'upload_date': '20150811',
1403 'uploader': 'FlixMatrix',
1404 'uploader_id': 'FlixMatrixKaravan',
1405 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1406 'license': 'Standard YouTube License',
1407 },
1408 'params': {
1409 'skip_download': True,
1410 },
1411 'skip': 'This video is not available.',
1412 },
1413 {
1414 # YouTube Red video with episode data
1415 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1416 'info_dict': {
1417 'id': 'iqKdEhx-dD4',
1418 'ext': 'mp4',
1419 'title': 'Isolation - Mind Field (Ep 1)',
1420 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1421 'duration': 2085,
1422 'upload_date': '20170118',
1423 'uploader': 'Vsauce',
1424 'uploader_id': 'Vsauce',
1425 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1426 'series': 'Mind Field',
1427 'season_number': 1,
1428 'episode_number': 1,
1429 },
1430 'params': {
1431 'skip_download': True,
1432 },
1433 'expected_warnings': [
1434 'Skipping DASH manifest',
1435 ],
1436 },
1437 {
1438 # The following content has been identified by the YouTube community
1439 # as inappropriate or offensive to some audiences.
1440 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1441 'info_dict': {
1442 'id': '6SJNVb0GnPI',
1443 'ext': 'mp4',
1444 'title': 'Race Differences in Intelligence',
1445 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1446 'duration': 965,
1447 'upload_date': '20140124',
1448 'uploader': 'New Century Foundation',
1449 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1450 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1451 },
1452 'params': {
1453 'skip_download': True,
1454 },
1455 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1456 },
1457 {
1458 # itag 212
1459 'url': '1t24XAntNCY',
1460 'only_matching': True,
1461 },
1462 {
1463 # geo restricted to JP
1464 'url': 'sJL6WA-aGkQ',
1465 'only_matching': True,
1466 },
1467 {
1468 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1469 'only_matching': True,
1470 },
1471 {
1472 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1473 'only_matching': True,
1474 },
1475 {
1476 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1477 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1478 'only_matching': True,
1479 },
1480 {
1481 # DRM protected
1482 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1483 'only_matching': True,
1484 },
1485 {
1486 # Video with unsupported adaptive stream type formats
1487 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1488 'info_dict': {
1489 'id': 'Z4Vy8R84T1U',
1490 'ext': 'mp4',
1491 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1492 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1493 'duration': 433,
1494 'upload_date': '20130923',
1495 'uploader': 'Amelia Putri Harwita',
1496 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1497 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1498 'formats': 'maxcount:10',
1499 },
1500 'params': {
1501 'skip_download': True,
1502 'youtube_include_dash_manifest': False,
1503 },
1504 'skip': 'not actual anymore',
1505 },
1506 {
1507 # Youtube Music Auto-generated description
1508 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1509 'info_dict': {
1510 'id': 'MgNrAu2pzNs',
1511 'ext': 'mp4',
1512 'title': 'Voyeur Girl',
1513 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1514 'upload_date': '20190312',
1515 'uploader': 'Stephen - Topic',
1516 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1517 'artist': 'Stephen',
1518 'track': 'Voyeur Girl',
1519 'album': 'it\'s too much love to know my dear',
1520 'release_date': '20190313',
1521 'release_year': 2019,
1522 },
1523 'params': {
1524 'skip_download': True,
1525 },
1526 },
1527 {
1528 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1529 'only_matching': True,
1530 },
1531 {
1532 # invalid -> valid video id redirection
1533 'url': 'DJztXj2GPfl',
1534 'info_dict': {
1535 'id': 'DJztXj2GPfk',
1536 'ext': 'mp4',
1537 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1538 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1539 'upload_date': '20090125',
1540 'uploader': 'Prochorowka',
1541 'uploader_id': 'Prochorowka',
1542 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1543 'artist': 'Panjabi MC',
1544 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1545 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1546 },
1547 'params': {
1548 'skip_download': True,
1549 },
1550 'skip': 'Video unavailable',
1551 },
1552 {
1553 # empty description results in an empty string
1554 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1555 'info_dict': {
1556 'id': 'x41yOUIvK2k',
1557 'ext': 'mp4',
1558 'title': 'IMG 3456',
1559 'description': '',
1560 'upload_date': '20170613',
1561 'uploader_id': 'ElevageOrVert',
1562 'uploader': 'ElevageOrVert',
1563 },
1564 'params': {
1565 'skip_download': True,
1566 },
1567 },
1568 {
1569 # with '};' inside yt initial data (see [1])
1570 # see [2] for an example with '};' inside ytInitialPlayerResponse
1571 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1572 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1573 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1574 'info_dict': {
1575 'id': 'CHqg6qOn4no',
1576 'ext': 'mp4',
1577 'title': 'Part 77 Sort a list of simple types in c#',
1578 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1579 'upload_date': '20130831',
1580 'uploader_id': 'kudvenkat',
1581 'uploader': 'kudvenkat',
1582 },
1583 'params': {
1584 'skip_download': True,
1585 },
1586 },
1587 {
1588 # another example of '};' in ytInitialData
1589 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1590 'only_matching': True,
1591 },
1592 {
1593 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1594 'only_matching': True,
1595 },
1596 {
1597 # https://github.com/ytdl-org/youtube-dl/pull/28094
1598 'url': 'OtqTfy26tG0',
1599 'info_dict': {
1600 'id': 'OtqTfy26tG0',
1601 'ext': 'mp4',
1602 'title': 'Burn Out',
1603 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1604 'upload_date': '20141120',
1605 'uploader': 'The Cinematic Orchestra - Topic',
1606 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1607 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1608 'artist': 'The Cinematic Orchestra',
1609 'track': 'Burn Out',
1610 'album': 'Every Day',
1611 'release_data': None,
1612 'release_year': None,
1613 },
1614 'params': {
1615 'skip_download': True,
1616 },
1617 },
1618 {
1619 # controversial video, only works with bpctr when authenticated with cookies
1620 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1621 'only_matching': True,
1622 },
1623 {
1624 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1625 'url': 'cBvYw8_A0vQ',
1626 'info_dict': {
1627 'id': 'cBvYw8_A0vQ',
1628 'ext': 'mp4',
1629 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1630 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1631 'upload_date': '20201120',
1632 'uploader': 'Walk around Japan',
1633 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1634 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1635 },
1636 'params': {
1637 'skip_download': True,
1638 },
1639 }, {
1640 # Has multiple audio streams
1641 'url': 'WaOKSUlf4TM',
1642 'only_matching': True
1643 }, {
1644 # Requires Premium: has format 141 when requested using YTM url
1645 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1646 'only_matching': True
1647 }, {
1648 # multiple subtitles with same lang_code
1649 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1650 'only_matching': True,
1651 }, {
1652 # Force use android client fallback
1653 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1654 'info_dict': {
1655 'id': 'YOelRv7fMxY',
1656 'title': 'Digging a Secret Tunnel from my Workshop',
1657 'ext': '3gp',
1658 'upload_date': '20210624',
1659 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1660 'uploader': 'colinfurze',
1661 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1662 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1663 },
1664 'params': {
1665 'format': '17', # 3gp format available on android
1666 'extractor_args': {'youtube': {'player_client': ['android']}},
1667 },
1668 },
1669 {
1670 # Skip download of additional client configs (remix client config in this case)
1671 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1672 'only_matching': True,
1673 'params': {
1674 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1675 },
1676 }
1677 ]
1678
1679 @classmethod
1680 def suitable(cls, url):
1681 # Hack for lazy extractors until more generic solution is implemented
1682 # (see #28780)
1683 from .youtube import parse_qs
1684 qs = parse_qs(url)
1685 if qs.get('list', [None])[0]:
1686 return False
1687 return super(YoutubeIE, cls).suitable(url)
1688
1689 def __init__(self, *args, **kwargs):
1690 super(YoutubeIE, self).__init__(*args, **kwargs)
1691 self._code_cache = {}
1692 self._player_cache = {}
1693
1694 def _extract_player_url(self, ytcfg=None, webpage=None):
1695 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1696 if not player_url:
1697 player_url = self._search_regex(
1698 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1699 webpage, 'player URL', fatal=False)
1700 if player_url.startswith('//'):
1701 player_url = 'https:' + player_url
1702 elif not re.match(r'https?://', player_url):
1703 player_url = compat_urlparse.urljoin(
1704 'https://www.youtube.com', player_url)
1705 return player_url
1706
1707 def _signature_cache_id(self, example_sig):
1708 """ Return a string representation of a signature """
1709 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1710
1711 @classmethod
1712 def _extract_player_info(cls, player_url):
1713 for player_re in cls._PLAYER_INFO_RE:
1714 id_m = re.search(player_re, player_url)
1715 if id_m:
1716 break
1717 else:
1718 raise ExtractorError('Cannot identify player %r' % player_url)
1719 return id_m.group('id')
1720
1721 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1722 player_id = self._extract_player_info(player_url)
1723 if player_id not in self._code_cache:
1724 self._code_cache[player_id] = self._download_webpage(
1725 player_url, video_id, fatal=fatal,
1726 note='Downloading player ' + player_id,
1727 errnote='Download of %s failed' % player_url)
1728 return player_id in self._code_cache
1729
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Return a callable that descrambles an encrypted signature string.

        Results are cached on disk, keyed by player id plus the "shape"
        (dot-separated part lengths) of *example_sig*. Returns None when the
        player JS cannot be loaded (with fatal=False downloads).
        """
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # Cached spec is a character-permutation table: output char i
            # comes from input position cache_spec[i]
            return lambda s: ''.join(s[i] for i in cache_spec)

        if self._load_player(video_id, player_url):
            code = self._code_cache[player_id]
            res = self._parse_sig_js(code)

            # Derive the permutation table by running the extracted JS
            # function on chr(0)..chr(n-1), so future runs can skip the JS
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res
1752
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Runs *func* on a known test string, recovers the character permutation
        it performs, and renders it as slice/index expressions (debug aid for
        the youtube_print_sig_code option).
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a contiguous run of indices as a Python slice
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Run-length-encode the index list: consecutive +/-1 steps become
            # slices, isolated indices become single subscripts
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1791
    def _parse_sig_js(self, jscode):
        """Locate the signature-descrambling function in the player JS and
        wrap it as a Python callable via JSInterpreter.

        The patterns below match the many historical shapes of the call site
        in base.js; the named group 'sig' captures the function name.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes its arguments as a list
        return lambda s: initial_function([s])
1815
1816 def _decrypt_signature(self, s, video_id, player_url):
1817 """Turn the encrypted s field into a working signature"""
1818
1819 if player_url is None:
1820 raise ExtractorError('Cannot decrypt signature without player_url')
1821
1822 try:
1823 player_id = (player_url, self._signature_cache_id(s))
1824 if player_id not in self._player_cache:
1825 func = self._extract_signature_function(
1826 video_id, player_url, s
1827 )
1828 self._player_cache[player_id] = func
1829 func = self._player_cache[player_id]
1830 if self.get_param('youtube_print_sig_code'):
1831 self._print_sig_code(func, s)
1832 return func(s)
1833 except Exception as e:
1834 tb = traceback.format_exc()
1835 raise ExtractorError(
1836 'Signature extraction failed: ' + tb, cause=e)
1837
1838 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1839 """
1840 Extract signatureTimestamp (sts)
1841 Required to tell API what sig/player version is in use.
1842 """
1843 sts = None
1844 if isinstance(ytcfg, dict):
1845 sts = int_or_none(ytcfg.get('STS'))
1846
1847 if not sts:
1848 # Attempt to extract from player
1849 if player_url is None:
1850 error_msg = 'Cannot extract signature timestamp without player_url.'
1851 if fatal:
1852 raise ExtractorError(error_msg)
1853 self.report_warning(error_msg)
1854 return
1855 if self._load_player(video_id, player_url, fatal=fatal):
1856 player_id = self._extract_player_info(player_url)
1857 code = self._code_cache[player_id]
1858 sts = int_or_none(self._search_regex(
1859 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1860 'JS player signature timestamp', group='sts', fatal=fatal))
1861 return sts
1862
1863 def _mark_watched(self, video_id, player_response):
1864 playback_url = url_or_none(try_get(
1865 player_response,
1866 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
1867 if not playback_url:
1868 return
1869 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1870 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1871
1872 # cpn generation algorithm is reverse engineered from base.js.
1873 # In fact it works even with dummy cpn.
1874 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1875 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1876
1877 qs.update({
1878 'ver': ['2'],
1879 'cpn': [cpn],
1880 })
1881 playback_url = compat_urlparse.urlunparse(
1882 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1883
1884 self._download_webpage(
1885 playback_url, video_id, 'Marking watched',
1886 'Unable to mark watched', fatal=False)
1887
    @staticmethod
    def _extract_urls(webpage):
        """Return YouTube embed URLs/ids found in *webpage*.

        Covers iframe/embed/object players, lazyYT embeds (ids, not URLs),
        and the Wordpress "YouTube Video Importer" plugin markup.
        """
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
                (?:
                    <iframe[^>]+?src=|
                    data-video-url=|
                    <embed[^>]+?src=|
                    embedSWF\(?:\s*|
                    <object[^>]+data=|
                    new\s+SWFObject\(
                )
                (["\'])
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
                \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        entries.extend(m[-1] for m in matches)

        return entries
1919
1920 @staticmethod
1921 def _extract_url(webpage):
1922 urls = YoutubeIE._extract_urls(webpage)
1923 return urls[0] if urls else None
1924
1925 @classmethod
1926 def extract_id(cls, url):
1927 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1928 if mobj is None:
1929 raise ExtractorError('Invalid URL: %s' % url)
1930 video_id = mobj.group(2)
1931 return video_id
1932
1933 def _extract_chapters_from_json(self, data, video_id, duration):
1934 chapters_list = try_get(
1935 data,
1936 lambda x: x['playerOverlays']
1937 ['playerOverlayRenderer']
1938 ['decoratedPlayerBarRenderer']
1939 ['decoratedPlayerBarRenderer']
1940 ['playerBar']
1941 ['chapteredPlayerBarRenderer']
1942 ['chapters'],
1943 list)
1944 if not chapters_list:
1945 return
1946
1947 def chapter_time(chapter):
1948 return float_or_none(
1949 try_get(
1950 chapter,
1951 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1952 int),
1953 scale=1000)
1954 chapters = []
1955 for next_num, chapter in enumerate(chapters_list, start=1):
1956 start_time = chapter_time(chapter)
1957 if start_time is None:
1958 continue
1959 end_time = (chapter_time(chapters_list[next_num])
1960 if next_num < len(chapters_list) else duration)
1961 if end_time is None:
1962 continue
1963 title = try_get(
1964 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1965 compat_str)
1966 chapters.append({
1967 'start_time': start_time,
1968 'end_time': end_time,
1969 'title': title,
1970 })
1971 return chapters
1972
1973 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1974 return self._parse_json(self._search_regex(
1975 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1976 regex), webpage, name, default='{}'), video_id, fatal=False)
1977
1978 @staticmethod
1979 def parse_time_text(time_text):
1980 """
1981 Parse the comment time text
1982 time_text is in the format 'X units ago (edited)'
1983 """
1984 time_text_split = time_text.split(' ')
1985 if len(time_text_split) >= 3:
1986 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1987
1988 def _extract_comment(self, comment_renderer, parent=None):
1989 comment_id = comment_renderer.get('commentId')
1990 if not comment_id:
1991 return
1992
1993 text = self._get_text(comment_renderer.get('contentText'))
1994
1995 # note: timestamp is an estimate calculated from the current time and time_text
1996 time_text = self._get_text(comment_renderer.get('publishedTimeText')) or ''
1997 time_text_dt = self.parse_time_text(time_text)
1998 if isinstance(time_text_dt, datetime.datetime):
1999 timestamp = calendar.timegm(time_text_dt.timetuple())
2000 author = self._get_text(comment_renderer.get('authorText'))
2001 author_id = try_get(comment_renderer,
2002 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2003
2004 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2005 lambda x: x['likeCount']), compat_str)) or 0
2006 author_thumbnail = try_get(comment_renderer,
2007 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2008
2009 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2010 is_favorited = 'creatorHeart' in (try_get(
2011 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2012 return {
2013 'id': comment_id,
2014 'text': text,
2015 'timestamp': timestamp,
2016 'time_text': time_text,
2017 'like_count': votes,
2018 'is_favorited': is_favorited,
2019 'author': author,
2020 'author_id': author_id,
2021 'author_thumbnail': author_thumbnail,
2022 'author_is_uploader': author_is_uploader,
2023 'parent': parent or 'root'
2024 }
2025
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """Generator yielding comment info dicts (plus int estimated-total markers).

        Recurses once into reply threads (YouTube comments have a max depth
        of 2). *comment_counts* is a shared mutable list:
        [comments so far, est. total comments, current reply thread #].
        """

        def extract_header(contents):
            # Pull the total comment count and the continuation for the
            # requested sort order out of the comments header renderer
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = parse_count(self._get_text(
                    comments_header_renderer, (lambda x: x['countText'], lambda x: x['commentsCount']), max_runs=1))

                if expected_comment_count:
                    comment_counts[1] = expected_comment_count
                    self.to_screen('Downloading ~%d comments' % expected_comment_count)
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in the thread, then recurse into its replies
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['continuation']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_api_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the comment API until no continuation remains
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=continuation,
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                    break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2196
2197 @staticmethod
2198 def _generate_comment_continuation(video_id):
2199 """
2200 Generates initial comment section continuation token from given video id
2201 """
2202 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2203 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2204 new_continuation_intlist = list(itertools.chain.from_iterable(
2205 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2206 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2207
2208 def _extract_comments(self, ytcfg, video_id, contents, webpage):
2209 """Entry for comment extraction"""
2210 def _real_comment_extract(contents):
2211 if isinstance(contents, list):
2212 for entry in contents:
2213 for key, renderer in entry.items():
2214 if key not in known_entry_comment_renderers:
2215 continue
2216 yield from self._comment_entries(
2217 renderer, video_id=video_id, ytcfg=ytcfg,
2218 identity_token=self._extract_identity_token(webpage, item_id=video_id),
2219 account_syncid=self._extract_account_syncid(ytcfg))
2220 break
2221 comments = []
2222 known_entry_comment_renderers = ('itemSectionRenderer',)
2223 estimated_total = 0
2224 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2225
2226 try:
2227 for comment in _real_comment_extract(contents):
2228 if len(comments) >= max_comments:
2229 break
2230 if isinstance(comment, int):
2231 estimated_total = comment
2232 continue
2233 comments.append(comment)
2234 except KeyboardInterrupt:
2235 self.to_screen('Interrupted by user')
2236 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2237 return {
2238 'comments': comments,
2239 'comment_count': len(comments),
2240 }
2241
2242 @staticmethod
2243 def _generate_player_context(sts=None):
2244 context = {
2245 'html5Preference': 'HTML5_PREF_WANTS',
2246 }
2247 if sts is not None:
2248 context['signatureTimestamp'] = sts
2249 return {
2250 'playbackContext': {
2251 'contentPlaybackContext': context
2252 }
2253 }
2254
2255 @staticmethod
2256 def _get_video_info_params(video_id, client='TVHTML5'):
2257 GVI_CLIENTS = {
2258 'ANDROID': {
2259 'c': 'ANDROID',
2260 'cver': '16.20',
2261 },
2262 'TVHTML5': {
2263 'c': 'TVHTML5',
2264 'cver': '6.20180913',
2265 }
2266 }
2267 query = {
2268 'video_id': video_id,
2269 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2270 'html5': '1'
2271 }
2272 query.update(GVI_CLIENTS.get(client))
2273 return query
2274
2275 def _real_extract(self, url):
2276 url, smuggled_data = unsmuggle_url(url, {})
2277 video_id = self._match_id(url)
2278
2279 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2280
2281 base_url = self.http_scheme() + '//www.youtube.com/'
2282 webpage_url = base_url + 'watch?v=' + video_id
2283 webpage = self._download_webpage(
2284 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2285
2286 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2287 identity_token = self._extract_identity_token(webpage, video_id)
2288 session_index = self._extract_session_index(ytcfg)
2289 player_url = self._extract_player_url(ytcfg, webpage)
2290
2291 player_client = self._configuration_arg('player_client', [''])[0]
2292 if player_client not in ('web', 'android', ''):
2293 self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
2294 force_mobile_client = player_client != 'web'
2295 player_skip = self._configuration_arg('player_skip')
2296 player_response = None
2297 if webpage:
2298 player_response = self._extract_yt_initial_variable(
2299 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2300 video_id, 'initial player response')
2301
2302 syncid = self._extract_account_syncid(ytcfg, player_response)
2303 headers = self._generate_api_headers(ytcfg, identity_token, syncid, session_index=session_index)
2304
2305 ytm_streaming_data = {}
2306 if is_music_url:
2307 ytm_webpage = None
2308 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2309 if sts and not force_mobile_client and 'configs' not in player_skip:
2310 ytm_webpage = self._download_webpage(
2311 'https://music.youtube.com',
2312 video_id, fatal=False, note='Downloading remix client config')
2313
2314 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2315 ytm_client = 'WEB_REMIX'
2316 if not sts or force_mobile_client:
2317 # Android client already has signature descrambled
2318 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2319 if not sts:
2320 self.report_warning('Falling back to android remix client for player API.')
2321 ytm_client = 'ANDROID_MUSIC'
2322 ytm_cfg = {}
2323
2324 ytm_headers = self._generate_api_headers(
2325 ytm_cfg, identity_token, syncid,
2326 client=ytm_client, session_index=session_index)
2327 ytm_query = {'videoId': video_id}
2328 ytm_query.update(self._generate_player_context(sts))
2329
2330 ytm_player_response = self._extract_response(
2331 item_id=video_id, ep='player', query=ytm_query,
2332 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2333 default_client=ytm_client,
2334 note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
2335 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
2336
2337 if not player_response or force_mobile_client:
2338 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2339 yt_client = 'WEB'
2340 ytpcfg = ytcfg
2341 ytp_headers = headers
2342 if not sts or force_mobile_client:
2343 # Android client already has signature descrambled
2344 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2345 if not sts:
2346 self.report_warning('Falling back to android client for player API.')
2347 yt_client = 'ANDROID'
2348 ytpcfg = {}
2349 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid,
2350 client=yt_client, session_index=session_index)
2351
2352 yt_query = {'videoId': video_id}
2353 yt_query.update(self._generate_player_context(sts))
2354 player_response = self._extract_response(
2355 item_id=video_id, ep='player', query=yt_query,
2356 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2357 default_client=yt_client,
2358 note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
2359 ) or player_response
2360
2361 # Age-gate workarounds
2362 playability_status = player_response.get('playabilityStatus') or {}
2363 if playability_status.get('reason') in self._AGE_GATE_REASONS:
2364 gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
2365 for gvi_client in gvi_clients:
2366 pr = self._parse_json(try_get(compat_parse_qs(
2367 self._download_webpage(
2368 base_url + 'get_video_info', video_id,
2369 'Refetching age-gated %s info webpage' % gvi_client.lower(),
2370 'unable to download video info webpage', fatal=False,
2371 query=self._get_video_info_params(video_id, client=gvi_client))),
2372 lambda x: x['player_response'][0],
2373 compat_str) or '{}', video_id)
2374 if pr:
2375 break
2376 if not pr:
2377 self.report_warning('Falling back to embedded-only age-gate workaround.')
2378 embed_webpage = None
2379 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2380 if sts and not force_mobile_client and 'configs' not in player_skip:
2381 embed_webpage = self._download_webpage(
2382 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2383 video_id=video_id, note='Downloading age-gated embed config')
2384
2385 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2386 # If we extracted the embed webpage, it'll tell us if we can view the video
2387 embedded_pr = self._parse_json(
2388 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2389 video_id=video_id)
2390 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2391 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2392 yt_client = 'WEB_EMBEDDED_PLAYER'
2393 if not sts or force_mobile_client:
2394 # Android client already has signature descrambled
2395 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2396 if not sts:
2397 self.report_warning(
2398 'Falling back to android embedded client for player API (note: some formats may be missing).')
2399 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2400 ytcfg_age = {}
2401
2402 ytage_headers = self._generate_api_headers(
2403 ytcfg_age, identity_token, syncid,
2404 client=yt_client, session_index=session_index)
2405 yt_age_query = {'videoId': video_id}
2406 yt_age_query.update(self._generate_player_context(sts))
2407 pr = self._extract_response(
2408 item_id=video_id, ep='player', query=yt_age_query,
2409 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2410 default_client=yt_client,
2411 note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
2412 ) or {}
2413
2414 if pr:
2415 player_response = pr
2416
2417 trailer_video_id = try_get(
2418 playability_status,
2419 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2420 compat_str)
2421 if trailer_video_id:
2422 return self.url_result(
2423 trailer_video_id, self.ie_key(), trailer_video_id)
2424
2425 search_meta = (
2426 lambda x: self._html_search_meta(x, webpage, default=None)) \
2427 if webpage else lambda x: None
2428
2429 video_details = player_response.get('videoDetails') or {}
2430 microformat = try_get(
2431 player_response,
2432 lambda x: x['microformat']['playerMicroformatRenderer'],
2433 dict) or {}
2434 video_title = video_details.get('title') \
2435 or self._get_text(microformat.get('title')) \
2436 or search_meta(['og:title', 'twitter:title', 'title'])
2437 video_description = video_details.get('shortDescription')
2438
2439 if not smuggled_data.get('force_singlefeed', False):
2440 if not self.get_param('noplaylist'):
2441 multifeed_metadata_list = try_get(
2442 player_response,
2443 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2444 compat_str)
2445 if multifeed_metadata_list:
2446 entries = []
2447 feed_ids = []
2448 for feed in multifeed_metadata_list.split(','):
2449 # Unquote should take place before split on comma (,) since textual
2450 # fields may contain comma as well (see
2451 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2452 feed_data = compat_parse_qs(
2453 compat_urllib_parse_unquote_plus(feed))
2454
2455 def feed_entry(name):
2456 return try_get(
2457 feed_data, lambda x: x[name][0], compat_str)
2458
2459 feed_id = feed_entry('id')
2460 if not feed_id:
2461 continue
2462 feed_title = feed_entry('title')
2463 title = video_title
2464 if feed_title:
2465 title += ' (%s)' % feed_title
2466 entries.append({
2467 '_type': 'url_transparent',
2468 'ie_key': 'Youtube',
2469 'url': smuggle_url(
2470 base_url + 'watch?v=' + feed_data['id'][0],
2471 {'force_singlefeed': True}),
2472 'title': title,
2473 })
2474 feed_ids.append(feed_id)
2475 self.to_screen(
2476 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2477 % (', '.join(feed_ids), video_id))
2478 return self.playlist_result(
2479 entries, video_id, video_title, video_description)
2480 else:
2481 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2482
2483 formats, itags, stream_ids = [], [], []
2484 itag_qualities = {}
2485 q = qualities([
2486 # "tiny" is the smallest video-only format. But some audio-only formats
2487 # was also labeled "tiny". It is not clear if such formats still exist
2488 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2489 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2490 ])
2491
2492 streaming_data = player_response.get('streamingData') or {}
2493 streaming_formats = streaming_data.get('formats') or []
2494 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
2495 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2496 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2497
2498 for fmt in streaming_formats:
2499 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2500 continue
2501
2502 itag = str_or_none(fmt.get('itag'))
2503 audio_track = fmt.get('audioTrack') or {}
2504 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2505 if stream_id in stream_ids:
2506 continue
2507
2508 quality = fmt.get('quality')
2509 if quality == 'tiny' or not quality:
2510 quality = fmt.get('audioQuality', '').lower() or quality
2511 if itag and quality:
2512 itag_qualities[itag] = quality
2513 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2514 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2515 # number of fragment that would subsequently requested with (`&sq=N`)
2516 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2517 continue
2518
2519 fmt_url = fmt.get('url')
2520 if not fmt_url:
2521 sc = compat_parse_qs(fmt.get('signatureCipher'))
2522 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2523 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2524 if not (sc and fmt_url and encrypted_sig):
2525 continue
2526 if not player_url:
2527 continue
2528 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2529 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2530 fmt_url += '&' + sp + '=' + signature
2531
2532 if itag:
2533 itags.append(itag)
2534 stream_ids.append(stream_id)
2535
2536 tbr = float_or_none(
2537 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2538 dct = {
2539 'asr': int_or_none(fmt.get('audioSampleRate')),
2540 'filesize': int_or_none(fmt.get('contentLength')),
2541 'format_id': itag,
2542 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
2543 'fps': int_or_none(fmt.get('fps')),
2544 'height': int_or_none(fmt.get('height')),
2545 'quality': q(quality),
2546 'tbr': tbr,
2547 'url': fmt_url,
2548 'width': fmt.get('width'),
2549 'language': audio_track.get('id', '').split('.')[0],
2550 }
2551 mime_mobj = re.match(
2552 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2553 if mime_mobj:
2554 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2555 dct.update(parse_codecs(mime_mobj.group(2)))
2556 # The 3gp format in android client has a quality of "small",
2557 # but is actually worse than all other formats
2558 if dct['ext'] == '3gp':
2559 dct['quality'] = q('tiny')
2560 no_audio = dct.get('acodec') == 'none'
2561 no_video = dct.get('vcodec') == 'none'
2562 if no_audio:
2563 dct['vbr'] = tbr
2564 if no_video:
2565 dct['abr'] = tbr
2566 if no_audio or no_video:
2567 dct['downloader_options'] = {
2568 # Youtube throttles chunks >~10M
2569 'http_chunk_size': 10485760,
2570 }
2571 if dct.get('ext'):
2572 dct['container'] = dct['ext'] + '_dash'
2573 formats.append(dct)
2574
2575 skip_manifests = self._configuration_arg('skip')
2576 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2577 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2578
2579 for sd in (streaming_data, ytm_streaming_data):
2580 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2581 if hls_manifest_url:
2582 for f in self._extract_m3u8_formats(
2583 hls_manifest_url, video_id, 'mp4', fatal=False):
2584 itag = self._search_regex(
2585 r'/itag/(\d+)', f['url'], 'itag', default=None)
2586 if itag:
2587 f['format_id'] = itag
2588 formats.append(f)
2589
2590 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2591 if dash_manifest_url:
2592 for f in self._extract_mpd_formats(
2593 dash_manifest_url, video_id, fatal=False):
2594 itag = f['format_id']
2595 if itag in itags:
2596 continue
2597 if itag in itag_qualities:
2598 f['quality'] = q(itag_qualities[itag])
2599 filesize = int_or_none(self._search_regex(
2600 r'/clen/(\d+)', f.get('fragment_base_url')
2601 or f['url'], 'file size', default=None))
2602 if filesize:
2603 f['filesize'] = filesize
2604 formats.append(f)
2605
2606 if not formats:
2607 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
2608 self.raise_no_formats(
2609 'This video is DRM protected.', expected=True)
2610 pemr = try_get(
2611 playability_status,
2612 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2613 dict) or {}
2614 reason = self._get_text(pemr.get('reason')) or playability_status.get('reason')
2615 subreason = pemr.get('subreason')
2616 if subreason:
2617 subreason = clean_html(self._get_text(subreason))
2618 if subreason == 'The uploader has not made this video available in your country.':
2619 countries = microformat.get('availableCountries')
2620 if not countries:
2621 regions_allowed = search_meta('regionsAllowed')
2622 countries = regions_allowed.split(',') if regions_allowed else None
2623 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2624 reason += '\n' + subreason
2625 if reason:
2626 self.raise_no_formats(reason, expected=True)
2627
2628 self._sort_formats(formats)
2629
2630 keywords = video_details.get('keywords') or []
2631 if not keywords and webpage:
2632 keywords = [
2633 unescapeHTML(m.group('content'))
2634 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2635 for keyword in keywords:
2636 if keyword.startswith('yt:stretch='):
2637 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2638 if mobj:
2639 # NB: float is intentional for forcing float division
2640 w, h = (float(v) for v in mobj.groups())
2641 if w > 0 and h > 0:
2642 ratio = w / h
2643 for f in formats:
2644 if f.get('vcodec') != 'none':
2645 f['stretched_ratio'] = ratio
2646 break
2647
2648 thumbnails = []
2649 for container in (video_details, microformat):
2650 for thumbnail in (try_get(
2651 container,
2652 lambda x: x['thumbnail']['thumbnails'], list) or []):
2653 thumbnail_url = thumbnail.get('url')
2654 if not thumbnail_url:
2655 continue
2656 # Sometimes youtube gives a wrong thumbnail URL. See:
2657 # https://github.com/yt-dlp/yt-dlp/issues/233
2658 # https://github.com/ytdl-org/youtube-dl/issues/28023
2659 if 'maxresdefault' in thumbnail_url:
2660 thumbnail_url = thumbnail_url.split('?')[0]
2661 thumbnails.append({
2662 'url': thumbnail_url,
2663 'height': int_or_none(thumbnail.get('height')),
2664 'width': int_or_none(thumbnail.get('width')),
2665 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2666 })
2667 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2668 if thumbnail_url:
2669 thumbnails.append({
2670 'url': thumbnail_url,
2671 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2672 })
2673 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2674 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2675 thumbnails.append({
2676 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2677 'preference': 1,
2678 })
2679 self._remove_duplicate_formats(thumbnails)
2680
2681 category = microformat.get('category') or search_meta('genre')
2682 channel_id = video_details.get('channelId') \
2683 or microformat.get('externalChannelId') \
2684 or search_meta('channelId')
2685 duration = int_or_none(
2686 video_details.get('lengthSeconds')
2687 or microformat.get('lengthSeconds')) \
2688 or parse_duration(search_meta('duration'))
2689 is_live = video_details.get('isLive')
2690 is_upcoming = video_details.get('isUpcoming')
2691 owner_profile_url = microformat.get('ownerProfileUrl')
2692
2693 info = {
2694 'id': video_id,
2695 'title': self._live_title(video_title) if is_live else video_title,
2696 'formats': formats,
2697 'thumbnails': thumbnails,
2698 'description': video_description,
2699 'upload_date': unified_strdate(
2700 microformat.get('uploadDate')
2701 or search_meta('uploadDate')),
2702 'uploader': video_details['author'],
2703 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2704 'uploader_url': owner_profile_url,
2705 'channel_id': channel_id,
2706 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2707 'duration': duration,
2708 'view_count': int_or_none(
2709 video_details.get('viewCount')
2710 or microformat.get('viewCount')
2711 or search_meta('interactionCount')),
2712 'average_rating': float_or_none(video_details.get('averageRating')),
2713 'age_limit': 18 if (
2714 microformat.get('isFamilySafe') is False
2715 or search_meta('isFamilyFriendly') == 'false'
2716 or search_meta('og:restrictions:age') == '18+') else 0,
2717 'webpage_url': webpage_url,
2718 'categories': [category] if category else None,
2719 'tags': keywords,
2720 'is_live': is_live,
2721 'playable_in_embed': playability_status.get('playableInEmbed'),
2722 'was_live': video_details.get('isLiveContent'),
2723 }
2724
2725 pctr = try_get(
2726 player_response,
2727 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2728 subtitles = {}
2729 if pctr:
2730 def process_language(container, base_url, lang_code, sub_name, query):
2731 lang_subs = container.setdefault(lang_code, [])
2732 for fmt in self._SUBTITLE_FORMATS:
2733 query.update({
2734 'fmt': fmt,
2735 })
2736 lang_subs.append({
2737 'ext': fmt,
2738 'url': update_url_query(base_url, query),
2739 'name': sub_name,
2740 })
2741
2742 for caption_track in (pctr.get('captionTracks') or []):
2743 base_url = caption_track.get('baseUrl')
2744 if not base_url:
2745 continue
2746 if caption_track.get('kind') != 'asr':
2747 lang_code = (
2748 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2749 or caption_track.get('languageCode'))
2750 if not lang_code:
2751 continue
2752 process_language(
2753 subtitles, base_url, lang_code,
2754 try_get(caption_track, lambda x: x['name']['simpleText']),
2755 {})
2756 continue
2757 automatic_captions = {}
2758 for translation_language in (pctr.get('translationLanguages') or []):
2759 translation_language_code = translation_language.get('languageCode')
2760 if not translation_language_code:
2761 continue
2762 process_language(
2763 automatic_captions, base_url, translation_language_code,
2764 self._get_text(translation_language.get('languageName'), max_runs=1),
2765 {'tlang': translation_language_code})
2766 info['automatic_captions'] = automatic_captions
2767 info['subtitles'] = subtitles
2768
2769 parsed_url = compat_urllib_parse_urlparse(url)
2770 for component in [parsed_url.fragment, parsed_url.query]:
2771 query = compat_parse_qs(component)
2772 for k, v in query.items():
2773 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2774 d_k += '_time'
2775 if d_k not in info and k in s_ks:
2776 info[d_k] = parse_duration(query[k][0])
2777
2778 # Youtube Music Auto-generated description
2779 if video_description:
2780 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2781 if mobj:
2782 release_year = mobj.group('release_year')
2783 release_date = mobj.group('release_date')
2784 if release_date:
2785 release_date = release_date.replace('-', '')
2786 if not release_year:
2787 release_year = release_date[:4]
2788 info.update({
2789 'album': mobj.group('album'.strip()),
2790 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2791 'track': mobj.group('track').strip(),
2792 'release_date': release_date,
2793 'release_year': int_or_none(release_year),
2794 })
2795
2796 initial_data = None
2797 if webpage:
2798 initial_data = self._extract_yt_initial_variable(
2799 webpage, self._YT_INITIAL_DATA_RE, video_id,
2800 'yt initial data')
2801 if not initial_data:
2802 initial_data = self._extract_response(
2803 item_id=video_id, ep='next', fatal=False,
2804 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2805 note='Downloading initial data API JSON')
2806
2807 try:
2808 # This will error if there is no livechat
2809 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2810 info['subtitles']['live_chat'] = [{
2811 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2812 'video_id': video_id,
2813 'ext': 'json',
2814 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2815 }]
2816 except (KeyError, IndexError, TypeError):
2817 pass
2818
2819 if initial_data:
2820 chapters = self._extract_chapters_from_json(
2821 initial_data, video_id, duration)
2822 if not chapters:
2823 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2824 contents = try_get(
2825 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2826 list)
2827 if not contents:
2828 continue
2829
2830 def chapter_time(mmlir):
2831 return parse_duration(
2832 self._get_text(mmlir.get('timeDescription')))
2833
2834 chapters = []
2835 for next_num, content in enumerate(contents, start=1):
2836 mmlir = content.get('macroMarkersListItemRenderer') or {}
2837 start_time = chapter_time(mmlir)
2838 end_time = chapter_time(try_get(
2839 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2840 if next_num < len(contents) else duration
2841 if start_time is None or end_time is None:
2842 continue
2843 chapters.append({
2844 'start_time': start_time,
2845 'end_time': end_time,
2846 'title': self._get_text(mmlir.get('title')),
2847 })
2848 if chapters:
2849 break
2850 if chapters:
2851 info['chapters'] = chapters
2852
2853 contents = try_get(
2854 initial_data,
2855 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2856 list) or []
2857 for content in contents:
2858 vpir = content.get('videoPrimaryInfoRenderer')
2859 if vpir:
2860 stl = vpir.get('superTitleLink')
2861 if stl:
2862 stl = self._get_text(stl)
2863 if try_get(
2864 vpir,
2865 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2866 info['location'] = stl
2867 else:
2868 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2869 if mobj:
2870 info.update({
2871 'series': mobj.group(1),
2872 'season_number': int(mobj.group(2)),
2873 'episode_number': int(mobj.group(3)),
2874 })
2875 for tlb in (try_get(
2876 vpir,
2877 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2878 list) or []):
2879 tbr = tlb.get('toggleButtonRenderer') or {}
2880 for getter, regex in [(
2881 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2882 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2883 lambda x: x['accessibility'],
2884 lambda x: x['accessibilityData']['accessibilityData'],
2885 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2886 label = (try_get(tbr, getter, dict) or {}).get('label')
2887 if label:
2888 mobj = re.match(regex, label)
2889 if mobj:
2890 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2891 break
2892 sbr_tooltip = try_get(
2893 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2894 if sbr_tooltip:
2895 like_count, dislike_count = sbr_tooltip.split(' / ')
2896 info.update({
2897 'like_count': str_to_int(like_count),
2898 'dislike_count': str_to_int(dislike_count),
2899 })
2900 vsir = content.get('videoSecondaryInfoRenderer')
2901 if vsir:
2902 info['channel'] = self._get_text(try_get(
2903 vsir,
2904 lambda x: x['owner']['videoOwnerRenderer']['title'],
2905 dict))
2906 rows = try_get(
2907 vsir,
2908 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2909 list) or []
2910 multiple_songs = False
2911 for row in rows:
2912 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2913 multiple_songs = True
2914 break
2915 for row in rows:
2916 mrr = row.get('metadataRowRenderer') or {}
2917 mrr_title = mrr.get('title')
2918 if not mrr_title:
2919 continue
2920 mrr_title = self._get_text(mrr['title'])
2921 mrr_contents_text = self._get_text(mrr['contents'][0])
2922 if mrr_title == 'License':
2923 info['license'] = mrr_contents_text
2924 elif not multiple_songs:
2925 if mrr_title == 'Album':
2926 info['album'] = mrr_contents_text
2927 elif mrr_title == 'Artist':
2928 info['artist'] = mrr_contents_text
2929 elif mrr_title == 'Song':
2930 info['track'] = mrr_contents_text
2931
2932 fallbacks = {
2933 'channel': 'uploader',
2934 'channel_id': 'uploader_id',
2935 'channel_url': 'uploader_url',
2936 }
2937 for to, frm in fallbacks.items():
2938 if not info.get(to):
2939 info[to] = info.get(frm)
2940
2941 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2942 v = info.get(s_k)
2943 if v:
2944 info[d_k] = v
2945
2946 is_private = bool_or_none(video_details.get('isPrivate'))
2947 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2948 is_membersonly = None
2949 is_premium = None
2950 if initial_data and is_private is not None:
2951 is_membersonly = False
2952 is_premium = False
2953 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2954 badge_labels = set()
2955 for content in contents:
2956 if not isinstance(content, dict):
2957 continue
2958 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
2959 for badge_label in badge_labels:
2960 if badge_label.lower() == 'members only':
2961 is_membersonly = True
2962 elif badge_label.lower() == 'premium':
2963 is_premium = True
2964 elif badge_label.lower() == 'unlisted':
2965 is_unlisted = True
2966
2967 info['availability'] = self._availability(
2968 is_private=is_private,
2969 needs_premium=is_premium,
2970 needs_subscription=is_membersonly,
2971 needs_auth=info['age_limit'] >= 18,
2972 is_unlisted=None if is_private is None else is_unlisted)
2973
2974 # get xsrf for annotations or comments
2975 get_annotations = self.get_param('writeannotations', False)
2976 get_comments = self.get_param('getcomments', False)
2977 if get_annotations or get_comments:
2978 xsrf_token = None
2979 ytcfg = self._extract_ytcfg(video_id, webpage)
2980 if ytcfg:
2981 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2982 if not xsrf_token:
2983 xsrf_token = self._search_regex(
2984 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2985 webpage, 'xsrf token', group='xsrf_token', fatal=False)
2986
2987 # annotations
2988 if get_annotations:
2989 invideo_url = try_get(
2990 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2991 if xsrf_token and invideo_url:
2992 xsrf_field_name = None
2993 if ytcfg:
2994 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2995 if not xsrf_field_name:
2996 xsrf_field_name = self._search_regex(
2997 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2998 webpage, 'xsrf field name',
2999 group='xsrf_field_name', default='session_token')
3000 info['annotations'] = self._download_webpage(
3001 self._proto_relative_url(invideo_url),
3002 video_id, note='Downloading annotations',
3003 errnote='Unable to download video annotations', fatal=False,
3004 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3005
3006 if get_comments:
3007 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
3008
3009 self.mark_watched(video_id, player_response)
3010
3011 return info
3012
3013
3014 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3015 IE_DESC = 'YouTube.com tab'
3016 _VALID_URL = r'''(?x)
3017 https?://
3018 (?:\w+\.)?
3019 (?:
3020 youtube(?:kids)?\.com|
3021 invidio\.us
3022 )/
3023 (?:
3024 (?P<channel_type>channel|c|user|browse)/|
3025 (?P<not_channel>
3026 feed/|hashtag/|
3027 (?:playlist|watch)\?.*?\blist=
3028 )|
3029 (?!(?:%s)\b) # Direct URLs
3030 )
3031 (?P<id>[^/?\#&]+)
3032 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3033 IE_NAME = 'youtube:tab'
3034
3035 _TESTS = [{
3036 'note': 'playlists, multipage',
3037 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3038 'playlist_mincount': 94,
3039 'info_dict': {
3040 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3041 'title': 'Игорь Клейнер - Playlists',
3042 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3043 'uploader': 'Игорь Клейнер',
3044 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3045 },
3046 }, {
3047 'note': 'playlists, multipage, different order',
3048 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3049 'playlist_mincount': 94,
3050 'info_dict': {
3051 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3052 'title': 'Игорь Клейнер - Playlists',
3053 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3054 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3055 'uploader': 'Игорь Клейнер',
3056 },
3057 }, {
3058 'note': 'playlists, series',
3059 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3060 'playlist_mincount': 5,
3061 'info_dict': {
3062 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3063 'title': '3Blue1Brown - Playlists',
3064 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3065 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3066 'uploader': '3Blue1Brown',
3067 },
3068 }, {
3069 'note': 'playlists, singlepage',
3070 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3071 'playlist_mincount': 4,
3072 'info_dict': {
3073 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3074 'title': 'ThirstForScience - Playlists',
3075 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3076 'uploader': 'ThirstForScience',
3077 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3078 }
3079 }, {
3080 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3081 'only_matching': True,
3082 }, {
3083 'note': 'basic, single video playlist',
3084 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3085 'info_dict': {
3086 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3087 'uploader': 'Sergey M.',
3088 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3089 'title': 'youtube-dl public playlist',
3090 },
3091 'playlist_count': 1,
3092 }, {
3093 'note': 'empty playlist',
3094 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3095 'info_dict': {
3096 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3097 'uploader': 'Sergey M.',
3098 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3099 'title': 'youtube-dl empty playlist',
3100 },
3101 'playlist_count': 0,
3102 }, {
3103 'note': 'Home tab',
3104 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3105 'info_dict': {
3106 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3107 'title': 'lex will - Home',
3108 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3109 'uploader': 'lex will',
3110 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3111 },
3112 'playlist_mincount': 2,
3113 }, {
3114 'note': 'Videos tab',
3115 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3116 'info_dict': {
3117 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3118 'title': 'lex will - Videos',
3119 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3120 'uploader': 'lex will',
3121 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3122 },
3123 'playlist_mincount': 975,
3124 }, {
3125 'note': 'Videos tab, sorted by popular',
3126 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3127 'info_dict': {
3128 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3129 'title': 'lex will - Videos',
3130 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3131 'uploader': 'lex will',
3132 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3133 },
3134 'playlist_mincount': 199,
3135 }, {
3136 'note': 'Playlists tab',
3137 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3138 'info_dict': {
3139 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3140 'title': 'lex will - Playlists',
3141 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3142 'uploader': 'lex will',
3143 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3144 },
3145 'playlist_mincount': 17,
3146 }, {
3147 'note': 'Community tab',
3148 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3149 'info_dict': {
3150 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3151 'title': 'lex will - Community',
3152 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3153 'uploader': 'lex will',
3154 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3155 },
3156 'playlist_mincount': 18,
3157 }, {
3158 'note': 'Channels tab',
3159 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3160 'info_dict': {
3161 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3162 'title': 'lex will - Channels',
3163 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3164 'uploader': 'lex will',
3165 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3166 },
3167 'playlist_mincount': 12,
3168 }, {
3169 'note': 'Search tab',
3170 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3171 'playlist_mincount': 40,
3172 'info_dict': {
3173 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3174 'title': '3Blue1Brown - Search - linear algebra',
3175 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3176 'uploader': '3Blue1Brown',
3177 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3178 },
3179 }, {
3180 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3181 'only_matching': True,
3182 }, {
3183 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3184 'only_matching': True,
3185 }, {
3186 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3187 'only_matching': True,
3188 }, {
3189 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3190 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3191 'info_dict': {
3192 'title': '29C3: Not my department',
3193 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3194 'uploader': 'Christiaan008',
3195 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3196 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3197 },
3198 'playlist_count': 96,
3199 }, {
3200 'note': 'Large playlist',
3201 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3202 'info_dict': {
3203 'title': 'Uploads from Cauchemar',
3204 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3205 'uploader': 'Cauchemar',
3206 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3207 },
3208 'playlist_mincount': 1123,
3209 }, {
3210 'note': 'even larger playlist, 8832 videos',
3211 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3212 'only_matching': True,
3213 }, {
3214 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3215 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3216 'info_dict': {
3217 'title': 'Uploads from Interstellar Movie',
3218 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3219 'uploader': 'Interstellar Movie',
3220 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3221 },
3222 'playlist_mincount': 21,
3223 }, {
3224 'note': 'Playlist with "show unavailable videos" button',
3225 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3226 'info_dict': {
3227 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3228 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3229 'uploader': 'Phim Siêu Nhân Nhật Bản',
3230 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3231 },
3232 'playlist_mincount': 200,
3233 }, {
3234 'note': 'Playlist with unavailable videos in page 7',
3235 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3236 'info_dict': {
3237 'title': 'Uploads from BlankTV',
3238 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3239 'uploader': 'BlankTV',
3240 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3241 },
3242 'playlist_mincount': 1000,
3243 }, {
3244 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3245 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3246 'info_dict': {
3247 'title': 'Data Analysis with Dr Mike Pound',
3248 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3249 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3250 'uploader': 'Computerphile',
3251 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3252 },
3253 'playlist_mincount': 11,
3254 }, {
3255 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3256 'only_matching': True,
3257 }, {
3258 'note': 'Playlist URL that does not actually serve a playlist',
3259 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3260 'info_dict': {
3261 'id': 'FqZTN594JQw',
3262 'ext': 'webm',
3263 'title': "Smiley's People 01 detective, Adventure Series, Action",
3264 'uploader': 'STREEM',
3265 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3266 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3267 'upload_date': '20150526',
3268 'license': 'Standard YouTube License',
3269 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3270 'categories': ['People & Blogs'],
3271 'tags': list,
3272 'view_count': int,
3273 'like_count': int,
3274 'dislike_count': int,
3275 },
3276 'params': {
3277 'skip_download': True,
3278 },
3279 'skip': 'This video is not available.',
3280 'add_ie': [YoutubeIE.ie_key()],
3281 }, {
3282 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3283 'only_matching': True,
3284 }, {
3285 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3286 'only_matching': True,
3287 }, {
3288 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3289 'info_dict': {
3290 'id': 'X1whbWASnNQ', # This will keep changing
3291 'ext': 'mp4',
3292 'title': compat_str,
3293 'uploader': 'Sky News',
3294 'uploader_id': 'skynews',
3295 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3296 'upload_date': r're:\d{8}',
3297 'description': compat_str,
3298 'categories': ['News & Politics'],
3299 'tags': list,
3300 'like_count': int,
3301 'dislike_count': int,
3302 },
3303 'params': {
3304 'skip_download': True,
3305 },
3306 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3307 }, {
3308 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3309 'info_dict': {
3310 'id': 'a48o2S1cPoo',
3311 'ext': 'mp4',
3312 'title': 'The Young Turks - Live Main Show',
3313 'uploader': 'The Young Turks',
3314 'uploader_id': 'TheYoungTurks',
3315 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3316 'upload_date': '20150715',
3317 'license': 'Standard YouTube License',
3318 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3319 'categories': ['News & Politics'],
3320 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3321 'like_count': int,
3322 'dislike_count': int,
3323 },
3324 'params': {
3325 'skip_download': True,
3326 },
3327 'only_matching': True,
3328 }, {
3329 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3330 'only_matching': True,
3331 }, {
3332 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3333 'only_matching': True,
3334 }, {
3335 'note': 'A channel that is not live. Should raise error',
3336 'url': 'https://www.youtube.com/user/numberphile/live',
3337 'only_matching': True,
3338 }, {
3339 'url': 'https://www.youtube.com/feed/trending',
3340 'only_matching': True,
3341 }, {
3342 'url': 'https://www.youtube.com/feed/library',
3343 'only_matching': True,
3344 }, {
3345 'url': 'https://www.youtube.com/feed/history',
3346 'only_matching': True,
3347 }, {
3348 'url': 'https://www.youtube.com/feed/subscriptions',
3349 'only_matching': True,
3350 }, {
3351 'url': 'https://www.youtube.com/feed/watch_later',
3352 'only_matching': True,
3353 }, {
3354 'note': 'Recommended - redirects to home page',
3355 'url': 'https://www.youtube.com/feed/recommended',
3356 'only_matching': True,
3357 }, {
3358 'note': 'inline playlist with not always working continuations',
3359 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3360 'only_matching': True,
3361 }, {
3362 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3363 'only_matching': True,
3364 }, {
3365 'url': 'https://www.youtube.com/course',
3366 'only_matching': True,
3367 }, {
3368 'url': 'https://www.youtube.com/zsecurity',
3369 'only_matching': True,
3370 }, {
3371 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3372 'only_matching': True,
3373 }, {
3374 'url': 'https://www.youtube.com/TheYoungTurks/live',
3375 'only_matching': True,
3376 }, {
3377 'url': 'https://www.youtube.com/hashtag/cctv9',
3378 'info_dict': {
3379 'id': 'cctv9',
3380 'title': '#cctv9',
3381 },
3382 'playlist_mincount': 350,
3383 }, {
3384 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3385 'only_matching': True,
3386 }, {
3387 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3388 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3389 'only_matching': True
3390 }, {
3391 'note': '/browse/ should redirect to /channel/',
3392 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3393 'only_matching': True
3394 }, {
3395 'note': 'VLPL, should redirect to playlist?list=PL...',
3396 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3397 'info_dict': {
3398 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3399 'uploader': 'NoCopyrightSounds',
3400 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3401 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3402 'title': 'NCS Releases',
3403 },
3404 'playlist_mincount': 166,
3405 }, {
3406 'note': 'Topic, should redirect to playlist?list=UU...',
3407 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3408 'info_dict': {
3409 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3410 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3411 'title': 'Uploads from Royalty Free Music - Topic',
3412 'uploader': 'Royalty Free Music - Topic',
3413 },
3414 'expected_warnings': [
3415 'A channel/user page was given',
3416 'The URL does not have a videos tab',
3417 ],
3418 'playlist_mincount': 101,
3419 }, {
3420 'note': 'Topic without a UU playlist',
3421 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3422 'info_dict': {
3423 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3424 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3425 },
3426 'expected_warnings': [
3427 'A channel/user page was given',
3428 'The URL does not have a videos tab',
3429 'Falling back to channel URL',
3430 ],
3431 'playlist_mincount': 9,
3432 }, {
3433 'note': 'Youtube music Album',
3434 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3435 'info_dict': {
3436 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3437 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3438 },
3439 'playlist_count': 50,
3440 }, {
3441 'note': 'unlisted single video playlist',
3442 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3443 'info_dict': {
3444 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3445 'uploader': 'colethedj',
3446 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3447 'title': 'yt-dlp unlisted playlist test',
3448 'availability': 'unlisted'
3449 },
3450 'playlist_count': 1,
3451 }]
3452
3453 @classmethod
3454 def suitable(cls, url):
3455 return False if YoutubeIE.suitable(url) else super(
3456 YoutubeTabIE, cls).suitable(url)
3457
3458 def _extract_channel_id(self, webpage):
3459 channel_id = self._html_search_meta(
3460 'channelId', webpage, 'channel id', default=None)
3461 if channel_id:
3462 return channel_id
3463 channel_url = self._html_search_meta(
3464 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3465 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3466 'twitter:app:url:googleplay'), webpage, 'channel url')
3467 return self._search_regex(
3468 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3469 channel_url, 'channel id')
3470
3471 @staticmethod
3472 def _extract_basic_item_renderer(item):
3473 # Modified from _extract_grid_item_renderer
3474 known_basic_renderers = (
3475 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3476 )
3477 for key, renderer in item.items():
3478 if not isinstance(renderer, dict):
3479 continue
3480 elif key in known_basic_renderers:
3481 return renderer
3482 elif key.startswith('grid') and key.endswith('Renderer'):
3483 return renderer
3484
3485 def _grid_entries(self, grid_renderer):
3486 for item in grid_renderer['items']:
3487 if not isinstance(item, dict):
3488 continue
3489 renderer = self._extract_basic_item_renderer(item)
3490 if not isinstance(renderer, dict):
3491 continue
3492 title = self._get_text(renderer.get('title'))
3493
3494 # playlist
3495 playlist_id = renderer.get('playlistId')
3496 if playlist_id:
3497 yield self.url_result(
3498 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3499 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3500 video_title=title)
3501 continue
3502 # video
3503 video_id = renderer.get('videoId')
3504 if video_id:
3505 yield self._extract_video(renderer)
3506 continue
3507 # channel
3508 channel_id = renderer.get('channelId')
3509 if channel_id:
3510 yield self.url_result(
3511 'https://www.youtube.com/channel/%s' % channel_id,
3512 ie=YoutubeTabIE.ie_key(), video_title=title)
3513 continue
3514 # generic endpoint URL support
3515 ep_url = urljoin('https://www.youtube.com/', try_get(
3516 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3517 compat_str))
3518 if ep_url:
3519 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3520 if ie.suitable(ep_url):
3521 yield self.url_result(
3522 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3523 break
3524
3525 def _shelf_entries_from_content(self, shelf_renderer):
3526 content = shelf_renderer.get('content')
3527 if not isinstance(content, dict):
3528 return
3529 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3530 if renderer:
3531 # TODO: add support for nested playlists so each shelf is processed
3532 # as separate playlist
3533 # TODO: this includes only first N items
3534 for entry in self._grid_entries(renderer):
3535 yield entry
3536 renderer = content.get('horizontalListRenderer')
3537 if renderer:
3538 # TODO
3539 pass
3540
3541 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3542 ep = try_get(
3543 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3544 compat_str)
3545 shelf_url = urljoin('https://www.youtube.com', ep)
3546 if shelf_url:
3547 # Skipping links to another channels, note that checking for
3548 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3549 # will not work
3550 if skip_channels and '/channels?' in shelf_url:
3551 return
3552 title = self._get_text(shelf_renderer, lambda x: x['title'])
3553 yield self.url_result(shelf_url, video_title=title)
3554 # Shelf may not contain shelf URL, fallback to extraction from content
3555 for entry in self._shelf_entries_from_content(shelf_renderer):
3556 yield entry
3557
3558 def _playlist_entries(self, video_list_renderer):
3559 for content in video_list_renderer['contents']:
3560 if not isinstance(content, dict):
3561 continue
3562 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3563 if not isinstance(renderer, dict):
3564 continue
3565 video_id = renderer.get('videoId')
3566 if not video_id:
3567 continue
3568 yield self._extract_video(renderer)
3569
3570 def _rich_entries(self, rich_grid_renderer):
3571 renderer = try_get(
3572 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3573 video_id = renderer.get('videoId')
3574 if not video_id:
3575 return
3576 yield self._extract_video(renderer)
3577
3578 def _video_entry(self, video_renderer):
3579 video_id = video_renderer.get('videoId')
3580 if video_id:
3581 return self._extract_video(video_renderer)
3582
3583 def _post_thread_entries(self, post_thread_renderer):
3584 post_renderer = try_get(
3585 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3586 if not post_renderer:
3587 return
3588 # video attachment
3589 video_renderer = try_get(
3590 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3591 video_id = video_renderer.get('videoId')
3592 if video_id:
3593 entry = self._extract_video(video_renderer)
3594 if entry:
3595 yield entry
3596 # playlist attachment
3597 playlist_id = try_get(
3598 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3599 if playlist_id:
3600 yield self.url_result(
3601 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3602 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3603 # inline video links
3604 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3605 for run in runs:
3606 if not isinstance(run, dict):
3607 continue
3608 ep_url = try_get(
3609 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3610 if not ep_url:
3611 continue
3612 if not YoutubeIE.suitable(ep_url):
3613 continue
3614 ep_video_id = YoutubeIE._match_id(ep_url)
3615 if video_id == ep_video_id:
3616 continue
3617 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3618
3619 def _post_thread_continuation_entries(self, post_thread_continuation):
3620 contents = post_thread_continuation.get('contents')
3621 if not isinstance(contents, list):
3622 return
3623 for content in contents:
3624 renderer = content.get('backstagePostThreadRenderer')
3625 if not isinstance(renderer, dict):
3626 continue
3627 for entry in self._post_thread_entries(renderer):
3628 yield entry
3629
3630 r''' # unused
3631 def _rich_grid_entries(self, contents):
3632 for content in contents:
3633 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3634 if video_renderer:
3635 entry = self._video_entry(video_renderer)
3636 if entry:
3637 yield entry
3638 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of the selected tab, following API continuations.

        Continuation tokens are passed out of the nested generator through the
        single-element list ``continuation_list`` (a closure cell substitute).
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Not an item section; may still be a rich item (home feed/hashtag)
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # Maps renderer key -> generator of entries for that renderer
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                    # Fall back to section-level, then top-level continuation
                    if not continuation_list[0]:
                        continuation_list[0] = self._extract_continuation(is_renderer)

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        visitor_data = None

        # Keep requesting continuation pages until no further token is found
        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Carry visitorData across pages so YouTube serves consistent results
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Older continuation style: continuationContents
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # Newer continuation style: onResponseReceivedActions/Endpoints;
            # the tuple values are (handler, key the handler expects the items under)
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Wrap the item list so the existing handlers can consume it
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3753
3754 @staticmethod
3755 def _extract_selected_tab(tabs):
3756 for tab in tabs:
3757 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3758 if renderer.get('selected') is True:
3759 return renderer
3760 else:
3761 raise ExtractorError('Unable to find selected tab')
3762
3763 @classmethod
3764 def _extract_uploader(cls, data):
3765 uploader = {}
3766 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3767 owner = try_get(
3768 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3769 if owner:
3770 uploader['uploader'] = owner.get('text')
3771 uploader['uploader_id'] = try_get(
3772 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3773 uploader['uploader_url'] = urljoin(
3774 'https://www.youtube.com/',
3775 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3776 return {k: v for k, v in uploader.items() if v is not None}
3777
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build a playlist result for a tab page (channel/playlist/hashtag)."""
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        # Channel pages expose channelMetadataRenderer; playlist pages expose
        # playlistMetadataRenderer instead
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            # Prefer the avatar; fall back to the sidebar playlist thumbnail
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
                    lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # Hashtag pages carry their title in hashtagHeaderRenderer
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        # Append tab name/expanded text (e.g. " - Videos") when present
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')
        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        availability = self._extract_availability(data)
        if availability:
            metadata['availability'] = availability
        if not channel_id:
            # No channel metadata: fall back to sidebar uploader info
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        ytcfg = self._extract_ytcfg(item_id, webpage)
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(ytcfg, data), ytcfg),
            **metadata)
3852
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Page through a Mix playlist, stopping once it wraps back to the first video.

        Mix playlists are endless; each 'next' API call returns a window that
        overlaps the previous one, so we skip already-yielded ids.
        """
        first_id = last_id = None
        ytcfg = self._extract_ytcfg(playlist_id, webpage)
        headers = self._generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
            identity_token=self._extract_identity_token(webpage, item_id=playlist_id))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # Resume right after the last video yielded from the previous page
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            # NOTE(review): assumes the last panel video has a watchEndpoint;
            # .get on a None watch_endpoint would raise — confirm upstream data
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query, ep='next', headers=headers, ytcfg=ytcfg,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3888
3889 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
3890 title = playlist.get('title') or try_get(
3891 data, lambda x: x['titleText']['simpleText'], compat_str)
3892 playlist_id = playlist.get('playlistId') or item_id
3893
3894 # Delegating everything except mix playlists to regular tab-based playlist URL
3895 playlist_url = urljoin(url, try_get(
3896 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3897 compat_str))
3898 if playlist_url and playlist_url != url:
3899 return self.url_result(
3900 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3901 video_title=title)
3902
3903 return self.playlist_result(
3904 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
3905 playlist_id=playlist_id, playlist_title=title)
3906
3907 def _extract_availability(self, data):
3908 """
3909 Gets the availability of a given playlist/tab.
3910 Note: Unless YouTube tells us explicitly, we do not assume it is public
3911 @param data: response
3912 """
3913 is_private = is_unlisted = None
3914 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3915 badge_labels = self._extract_badges(renderer)
3916
3917 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3918 privacy_dropdown_entries = try_get(
3919 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3920 for renderer_dict in privacy_dropdown_entries:
3921 is_selected = try_get(
3922 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3923 if not is_selected:
3924 continue
3925 label = self._get_text(
3926 try_get(renderer_dict, lambda x: x['privacyDropdownItemRenderer']['label'], dict) or [])
3927 if label:
3928 badge_labels.add(label.lower())
3929 break
3930
3931 for badge_label in badge_labels:
3932 if badge_label == 'unlisted':
3933 is_unlisted = True
3934 elif badge_label == 'private':
3935 is_private = True
3936 elif badge_label == 'public':
3937 is_unlisted = is_private = False
3938 return self._availability(is_private, False, False, False, is_unlisted)
3939
3940 @staticmethod
3941 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
3942 sidebar_renderer = try_get(
3943 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
3944 for item in sidebar_renderer:
3945 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
3946 if renderer:
3947 return renderer
3948
3949 def _reload_with_unavailable_videos(self, item_id, data, webpage):
3950 """
3951 Get playlist with unavailable videos if the 'show unavailable videos' button exists.
3952 """
3953 browse_id = params = None
3954 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
3955 if not renderer:
3956 return
3957 menu_renderer = try_get(
3958 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
3959 for menu_item in menu_renderer:
3960 if not isinstance(menu_item, dict):
3961 continue
3962 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
3963 text = try_get(
3964 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
3965 if not text or text.lower() != 'show unavailable videos':
3966 continue
3967 browse_endpoint = try_get(
3968 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
3969 browse_id = browse_endpoint.get('browseId')
3970 params = browse_endpoint.get('params')
3971 break
3972
3973 ytcfg = self._extract_ytcfg(item_id, webpage)
3974 headers = self._generate_api_headers(
3975 ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3976 identity_token=self._extract_identity_token(webpage, item_id=item_id),
3977 visitor_data=try_get(
3978 self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
3979 query = {
3980 'params': params or 'wgYCCAA=',
3981 'browseId': browse_id or 'VL%s' % item_id
3982 }
3983 return self._extract_response(
3984 item_id=item_id, headers=headers, query=query,
3985 check_get_keys='contents', fatal=False, ytcfg=ytcfg,
3986 note='Downloading API JSON with unavailable videos')
3987
3988 def _extract_webpage(self, url, item_id):
3989 retries = self.get_param('extractor_retries', 3)
3990 count = -1
3991 last_error = 'Incomplete yt initial data recieved'
3992 while count < retries:
3993 count += 1
3994 # Sometimes youtube returns a webpage with incomplete ytInitialData
3995 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3996 if count:
3997 self.report_warning('%s. Retrying ...' % last_error)
3998 webpage = self._download_webpage(
3999 url, item_id,
4000 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4001 data = self._extract_yt_initial_data(item_id, webpage)
4002 if data.get('contents') or data.get('currentVideoEndpoint'):
4003 break
4004 # Extract alerts here only when there is error
4005 self._extract_and_report_alerts(data)
4006 if count >= retries:
4007 raise ExtractorError(last_error)
4008 return webpage, data
4009
4010 @staticmethod
4011 def _smuggle_data(entries, data):
4012 for entry in entries:
4013 if data:
4014 entry['url'] = smuggle_url(entry['url'], data)
4015 yield entry
4016
4017 def _real_extract(self, url):
4018 url, smuggled_data = unsmuggle_url(url, {})
4019 if self.is_music_url(url):
4020 smuggled_data['is_music_url'] = True
4021 info_dict = self.__real_extract(url, smuggled_data)
4022 if info_dict.get('entries'):
4023 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4024 return info_dict
4025
4026 _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4027
    def __real_extract(self, url, smuggled_data):
        """Dispatch a tab/playlist/watch URL to the right extraction strategy.

        Normalizes the URL (www host, lower-case tab, music redirects), then
        tries tabs, watch-page playlists and finally a bare video in turn.
        """
        item_id = self._match_id(url)
        # Force the www host: other hosts (music., m., ...) can return
        # different/incomplete data
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Split URL into pre/tab/post parts; missing groups become ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4142
4143
class YoutubePlaylistIE(InfoExtractor):
    """Thin wrapper that normalizes bare playlist IDs and playlist URLs
    into a canonical https://www.youtube.com/playlist URL, then delegates
    the actual extraction to YoutubeTabIE."""
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Let YoutubeTabIE claim anything it matches first.
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        # A 'v' parameter means this is really a watch URL, not a playlist.
        if parse_qs(url).get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        # Must be checked against the *original* URL, before rewriting it.
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        query = parse_qs(url) or {'list': playlist_id}
        canonical_url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            # Preserve the music.youtube.com origin for the tab extractor.
            canonical_url = smuggle_url(canonical_url, {'is_music_url': True})
        return self.url_result(canonical_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4226
4227
class YoutubeYtBeIE(InfoExtractor):
    """Handle youtu.be short links that also carry a playlist ID by
    rewriting them to a full youtube.com watch URL for YoutubeTabIE."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        playlist_id = match.group('playlist_id')
        # Rebuild the equivalent long-form watch URL and hand it off.
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4266
4267
class YoutubeYtUserIE(InfoExtractor):
    """Resolve the "ytuser:NAME" shorthand to the channel's /user/ page."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        # Delegate to the generic tab extractor via the canonical user URL.
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4281
4282
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Map the ":ytfav" pseudo-URL to the user's liked-videos playlist."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # 'LL' is the auto-generated liked-videos playlist ID.
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4300
4301
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional opaque `params` value sent with the search request; subclasses
    # override it to pre-apply filters (e.g. sort order) — see YoutubeSearchDateIE.
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to ``n`` video results for ``query``.

        Pages through the innertube ``search`` endpoint, following
        continuation tokens until either ``n`` results have been yielded
        or no further continuation is found.
        """
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        continuation = {}
        for page_num in itertools.count(1):
            # Merge the previous page's continuation token (if any) into the
            # request payload; the first iteration sends the bare query.
            data.update(continuation)
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # The first page and continuation pages nest the result list at
            # different paths, hence the two alternative getters.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation = None
            for slr_content in slr_contents:
                if not continuation:
                    # Keep the first continuation token encountered for the
                    # next request.
                    continuation = self._extract_continuation({'contents': [slr_content]})

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    # Skip entries that are not plain video results
                    # (channels, playlists, ads, shelves, ...).
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No continuation token on this page means we reached the end.
            if not continuation:
                break

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
4369
4370
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same as YoutubeSearchIE, but with a fixed search `params` filter so
    # results come back newest-first.
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # URL-encoded innertube filter blob; presumably selects sort-by-upload-date
    # ('CAI%3D' decodes to 'CAI=') — TODO confirm against the search endpoint.
    _SEARCH_PARAMS = 'CAI%3D'
4376
4377
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Handle youtube.com/results search URLs by reusing the ytsearch logic."""
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # Searches are driven by the URL itself, not the _SEARCH_KEY prefix.
        return cls._VALID_URL

    def _real_extract(self, url):
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        # Either parameter name may carry the query ('search_query' or 'q').
        query = (params.get('search_query') or params.get('q'))[0]
        # Pass any 'sp' filter blob from the URL through to the search request.
        sp_values = params.get('sp', ('',))
        self._SEARCH_PARAMS = sp_values[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4403
4404
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the extractor name from the subclass's feed name.
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        # Hand the concrete feed URL to the generic tab extractor.
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4421
4422
class YoutubeWatchLaterIE(InfoExtractor):
    """Map the ":ytwatchlater" pseudo-URL to the Watch Later playlist."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # 'WL' is the auto-generated Watch Later playlist ID.
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4435
4436
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the YouTube home page / recommendations; login is
    # optional (anonymous recommendations exist), unlike the other feeds.
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4452
4453
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the authenticated user's subscriptions feed.
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4465
4466
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the authenticated user's watch history.
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4475
4476
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch watch/attribution URLs whose 'v' parameter was lost — usually
    because an unquoted '&' let the shell truncate the command line — and
    fail with an actionable message instead of a confusing generic error."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Fix: the hint previously told users to run 'youtube-dl'; this
        # project's executable is 'yt-dlp', so the suggested commands now
        # name the correct program.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
4524
4525
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch watch URLs whose video ID is shorter than the required 11
    characters and raise a descriptive error instead of attempting extraction."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)