]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[youtube:comments] Move comment extraction to new API (#466)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import hashlib
9 import itertools
10 import json
11 import os.path
12 import random
13 import re
14 import time
15 import traceback
16
17 from .common import InfoExtractor, SearchInfoExtractor
18 from ..compat import (
19 compat_chr,
20 compat_HTTPError,
21 compat_parse_qs,
22 compat_str,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 )
28 from ..jsinterp import JSInterpreter
29 from ..utils import (
30 bool_or_none,
31 bytes_to_intlist,
32 clean_html,
33 dict_get,
34 datetime_from_str,
35 error_to_compat_str,
36 ExtractorError,
37 format_field,
38 float_or_none,
39 int_or_none,
40 intlist_to_bytes,
41 mimetype2ext,
42 parse_codecs,
43 parse_duration,
44 qualities,
45 remove_start,
46 smuggle_url,
47 str_or_none,
48 str_to_int,
49 try_get,
50 unescapeHTML,
51 unified_strdate,
52 unsmuggle_url,
53 update_url_query,
54 url_or_none,
55 urlencode_postdata,
56 urljoin
57 )
58
59
def parse_qs(url):
    """Return the query-string of *url* parsed into a dict of lists."""
    parsed = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed.query)
62
63
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""
    # Endpoints of the old accounts.google.com web-form login flow.
    # Only referenced by the disabled code in _login().
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # youtube.com path components that can never be a vanity channel/user name
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches every known playlist-ID flavour (regular, uploads, liked,
    # mixes, albums, ...) plus the special WL/LL/LM aliases
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
83
84 def _login(self):
85 """
86 Attempt to log in to YouTube.
87 True is returned if successful or skipped.
88 False is returned if login failed.
89
90 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
91 """
92
93 def warn(message):
94 self.report_warning(message)
95
96 # username+password login is broken
97 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
98 self.raise_login_required(
99 'Login details are needed to download this content', method='cookies')
100 username, password = self._get_login_info()
101 if username:
102 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
103 return
104
105 # Everything below this is broken!
106 r'''
107 # No authentication to be performed
108 if username is None:
109 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
110 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
111 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
112 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
113 return True
114
115 login_page = self._download_webpage(
116 self._LOGIN_URL, None,
117 note='Downloading login page',
118 errnote='unable to fetch login page', fatal=False)
119 if login_page is False:
120 return
121
122 login_form = self._hidden_inputs(login_page)
123
124 def req(url, f_req, note, errnote):
125 data = login_form.copy()
126 data.update({
127 'pstMsg': 1,
128 'checkConnection': 'youtube',
129 'checkedDomains': 'youtube',
130 'hl': 'en',
131 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
132 'f.req': json.dumps(f_req),
133 'flowName': 'GlifWebSignIn',
134 'flowEntry': 'ServiceLogin',
135 # TODO: reverse actual botguard identifier generation algo
136 'bgRequest': '["identifier",""]',
137 })
138 return self._download_json(
139 url, None, note=note, errnote=errnote,
140 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
141 fatal=False,
142 data=urlencode_postdata(data), headers={
143 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
144 'Google-Accounts-XSRF': 1,
145 })
146
147 lookup_req = [
148 username,
149 None, [], None, 'US', None, None, 2, False, True,
150 [
151 None, None,
152 [2, 1, None, 1,
153 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
154 None, [], 4],
155 1, [None, None, []], None, None, None, True
156 ],
157 username,
158 ]
159
160 lookup_results = req(
161 self._LOOKUP_URL, lookup_req,
162 'Looking up account info', 'Unable to look up account info')
163
164 if lookup_results is False:
165 return False
166
167 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
168 if not user_hash:
169 warn('Unable to extract user hash')
170 return False
171
172 challenge_req = [
173 user_hash,
174 None, 1, None, [1, None, None, None, [password, None, True]],
175 [
176 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
177 1, [None, None, []], None, None, None, True
178 ]]
179
180 challenge_results = req(
181 self._CHALLENGE_URL, challenge_req,
182 'Logging in', 'Unable to log in')
183
184 if challenge_results is False:
185 return
186
187 login_res = try_get(challenge_results, lambda x: x[0][5], list)
188 if login_res:
189 login_msg = try_get(login_res, lambda x: x[5], compat_str)
190 warn(
191 'Unable to login: %s' % 'Invalid password'
192 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
193 return False
194
195 res = try_get(challenge_results, lambda x: x[0][-1], list)
196 if not res:
197 warn('Unable to extract result entry')
198 return False
199
200 login_challenge = try_get(res, lambda x: x[0][0], list)
201 if login_challenge:
202 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
203 if challenge_str == 'TWO_STEP_VERIFICATION':
204 # SEND_SUCCESS - TFA code has been successfully sent to phone
205 # QUOTA_EXCEEDED - reached the limit of TFA codes
206 status = try_get(login_challenge, lambda x: x[5], compat_str)
207 if status == 'QUOTA_EXCEEDED':
208 warn('Exceeded the limit of TFA codes, try later')
209 return False
210
211 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
212 if not tl:
213 warn('Unable to extract TL')
214 return False
215
216 tfa_code = self._get_tfa_info('2-step verification code')
217
218 if not tfa_code:
219 warn(
220 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
221 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
222 return False
223
224 tfa_code = remove_start(tfa_code, 'G-')
225
226 tfa_req = [
227 user_hash, None, 2, None,
228 [
229 9, None, None, None, None, None, None, None,
230 [None, tfa_code, True, 2]
231 ]]
232
233 tfa_results = req(
234 self._TFA_URL.format(tl), tfa_req,
235 'Submitting TFA code', 'Unable to submit TFA code')
236
237 if tfa_results is False:
238 return False
239
240 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
241 if tfa_res:
242 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
243 warn(
244 'Unable to finish TFA: %s' % 'Invalid TFA code'
245 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
246 return False
247
248 check_cookie_url = try_get(
249 tfa_results, lambda x: x[0][-1][2], compat_str)
250 else:
251 CHALLENGES = {
252 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
253 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
254 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
255 }
256 challenge = CHALLENGES.get(
257 challenge_str,
258 '%s returned error %s.' % (self.IE_NAME, challenge_str))
259 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
260 return False
261 else:
262 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
263
264 if not check_cookie_url:
265 warn('Unable to extract CheckCookie URL')
266 return False
267
268 check_cookie_results = self._download_webpage(
269 check_cookie_url, None, 'Checking cookie', fatal=False)
270
271 if check_cookie_results is False:
272 return False
273
274 if 'https://myaccount.google.com/' not in check_cookie_results:
275 warn('Unable to log in')
276 return False
277
278 return True
279 '''
280
281 def _initialize_consent(self):
282 cookies = self._get_cookies('https://www.youtube.com/')
283 if cookies.get('__Secure-3PSID'):
284 return
285 consent_id = None
286 consent = cookies.get('CONSENT')
287 if consent:
288 if 'YES' in consent.value:
289 return
290 consent_id = self._search_regex(
291 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
292 if not consent_id:
293 consent_id = random.randint(100, 999)
294 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
295
296 def _real_initialize(self):
297 self._initialize_consent()
298 if self._downloader is None:
299 return
300 if not self._login():
301 return
302
    # Regexes for the JSON blobs YouTube embeds into watch/browse pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Whatever may terminate the ytInitialData statement in the page source
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Built-in innertube client configurations, used as fallbacks whenever
    # the corresponding values cannot be read from a page's ytcfg.
    # Key names mirror the ones YouTube itself uses in ytcfg.
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        # YouTube Music web client
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            # NOTE(review): a string here, unlike the numeric ids of the WEB
            # clients above — presumably intentional; confirm before changing
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID'
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER'
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_MUSIC'
        }
    }

    # Innertube API hostname per client; clients not listed use the WEB host
    # (see _get_innertube_host)
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }
400
401 def _get_default_ytcfg(self, client='WEB'):
402 if client in self._YT_DEFAULT_YTCFGS:
403 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
404 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
405 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
406
407 def _get_innertube_host(self, client='WEB'):
408 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
409
410 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
411 # try_get but with fallback to default ytcfg client values when present
412 _func = lambda y: try_get(y, getter, expected_type)
413 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
414
415 def _extract_client_name(self, ytcfg, default_client='WEB'):
416 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
417
418 def _extract_client_version(self, ytcfg, default_client='WEB'):
419 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
420
421 def _extract_api_key(self, ytcfg=None, default_client='WEB'):
422 return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
423
424 def _extract_context(self, ytcfg=None, default_client='WEB'):
425 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
426 context = _get_context(ytcfg)
427 if context:
428 return context
429
430 context = _get_context(self._get_default_ytcfg(default_client))
431 if not ytcfg:
432 return context
433
434 # Recreate the client context (required)
435 context['client'].update({
436 'clientVersion': self._extract_client_version(ytcfg, default_client),
437 'clientName': self._extract_client_name(ytcfg, default_client),
438 })
439 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
440 if visitor_data:
441 context['client']['visitorData'] = visitor_data
442 return context
443
    def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
        """Build the 'SAPISIDHASH <timestamp>_<sha1>' Authorization header
        value, or None when no SAPISID-type cookie is available.

        Side effect: sets a SAPISID cookie (copied from __Secure-3PAPISID)
        when only the latter is present."""
        # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
        # See: https://github.com/yt-dlp/yt-dlp/issues/393
        yt_cookies = self._get_cookies('https://www.youtube.com')
        sapisid_cookie = dict_get(
            yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
        if sapisid_cookie is None:
            return
        time_now = round(time.time())
        # SAPISID cookie is required if not already present
        if not yt_cookies.get('SAPISID'):
            self._set_cookie(
                '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
        # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
        sapisidhash = hashlib.sha1(
            f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
        return f'SAPISIDHASH {time_now}_{sapisidhash}'
461
462 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
463 note='Downloading API JSON', errnote='Unable to download API page',
464 context=None, api_key=None, api_hostname=None, default_client='WEB'):
465
466 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
467 data.update(query)
468 real_headers = self._generate_api_headers(client=default_client)
469 real_headers.update({'content-type': 'application/json'})
470 if headers:
471 real_headers.update(headers)
472 return self._download_json(
473 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
474 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
475 data=json.dumps(data).encode('utf8'), headers=real_headers,
476 query={'key': api_key or self._extract_api_key()})
477
478 def _extract_yt_initial_data(self, video_id, webpage):
479 return self._parse_json(
480 self._search_regex(
481 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
482 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
483 video_id)
484
485 def _extract_identity_token(self, webpage, item_id):
486 ytcfg = self._extract_ytcfg(item_id, webpage)
487 if ytcfg:
488 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
489 if token:
490 return token
491 return self._search_regex(
492 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
493 'identity token', default=None)
494
    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg
        """
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # ytcfg includes channel_syncid if on secondary channel
        # NOTE(review): this .get assumes *data* is a dict (it is for both
        # documented inputs) — confirm if other types are ever passed
        return data.get('DELEGATED_SESSION_ID')
510
511 def _extract_ytcfg(self, video_id, webpage):
512 if not webpage:
513 return {}
514 return self._parse_json(
515 self._search_regex(
516 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
517 default='{}'), video_id, fatal=False) or {}
518
519 def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
520 visitor_data=None, api_hostname=None, client='WEB'):
521 origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(client))
522 headers = {
523 'X-YouTube-Client-Name': compat_str(
524 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)),
525 'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
526 'Origin': origin
527 }
528 if not visitor_data and ytcfg:
529 visitor_data = try_get(
530 self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
531 if identity_token:
532 headers['X-Youtube-Identity-Token'] = identity_token
533 if account_syncid:
534 headers['X-Goog-PageId'] = account_syncid
535 headers['X-Goog-AuthUser'] = 0
536 if visitor_data:
537 headers['X-Goog-Visitor-Id'] = visitor_data
538 auth = self._generate_sapisidhash_header(origin)
539 if auth is not None:
540 headers['Authorization'] = auth
541 headers['X-Origin'] = origin
542 return headers
543
544 @staticmethod
545 def _build_api_continuation_query(continuation, ctp=None):
546 query = {
547 'continuation': continuation
548 }
549 # TODO: Inconsistency with clickTrackingParams.
550 # Currently we have a fixed ctp contained within context (from ytcfg)
551 # and a ctp in root query for continuation.
552 if ctp:
553 query['clickTracking'] = {'clickTrackingParams': ctp}
554 return query
555
556 @classmethod
557 def _continuation_query_ajax_to_api(cls, continuation_query):
558 continuation = dict_get(continuation_query, ('continuation', 'ctoken'))
559 return cls._build_api_continuation_query(continuation, continuation_query.get('itct'))
560
561 @staticmethod
562 def _build_continuation_query(continuation, ctp=None):
563 query = {
564 'ctoken': continuation,
565 'continuation': continuation,
566 }
567 if ctp:
568 query['itct'] = ctp
569 return query
570
571 @classmethod
572 def _extract_next_continuation_data(cls, renderer):
573 next_continuation = try_get(
574 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
575 lambda x: x['continuation']['reloadContinuationData']), dict)
576 if not next_continuation:
577 return
578 continuation = next_continuation.get('continuation')
579 if not continuation:
580 return
581 ctp = next_continuation.get('clickTrackingParams')
582 return cls._build_continuation_query(continuation, ctp)
583
584 @classmethod
585 def _extract_continuation_ep_data(cls, continuation_ep: dict):
586 if isinstance(continuation_ep, dict):
587 continuation = try_get(
588 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
589 if not continuation:
590 return
591 ctp = continuation_ep.get('clickTrackingParams')
592 return cls._build_continuation_query(continuation, ctp)
593
594 @classmethod
595 def _extract_continuation(cls, renderer):
596 next_continuation = cls._extract_next_continuation_data(renderer)
597 if next_continuation:
598 return next_continuation
599 contents = []
600 for key in ('contents', 'items'):
601 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
602 for content in contents:
603 if not isinstance(content, dict):
604 continue
605 continuation_ep = try_get(
606 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
607 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
608 dict)
609 continuation = cls._extract_continuation_ep_data(continuation_ep)
610 if continuation:
611 return continuation
612
613 @staticmethod
614 def _extract_alerts(data):
615 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
616 if not isinstance(alert_dict, dict):
617 continue
618 for alert in alert_dict.values():
619 alert_type = alert.get('type')
620 if not alert_type:
621 continue
622 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
623 if message:
624 yield alert_type, message
625 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
626 message += try_get(run, lambda x: x['text'], compat_str)
627 if message:
628 yield alert_type, message
629
630 def _report_alerts(self, alerts, expected=True):
631 errors = []
632 warnings = []
633 for alert_type, alert_message in alerts:
634 if alert_type.lower() == 'error':
635 errors.append([alert_type, alert_message])
636 else:
637 warnings.append([alert_type, alert_message])
638
639 for alert_type, alert_message in (warnings + errors[:-1]):
640 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
641 if errors:
642 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
643
644 def _extract_and_report_alerts(self, data, *args, **kwargs):
645 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
646
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """Call the innertube API endpoint *ep*, retrying on intermittent
        HTTP errors (500/503/404) and on incomplete responses (none of
        *check_get_keys* present in the parsed JSON).

        Returns the parsed response, or None when a non-fatal attempt
        ultimately fails.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                # Non-retryable error (or retries exhausted): raise or warn
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
702
703 @staticmethod
704 def is_music_url(url):
705 return re.match(r'https?://music\.youtube\.com/', url) is not None
706
707 def _extract_video(self, renderer):
708 video_id = renderer.get('videoId')
709 title = try_get(
710 renderer,
711 (lambda x: x['title']['runs'][0]['text'],
712 lambda x: x['title']['simpleText']), compat_str)
713 description = try_get(
714 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
715 compat_str)
716 duration = parse_duration(try_get(
717 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
718 view_count_text = try_get(
719 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
720 view_count = str_to_int(self._search_regex(
721 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
722 'view count', default=None))
723 uploader = try_get(
724 renderer,
725 (lambda x: x['ownerText']['runs'][0]['text'],
726 lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
727 return {
728 '_type': 'url',
729 'ie_key': YoutubeIE.ie_key(),
730 'id': video_id,
731 'url': video_id,
732 'title': title,
733 'description': description,
734 'duration': duration,
735 'view_count': view_count,
736 'uploader': uploader,
737 }
738
739
740 class YoutubeIE(YoutubeBaseInfoExtractor):
741 IE_DESC = 'YouTube.com'
742 _INVIDIOUS_SITES = (
743 # invidious-redirect websites
744 r'(?:www\.)?redirect\.invidious\.io',
745 r'(?:(?:www|dev)\.)?invidio\.us',
746 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
747 r'(?:www\.)?invidious\.pussthecat\.org',
748 r'(?:www\.)?invidious\.zee\.li',
749 r'(?:www\.)?invidious\.ethibox\.fr',
750 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
751 # youtube-dl invidious instances list
752 r'(?:(?:www|no)\.)?invidiou\.sh',
753 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
754 r'(?:www\.)?invidious\.kabi\.tk',
755 r'(?:www\.)?invidious\.mastodon\.host',
756 r'(?:www\.)?invidious\.zapashcanon\.fr',
757 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
758 r'(?:www\.)?invidious\.tinfoil-hat\.net',
759 r'(?:www\.)?invidious\.himiko\.cloud',
760 r'(?:www\.)?invidious\.reallyancient\.tech',
761 r'(?:www\.)?invidious\.tube',
762 r'(?:www\.)?invidiou\.site',
763 r'(?:www\.)?invidious\.site',
764 r'(?:www\.)?invidious\.xyz',
765 r'(?:www\.)?invidious\.nixnet\.xyz',
766 r'(?:www\.)?invidious\.048596\.xyz',
767 r'(?:www\.)?invidious\.drycat\.fr',
768 r'(?:www\.)?inv\.skyn3t\.in',
769 r'(?:www\.)?tube\.poal\.co',
770 r'(?:www\.)?tube\.connect\.cafe',
771 r'(?:www\.)?vid\.wxzm\.sx',
772 r'(?:www\.)?vid\.mint\.lgbt',
773 r'(?:www\.)?vid\.puffyan\.us',
774 r'(?:www\.)?yewtu\.be',
775 r'(?:www\.)?yt\.elukerio\.org',
776 r'(?:www\.)?yt\.lelux\.fi',
777 r'(?:www\.)?invidious\.ggc-project\.de',
778 r'(?:www\.)?yt\.maisputain\.ovh',
779 r'(?:www\.)?ytprivate\.com',
780 r'(?:www\.)?invidious\.13ad\.de',
781 r'(?:www\.)?invidious\.toot\.koeln',
782 r'(?:www\.)?invidious\.fdn\.fr',
783 r'(?:www\.)?watch\.nettohikari\.com',
784 r'(?:www\.)?invidious\.namazso\.eu',
785 r'(?:www\.)?invidious\.silkky\.cloud',
786 r'(?:www\.)?invidious\.exonip\.de',
787 r'(?:www\.)?invidious\.riverside\.rocks',
788 r'(?:www\.)?invidious\.blamefran\.net',
789 r'(?:www\.)?invidious\.moomoo\.de',
790 r'(?:www\.)?ytb\.trom\.tf',
791 r'(?:www\.)?yt\.cyberhost\.uk',
792 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
793 r'(?:www\.)?qklhadlycap4cnod\.onion',
794 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
795 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
796 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
797 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
798 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
799 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
800 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
801 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
802 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
803 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
804 )
805 _VALID_URL = r"""(?x)^
806 (
807 (?:https?://|//) # http(s):// or protocol-independent URL
808 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
809 (?:www\.)?deturl\.com/www\.youtube\.com|
810 (?:www\.)?pwnyoutube\.com|
811 (?:www\.)?hooktube\.com|
812 (?:www\.)?yourepeat\.com|
813 tube\.majestyc\.net|
814 %(invidious)s|
815 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
816 (?:.*?\#/)? # handle anchor (#/) redirect urls
817 (?: # the various things that can precede the ID:
818 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
819 |(?: # or the v= param in all its forms
820 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
821 (?:\?|\#!?) # the params delimiter ? or # or #!
822 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
823 v=
824 )
825 ))
826 |(?:
827 youtu\.be| # just youtu.be/xxxx
828 vid\.plus| # or vid.plus/xxxx
829 zwearz\.com/watch| # or zwearz.com/watch/xxxx
830 %(invidious)s
831 )/
832 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
833 )
834 )? # all until now is optional -> you can pass the naked ID
835 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
836 (?(1).+)? # if we found the ID, everything can follow
837 (?:\#|$)""" % {
838 'invidious': '|'.join(_INVIDIOUS_SITES),
839 }
840 _PLAYER_INFO_RE = (
841 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
842 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
843 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
844 )
    _formats = {
        # Static metadata for known YouTube itags, keyed by itag as a string.
        # NOTE(review): presumably merged with/overridden by per-request format
        # data at the use site — confirm there. 'preference' penalties push 3D
        # (-20) and HLS (-10) variants below the plain progressive formats.
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        # itag 151 really is 72 pixels tall (ultra-low-quality HLS rendition)
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }
    # Subtitle serialisations known for YouTube's timedtext endpoint.
    # NOTE(review): ordering presumably reflects preference — confirm at use site.
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Human-readable reasons YouTube returns for age-restricted content;
    # presumably compared against the playability status to detect an age
    # gate — verify against the caller.
    _AGE_GATE_REASONS = (
        'Sign in to confirm your age',
        'This video may be inappropriate for some users.',
        'Sorry, this content is age-restricted.')

    _GEO_BYPASS = False  # disable the generic geo-bypass machinery for this IE

    IE_NAME = 'youtube'
960 _TESTS = [
961 {
962 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
963 'info_dict': {
964 'id': 'BaW_jenozKc',
965 'ext': 'mp4',
966 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
967 'uploader': 'Philipp Hagemeister',
968 'uploader_id': 'phihag',
969 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
970 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
971 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
972 'upload_date': '20121002',
973 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
974 'categories': ['Science & Technology'],
975 'tags': ['youtube-dl'],
976 'duration': 10,
977 'view_count': int,
978 'like_count': int,
979 'dislike_count': int,
980 'start_time': 1,
981 'end_time': 9,
982 }
983 },
984 {
985 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
986 'note': 'Embed-only video (#1746)',
987 'info_dict': {
988 'id': 'yZIXLfi8CZQ',
989 'ext': 'mp4',
990 'upload_date': '20120608',
991 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
992 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
993 'uploader': 'SET India',
994 'uploader_id': 'setindia',
995 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
996 'age_limit': 18,
997 },
998 'skip': 'Private video',
999 },
1000 {
1001 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1002 'note': 'Use the first video ID in the URL',
1003 'info_dict': {
1004 'id': 'BaW_jenozKc',
1005 'ext': 'mp4',
1006 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1007 'uploader': 'Philipp Hagemeister',
1008 'uploader_id': 'phihag',
1009 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1010 'upload_date': '20121002',
1011 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1012 'categories': ['Science & Technology'],
1013 'tags': ['youtube-dl'],
1014 'duration': 10,
1015 'view_count': int,
1016 'like_count': int,
1017 'dislike_count': int,
1018 },
1019 'params': {
1020 'skip_download': True,
1021 },
1022 },
1023 {
1024 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1025 'note': '256k DASH audio (format 141) via DASH manifest',
1026 'info_dict': {
1027 'id': 'a9LDPn-MO4I',
1028 'ext': 'm4a',
1029 'upload_date': '20121002',
1030 'uploader_id': '8KVIDEO',
1031 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1032 'description': '',
1033 'uploader': '8KVIDEO',
1034 'title': 'UHDTV TEST 8K VIDEO.mp4'
1035 },
1036 'params': {
1037 'youtube_include_dash_manifest': True,
1038 'format': '141',
1039 },
1040 'skip': 'format 141 not served anymore',
1041 },
1042 # DASH manifest with encrypted signature
1043 {
1044 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1045 'info_dict': {
1046 'id': 'IB3lcPjvWLA',
1047 'ext': 'm4a',
1048 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1049 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1050 'duration': 244,
1051 'uploader': 'AfrojackVEVO',
1052 'uploader_id': 'AfrojackVEVO',
1053 'upload_date': '20131011',
1054 'abr': 129.495,
1055 },
1056 'params': {
1057 'youtube_include_dash_manifest': True,
1058 'format': '141/bestaudio[ext=m4a]',
1059 },
1060 },
1061 # Controversy video
1062 {
1063 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1064 'info_dict': {
1065 'id': 'T4XJQO3qol8',
1066 'ext': 'mp4',
1067 'duration': 219,
1068 'upload_date': '20100909',
1069 'uploader': 'Amazing Atheist',
1070 'uploader_id': 'TheAmazingAtheist',
1071 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
1072 'title': 'Burning Everyone\'s Koran',
1073 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
1074 }
1075 },
1076 # Normal age-gate video (embed allowed)
1077 {
1078 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1079 'info_dict': {
1080 'id': 'HtVdAasjOgU',
1081 'ext': 'mp4',
1082 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1083 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1084 'duration': 142,
1085 'uploader': 'The Witcher',
1086 'uploader_id': 'WitcherGame',
1087 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1088 'upload_date': '20140605',
1089 'age_limit': 18,
1090 },
1091 },
1092 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1093 # YouTube Red ad is not captured for creator
1094 {
1095 'url': '__2ABJjxzNo',
1096 'info_dict': {
1097 'id': '__2ABJjxzNo',
1098 'ext': 'mp4',
1099 'duration': 266,
1100 'upload_date': '20100430',
1101 'uploader_id': 'deadmau5',
1102 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1103 'creator': 'deadmau5',
1104 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1105 'uploader': 'deadmau5',
1106 'title': 'Deadmau5 - Some Chords (HD)',
1107 'alt_title': 'Some Chords',
1108 },
1109 'expected_warnings': [
1110 'DASH manifest missing',
1111 ]
1112 },
1113 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1114 {
1115 'url': 'lqQg6PlCWgI',
1116 'info_dict': {
1117 'id': 'lqQg6PlCWgI',
1118 'ext': 'mp4',
1119 'duration': 6085,
1120 'upload_date': '20150827',
1121 'uploader_id': 'olympic',
1122 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1123 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1124 'uploader': 'Olympic',
1125 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1126 },
1127 'params': {
1128 'skip_download': 'requires avconv',
1129 }
1130 },
1131 # Non-square pixels
1132 {
1133 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1134 'info_dict': {
1135 'id': '_b-2C3KPAM0',
1136 'ext': 'mp4',
1137 'stretched_ratio': 16 / 9.,
1138 'duration': 85,
1139 'upload_date': '20110310',
1140 'uploader_id': 'AllenMeow',
1141 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1142 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1143 'uploader': '孫ᄋᄅ',
1144 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1145 },
1146 },
1147 # url_encoded_fmt_stream_map is empty string
1148 {
1149 'url': 'qEJwOuvDf7I',
1150 'info_dict': {
1151 'id': 'qEJwOuvDf7I',
1152 'ext': 'webm',
1153 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1154 'description': '',
1155 'upload_date': '20150404',
1156 'uploader_id': 'spbelect',
1157 'uploader': 'Наблюдатели Петербурга',
1158 },
1159 'params': {
1160 'skip_download': 'requires avconv',
1161 },
1162 'skip': 'This live event has ended.',
1163 },
1164 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1165 {
1166 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1167 'info_dict': {
1168 'id': 'FIl7x6_3R5Y',
1169 'ext': 'webm',
1170 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1171 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1172 'duration': 220,
1173 'upload_date': '20150625',
1174 'uploader_id': 'dorappi2000',
1175 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1176 'uploader': 'dorappi2000',
1177 'formats': 'mincount:31',
1178 },
1179 'skip': 'not actual anymore',
1180 },
1181 # DASH manifest with segment_list
1182 {
1183 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1184 'md5': '8ce563a1d667b599d21064e982ab9e31',
1185 'info_dict': {
1186 'id': 'CsmdDsKjzN8',
1187 'ext': 'mp4',
1188 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1189 'uploader': 'Airtek',
1190 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1191 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1192 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1193 },
1194 'params': {
1195 'youtube_include_dash_manifest': True,
1196 'format': '135', # bestvideo
1197 },
1198 'skip': 'This live event has ended.',
1199 },
1200 {
1201 # Multifeed videos (multiple cameras), URL is for Main Camera
1202 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1203 'info_dict': {
1204 'id': 'jvGDaLqkpTg',
1205 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1206 'description': 'md5:e03b909557865076822aa169218d6a5d',
1207 },
1208 'playlist': [{
1209 'info_dict': {
1210 'id': 'jvGDaLqkpTg',
1211 'ext': 'mp4',
1212 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1213 'description': 'md5:e03b909557865076822aa169218d6a5d',
1214 'duration': 10643,
1215 'upload_date': '20161111',
1216 'uploader': 'Team PGP',
1217 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1218 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1219 },
1220 }, {
1221 'info_dict': {
1222 'id': '3AKt1R1aDnw',
1223 'ext': 'mp4',
1224 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1225 'description': 'md5:e03b909557865076822aa169218d6a5d',
1226 'duration': 10991,
1227 'upload_date': '20161111',
1228 'uploader': 'Team PGP',
1229 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1230 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1231 },
1232 }, {
1233 'info_dict': {
1234 'id': 'RtAMM00gpVc',
1235 'ext': 'mp4',
1236 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1237 'description': 'md5:e03b909557865076822aa169218d6a5d',
1238 'duration': 10995,
1239 'upload_date': '20161111',
1240 'uploader': 'Team PGP',
1241 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1242 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1243 },
1244 }, {
1245 'info_dict': {
1246 'id': '6N2fdlP3C5U',
1247 'ext': 'mp4',
1248 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1249 'description': 'md5:e03b909557865076822aa169218d6a5d',
1250 'duration': 10990,
1251 'upload_date': '20161111',
1252 'uploader': 'Team PGP',
1253 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1254 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1255 },
1256 }],
1257 'params': {
1258 'skip_download': True,
1259 },
1260 },
1261 {
1262 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1263 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1264 'info_dict': {
1265 'id': 'gVfLd0zydlo',
1266 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1267 },
1268 'playlist_count': 2,
1269 'skip': 'Not multifeed anymore',
1270 },
1271 {
1272 'url': 'https://vid.plus/FlRa-iH7PGw',
1273 'only_matching': True,
1274 },
1275 {
1276 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1277 'only_matching': True,
1278 },
1279 {
1280 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1281 # Also tests cut-off URL expansion in video description (see
1282 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1283 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1284 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1285 'info_dict': {
1286 'id': 'lsguqyKfVQg',
1287 'ext': 'mp4',
1288 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1289 'alt_title': 'Dark Walk - Position Music',
1290 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1291 'duration': 133,
1292 'upload_date': '20151119',
1293 'uploader_id': 'IronSoulElf',
1294 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1295 'uploader': 'IronSoulElf',
1296 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1297 'track': 'Dark Walk - Position Music',
1298 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1299 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1300 },
1301 'params': {
1302 'skip_download': True,
1303 },
1304 },
1305 {
1306 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1307 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1308 'only_matching': True,
1309 },
1310 {
1311 # Video with yt:stretch=17:0
1312 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1313 'info_dict': {
1314 'id': 'Q39EVAstoRM',
1315 'ext': 'mp4',
1316 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1317 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1318 'upload_date': '20151107',
1319 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1320 'uploader': 'CH GAMER DROID',
1321 },
1322 'params': {
1323 'skip_download': True,
1324 },
1325 'skip': 'This video does not exist.',
1326 },
1327 {
1328 # Video with incomplete 'yt:stretch=16:'
1329 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1330 'only_matching': True,
1331 },
1332 {
1333 # Video licensed under Creative Commons
1334 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1335 'info_dict': {
1336 'id': 'M4gD1WSo5mA',
1337 'ext': 'mp4',
1338 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1339 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1340 'duration': 721,
1341 'upload_date': '20150127',
1342 'uploader_id': 'BerkmanCenter',
1343 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1344 'uploader': 'The Berkman Klein Center for Internet & Society',
1345 'license': 'Creative Commons Attribution license (reuse allowed)',
1346 },
1347 'params': {
1348 'skip_download': True,
1349 },
1350 },
1351 {
1352 # Channel-like uploader_url
1353 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1354 'info_dict': {
1355 'id': 'eQcmzGIKrzg',
1356 'ext': 'mp4',
1357 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1358 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1359 'duration': 4060,
1360 'upload_date': '20151119',
1361 'uploader': 'Bernie Sanders',
1362 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1363 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1364 'license': 'Creative Commons Attribution license (reuse allowed)',
1365 },
1366 'params': {
1367 'skip_download': True,
1368 },
1369 },
1370 {
1371 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1372 'only_matching': True,
1373 },
1374 {
1375 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1376 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1377 'only_matching': True,
1378 },
1379 {
1380 # Rental video preview
1381 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1382 'info_dict': {
1383 'id': 'uGpuVWrhIzE',
1384 'ext': 'mp4',
1385 'title': 'Piku - Trailer',
1386 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1387 'upload_date': '20150811',
1388 'uploader': 'FlixMatrix',
1389 'uploader_id': 'FlixMatrixKaravan',
1390 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1391 'license': 'Standard YouTube License',
1392 },
1393 'params': {
1394 'skip_download': True,
1395 },
1396 'skip': 'This video is not available.',
1397 },
1398 {
1399 # YouTube Red video with episode data
1400 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1401 'info_dict': {
1402 'id': 'iqKdEhx-dD4',
1403 'ext': 'mp4',
1404 'title': 'Isolation - Mind Field (Ep 1)',
1405 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1406 'duration': 2085,
1407 'upload_date': '20170118',
1408 'uploader': 'Vsauce',
1409 'uploader_id': 'Vsauce',
1410 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1411 'series': 'Mind Field',
1412 'season_number': 1,
1413 'episode_number': 1,
1414 },
1415 'params': {
1416 'skip_download': True,
1417 },
1418 'expected_warnings': [
1419 'Skipping DASH manifest',
1420 ],
1421 },
1422 {
1423 # The following content has been identified by the YouTube community
1424 # as inappropriate or offensive to some audiences.
1425 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1426 'info_dict': {
1427 'id': '6SJNVb0GnPI',
1428 'ext': 'mp4',
1429 'title': 'Race Differences in Intelligence',
1430 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1431 'duration': 965,
1432 'upload_date': '20140124',
1433 'uploader': 'New Century Foundation',
1434 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1435 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1436 },
1437 'params': {
1438 'skip_download': True,
1439 },
1440 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1441 },
1442 {
1443 # itag 212
1444 'url': '1t24XAntNCY',
1445 'only_matching': True,
1446 },
1447 {
1448 # geo restricted to JP
1449 'url': 'sJL6WA-aGkQ',
1450 'only_matching': True,
1451 },
1452 {
1453 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1454 'only_matching': True,
1455 },
1456 {
1457 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1458 'only_matching': True,
1459 },
1460 {
1461 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1462 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1463 'only_matching': True,
1464 },
1465 {
1466 # DRM protected
1467 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1468 'only_matching': True,
1469 },
1470 {
1471 # Video with unsupported adaptive stream type formats
1472 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1473 'info_dict': {
1474 'id': 'Z4Vy8R84T1U',
1475 'ext': 'mp4',
1476 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1477 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1478 'duration': 433,
1479 'upload_date': '20130923',
1480 'uploader': 'Amelia Putri Harwita',
1481 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1482 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1483 'formats': 'maxcount:10',
1484 },
1485 'params': {
1486 'skip_download': True,
1487 'youtube_include_dash_manifest': False,
1488 },
1489 'skip': 'not actual anymore',
1490 },
1491 {
1492 # Youtube Music Auto-generated description
1493 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1494 'info_dict': {
1495 'id': 'MgNrAu2pzNs',
1496 'ext': 'mp4',
1497 'title': 'Voyeur Girl',
1498 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1499 'upload_date': '20190312',
1500 'uploader': 'Stephen - Topic',
1501 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1502 'artist': 'Stephen',
1503 'track': 'Voyeur Girl',
1504 'album': 'it\'s too much love to know my dear',
1505 'release_date': '20190313',
1506 'release_year': 2019,
1507 },
1508 'params': {
1509 'skip_download': True,
1510 },
1511 },
1512 {
1513 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1514 'only_matching': True,
1515 },
1516 {
1517 # invalid -> valid video id redirection
1518 'url': 'DJztXj2GPfl',
1519 'info_dict': {
1520 'id': 'DJztXj2GPfk',
1521 'ext': 'mp4',
1522 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1523 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1524 'upload_date': '20090125',
1525 'uploader': 'Prochorowka',
1526 'uploader_id': 'Prochorowka',
1527 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1528 'artist': 'Panjabi MC',
1529 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1530 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1531 },
1532 'params': {
1533 'skip_download': True,
1534 },
1535 'skip': 'Video unavailable',
1536 },
1537 {
1538 # empty description results in an empty string
1539 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1540 'info_dict': {
1541 'id': 'x41yOUIvK2k',
1542 'ext': 'mp4',
1543 'title': 'IMG 3456',
1544 'description': '',
1545 'upload_date': '20170613',
1546 'uploader_id': 'ElevageOrVert',
1547 'uploader': 'ElevageOrVert',
1548 },
1549 'params': {
1550 'skip_download': True,
1551 },
1552 },
1553 {
1554 # with '};' inside yt initial data (see [1])
1555 # see [2] for an example with '};' inside ytInitialPlayerResponse
1556 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1557 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1558 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1559 'info_dict': {
1560 'id': 'CHqg6qOn4no',
1561 'ext': 'mp4',
1562 'title': 'Part 77 Sort a list of simple types in c#',
1563 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1564 'upload_date': '20130831',
1565 'uploader_id': 'kudvenkat',
1566 'uploader': 'kudvenkat',
1567 },
1568 'params': {
1569 'skip_download': True,
1570 },
1571 },
1572 {
1573 # another example of '};' in ytInitialData
1574 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1575 'only_matching': True,
1576 },
1577 {
1578 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1579 'only_matching': True,
1580 },
1581 {
1582 # https://github.com/ytdl-org/youtube-dl/pull/28094
1583 'url': 'OtqTfy26tG0',
1584 'info_dict': {
1585 'id': 'OtqTfy26tG0',
1586 'ext': 'mp4',
1587 'title': 'Burn Out',
1588 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1589 'upload_date': '20141120',
1590 'uploader': 'The Cinematic Orchestra - Topic',
1591 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1592 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1593 'artist': 'The Cinematic Orchestra',
1594 'track': 'Burn Out',
1595 'album': 'Every Day',
1596 'release_data': None,
1597 'release_year': None,
1598 },
1599 'params': {
1600 'skip_download': True,
1601 },
1602 },
1603 {
1604 # controversial video, only works with bpctr when authenticated with cookies
1605 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1606 'only_matching': True,
1607 },
1608 {
1609 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1610 'url': 'cBvYw8_A0vQ',
1611 'info_dict': {
1612 'id': 'cBvYw8_A0vQ',
1613 'ext': 'mp4',
1614 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1615 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1616 'upload_date': '20201120',
1617 'uploader': 'Walk around Japan',
1618 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1619 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1620 },
1621 'params': {
1622 'skip_download': True,
1623 },
1624 }, {
1625 # Has multiple audio streams
1626 'url': 'WaOKSUlf4TM',
1627 'only_matching': True
1628 }, {
1629 # Requires Premium: has format 141 when requested using YTM url
1630 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1631 'only_matching': True
1632 }, {
1633 # multiple subtitles with same lang_code
1634 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1635 'only_matching': True,
1636 }, {
1637 # Force use android client fallback
1638 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1639 'info_dict': {
1640 'id': 'YOelRv7fMxY',
1641 'title': 'Digging a Secret Tunnel from my Workshop',
1642 'ext': '3gp',
1643 'upload_date': '20210624',
1644 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1645 'uploader': 'colinfurze',
1646 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1647 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1648 },
1649 'params': {
1650 'format': '17', # 3gp format available on android
1651 'extractor_args': {'youtube': {'player_client': ['android']}},
1652 },
1653 },
1654 {
1655 # Skip download of additional client configs (remix client config in this case)
1656 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1657 'only_matching': True,
1658 'params': {
1659 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1660 },
1661 }
1662 ]
1663
1664 @classmethod
1665 def suitable(cls, url):
1666 # Hack for lazy extractors until more generic solution is implemented
1667 # (see #28780)
1668 from .youtube import parse_qs
1669 qs = parse_qs(url)
1670 if qs.get('list', [None])[0]:
1671 return False
1672 return super(YoutubeIE, cls).suitable(url)
1673
    def __init__(self, *args, **kwargs):
        """Initialize per-instance caches used during signature extraction."""
        super(YoutubeIE, self).__init__(*args, **kwargs)
        # player id -> raw player JS source (populated by _load_player)
        self._code_cache = {}
        # (player_url, signature cache id) -> descrambling function
        # (populated by _decrypt_signature)
        self._player_cache = {}
1678
1679 def _extract_player_url(self, ytcfg=None, webpage=None):
1680 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1681 if not player_url:
1682 player_url = self._search_regex(
1683 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1684 webpage, 'player URL', fatal=False)
1685 if player_url.startswith('//'):
1686 player_url = 'https:' + player_url
1687 elif not re.match(r'https?://', player_url):
1688 player_url = compat_urlparse.urljoin(
1689 'https://www.youtube.com', player_url)
1690 return player_url
1691
1692 def _signature_cache_id(self, example_sig):
1693 """ Return a string representation of a signature """
1694 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1695
1696 @classmethod
1697 def _extract_player_info(cls, player_url):
1698 for player_re in cls._PLAYER_INFO_RE:
1699 id_m = re.search(player_re, player_url)
1700 if id_m:
1701 break
1702 else:
1703 raise ExtractorError('Cannot identify player %r' % player_url)
1704 return id_m.group('id')
1705
1706 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1707 player_id = self._extract_player_info(player_url)
1708 if player_id not in self._code_cache:
1709 self._code_cache[player_id] = self._download_webpage(
1710 player_url, video_id, fatal=fatal,
1711 note='Downloading player ' + player_id,
1712 errnote='Download of %s failed' % player_url)
1713 return player_id in self._code_cache
1714
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Build (or load from the disk cache) a function that descrambles signatures.

        The cached representation is a list of character indices describing
        the permutation the player JS applies. Returns None if the player
        code could not be loaded.
        """
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        # The key includes the pattern of part lengths: the same player may
        # apply different transforms to signatures of different layouts
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            return lambda s: ''.join(s[i] for i in cache_spec)

        if self._load_player(video_id, player_url):
            code = self._code_cache[player_id]
            res = self._parse_sig_js(code)

            # Probe the JS function with a string of unique characters to
            # record the permutation it performs, then persist that spec
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res
1737
    def _print_sig_code(self, func, example_sig):
        """Print equivalent Python code for the extracted signature function (debugging aid)."""
        def gen_sig_code(idxs):
            # Compress the index list into slice expressions wherever the
            # indices form arithmetic runs with step +1 or -1
            def _genslice(start, end, step):
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    # Inside a run: extend it or flush the slice
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a new run
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Derive the permutation by probing with unique characters
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                ' return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1776
    def _parse_sig_js(self, jscode):
        """Locate the signature-descrambling function in the player JS and wrap it.

        Returns a callable mapping a scrambled signature string to the
        descrambled one. The regex list covers the function-name patterns
        observed across player versions, newest first.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        # Interpret the JS and expose the found function as a Python callable;
        # the JS function takes the signature as a single argument
        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        return lambda s: initial_function([s])
1800
1801 def _decrypt_signature(self, s, video_id, player_url):
1802 """Turn the encrypted s field into a working signature"""
1803
1804 if player_url is None:
1805 raise ExtractorError('Cannot decrypt signature without player_url')
1806
1807 try:
1808 player_id = (player_url, self._signature_cache_id(s))
1809 if player_id not in self._player_cache:
1810 func = self._extract_signature_function(
1811 video_id, player_url, s
1812 )
1813 self._player_cache[player_id] = func
1814 func = self._player_cache[player_id]
1815 if self.get_param('youtube_print_sig_code'):
1816 self._print_sig_code(func, s)
1817 return func(s)
1818 except Exception as e:
1819 tb = traceback.format_exc()
1820 raise ExtractorError(
1821 'Signature extraction failed: ' + tb, cause=e)
1822
1823 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1824 """
1825 Extract signatureTimestamp (sts)
1826 Required to tell API what sig/player version is in use.
1827 """
1828 sts = None
1829 if isinstance(ytcfg, dict):
1830 sts = int_or_none(ytcfg.get('STS'))
1831
1832 if not sts:
1833 # Attempt to extract from player
1834 if player_url is None:
1835 error_msg = 'Cannot extract signature timestamp without player_url.'
1836 if fatal:
1837 raise ExtractorError(error_msg)
1838 self.report_warning(error_msg)
1839 return
1840 if self._load_player(video_id, player_url, fatal=fatal):
1841 player_id = self._extract_player_info(player_url)
1842 code = self._code_cache[player_id]
1843 sts = int_or_none(self._search_regex(
1844 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1845 'JS player signature timestamp', group='sts', fatal=fatal))
1846 return sts
1847
1848 def _mark_watched(self, video_id, player_response):
1849 playback_url = url_or_none(try_get(
1850 player_response,
1851 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
1852 if not playback_url:
1853 return
1854 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1855 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1856
1857 # cpn generation algorithm is reverse engineered from base.js.
1858 # In fact it works even with dummy cpn.
1859 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1860 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1861
1862 qs.update({
1863 'ver': ['2'],
1864 'cpn': [cpn],
1865 })
1866 playback_url = compat_urlparse.urlunparse(
1867 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1868
1869 self._download_webpage(
1870 playback_url, video_id, 'Marking watched',
1871 'Unable to mark watched', fatal=False)
1872
    @staticmethod
    def _extract_urls(webpage):
        """Return all embedded YouTube URLs/video ids found in an arbitrary webpage."""
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

        # lazyYT YouTube embed
        # NOTE: these matches are bare video ids, not full URLs
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        # (also yields bare video ids via the data-video_id attribute)
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        entries.extend(m[-1] for m in matches)

        return entries
1904
1905 @staticmethod
1906 def _extract_url(webpage):
1907 urls = YoutubeIE._extract_urls(webpage)
1908 return urls[0] if urls else None
1909
1910 @classmethod
1911 def extract_id(cls, url):
1912 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1913 if mobj is None:
1914 raise ExtractorError('Invalid URL: %s' % url)
1915 video_id = mobj.group(2)
1916 return video_id
1917
    def _extract_chapters_from_json(self, data, video_id, duration):
        """Extract chapter markers from the initial-data JSON.

        duration -- total video duration in seconds, used as the end time of
        the final chapter. Returns a list of {start_time, end_time, title}
        dicts, or None when the player bar carries no chapter data.
        """
        chapters_list = try_get(
            data,
            lambda x: x['playerOverlays']
                       ['playerOverlayRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['decoratedPlayerBarRenderer']
                       ['playerBar']
                       ['chapteredPlayerBarRenderer']
                       ['chapters'],
            list)
        if not chapters_list:
            return

        def chapter_time(chapter):
            # timeRangeStartMillis is in milliseconds; scale to seconds
            return float_or_none(
                try_get(
                    chapter,
                    lambda x: x['chapterRenderer']['timeRangeStartMillis'],
                    int),
                scale=1000)
        chapters = []
        for next_num, chapter in enumerate(chapters_list, start=1):
            start_time = chapter_time(chapter)
            if start_time is None:
                continue
            # A chapter ends where the next one starts
            # (the last chapter ends at the video duration)
            end_time = (chapter_time(chapters_list[next_num])
                        if next_num < len(chapters_list) else duration)
            if end_time is None:
                continue
            title = try_get(
                chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
                compat_str)
            chapters.append({
                'start_time': start_time,
                'end_time': end_time,
                'title': title,
            })
        return chapters
1957
1958 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1959 return self._parse_json(self._search_regex(
1960 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1961 regex), webpage, name, default='{}'), video_id, fatal=False)
1962
1963 @staticmethod
1964 def parse_time_text(time_text):
1965 """
1966 Parse the comment time text
1967 time_text is in the format 'X units ago (edited)'
1968 """
1969 time_text_split = time_text.split(' ')
1970 if len(time_text_split) >= 3:
1971 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1972
1973 @staticmethod
1974 def _join_text_entries(runs):
1975 text = None
1976 for run in runs:
1977 if not isinstance(run, dict):
1978 continue
1979 sub_text = try_get(run, lambda x: x['text'], compat_str)
1980 if sub_text:
1981 if not text:
1982 text = sub_text
1983 continue
1984 text += sub_text
1985 return text
1986
1987 def _extract_comment(self, comment_renderer, parent=None):
1988 comment_id = comment_renderer.get('commentId')
1989 if not comment_id:
1990 return
1991 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1992 text = self._join_text_entries(comment_text_runs) or ''
1993 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1994 time_text = self._join_text_entries(comment_time_text)
1995 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
1996 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1997 author_id = try_get(comment_renderer,
1998 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
1999 votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2000 lambda x: x['likeCount']), compat_str)) or 0
2001 author_thumbnail = try_get(comment_renderer,
2002 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2003
2004 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2005 is_liked = try_get(comment_renderer, lambda x: x['isLiked'], bool)
2006 return {
2007 'id': comment_id,
2008 'text': text,
2009 'timestamp': timestamp,
2010 'time_text': time_text,
2011 'like_count': votes,
2012 'is_favorited': is_liked,
2013 'author': author,
2014 'author_id': author_id,
2015 'author_thumbnail': author_thumbnail,
2016 'author_is_uploader': author_is_uploader,
2017 'parent': parent or 'root'
2018 }
2019
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """Generator over a comment section (or a reply thread when parent is set).

        Yields comment info dicts; additionally yields a single int (the
        estimated total comment count) when the section header is first seen.
        comment_counts is shared mutable state across recursive calls:
        [comments so far, estimated total, current reply-thread number].
        """

        def extract_header(contents):
            # Pull the estimated total and the continuation of the chosen
            # sort order out of the commentsHeaderRenderer
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = try_get(comments_header_renderer,
                                                 (lambda x: x['countText']['runs'][0]['text'],
                                                  lambda x: x['commentsCount']['runs'][0]['text']),
                                                 compat_str)
                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment of a page, recursing into reply threads
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['ctoken']) < 27:
            # Tokens shorter than 27 chars come from the old ajax API and are
            # not accepted by the innertube API; synthesize a compatible one
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the comment section until no continuation remains
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = ' Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    ' ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=self._continuation_query_ajax_to_api(continuation),
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry the visitorData forward so subsequent pages share session
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                # Current (onResponseReceivedEndpoints) response structure
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    # NOTE(review): enumerate starts at 0, so a page with
                    # exactly one comment also hits count == 0 below and is
                    # treated as empty — looks off by one; confirm intent
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2191
2192 @staticmethod
2193 def _generate_comment_continuation(video_id):
2194 """
2195 Generates initial comment section continuation token from given video id
2196 """
2197 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2198 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2199 new_continuation_intlist = list(itertools.chain.from_iterable(
2200 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2201 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2202
    def _extract_comments(self, ytcfg, video_id, contents, webpage):
        """Entry for comment extraction

        Walks the itemSectionRenderer entries of the initial data, drains the
        _comment_entries generator (which interleaves comment dicts with a
        single int for the estimated total) and returns
        {'comments': [...], 'comment_count': N}.
        """
        def _real_comment_extract(contents):
            if isinstance(contents, list):
                for entry in contents:
                    for key, renderer in entry.items():
                        if key not in known_entry_comment_renderers:
                            continue
                        yield from self._comment_entries(
                            renderer, video_id=video_id, ytcfg=ytcfg,
                            identity_token=self._extract_identity_token(webpage, item_id=video_id),
                            account_syncid=self._extract_account_syncid(ytcfg))
                        break
        comments = []
        known_entry_comment_renderers = ('itemSectionRenderer',)
        estimated_total = 0
        # max_comments caps how many comments are kept (inf = unlimited)
        max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')

        try:
            for comment in _real_comment_extract(contents):
                if len(comments) >= max_comments:
                    break
                # An int yielded by the generator is the estimated total
                # from the section header, not a comment
                if isinstance(comment, int):
                    estimated_total = comment
                    continue
                comments.append(comment)
        except KeyboardInterrupt:
            # Keep whatever was downloaded so far instead of aborting
            self.to_screen('Interrupted by user')
        self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
        return {
            'comments': comments,
            'comment_count': len(comments),
        }
2236
2237 @staticmethod
2238 def _generate_player_context(sts=None):
2239 context = {
2240 'html5Preference': 'HTML5_PREF_WANTS',
2241 }
2242 if sts is not None:
2243 context['signatureTimestamp'] = sts
2244 return {
2245 'playbackContext': {
2246 'contentPlaybackContext': context
2247 }
2248 }
2249
2250 @staticmethod
2251 def _get_video_info_params(video_id):
2252 return {
2253 'video_id': video_id,
2254 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2255 'html5': '1',
2256 'c': 'TVHTML5',
2257 'cver': '6.20180913',
2258 }
2259
2260 def _real_extract(self, url):
2261 url, smuggled_data = unsmuggle_url(url, {})
2262 video_id = self._match_id(url)
2263
2264 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2265
2266 base_url = self.http_scheme() + '//www.youtube.com/'
2267 webpage_url = base_url + 'watch?v=' + video_id
2268 webpage = self._download_webpage(
2269 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2270
2271 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2272 identity_token = self._extract_identity_token(webpage, video_id)
2273 syncid = self._extract_account_syncid(ytcfg)
2274 headers = self._generate_api_headers(ytcfg, identity_token, syncid)
2275
2276 player_url = self._extract_player_url(ytcfg, webpage)
2277
2278 player_client = self._configuration_arg('player_client', [''])[0]
2279 if player_client not in ('web', 'android', ''):
2280 self.report_warning(f'Invalid player_client {player_client} given. Falling back to WEB')
2281 force_mobile_client = player_client == 'android'
2282 player_skip = self._configuration_arg('player_skip')
2283
2284 def get_text(x):
2285 if not x:
2286 return
2287 text = x.get('simpleText')
2288 if text and isinstance(text, compat_str):
2289 return text
2290 runs = x.get('runs')
2291 if not isinstance(runs, list):
2292 return
2293 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
2294
2295 ytm_streaming_data = {}
2296 if is_music_url:
2297 ytm_webpage = None
2298 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2299 if sts and not force_mobile_client and 'configs' not in player_skip:
2300 ytm_webpage = self._download_webpage(
2301 'https://music.youtube.com',
2302 video_id, fatal=False, note='Downloading remix client config')
2303
2304 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2305 ytm_client = 'WEB_REMIX'
2306 if not sts or force_mobile_client:
2307 # Android client already has signature descrambled
2308 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2309 if not sts:
2310 self.report_warning('Falling back to mobile remix client for player API.')
2311 ytm_client = 'ANDROID_MUSIC'
2312 ytm_cfg = {}
2313
2314 ytm_headers = self._generate_api_headers(
2315 ytm_cfg, identity_token, syncid,
2316 client=ytm_client)
2317 ytm_query = {'videoId': video_id}
2318 ytm_query.update(self._generate_player_context(sts))
2319
2320 ytm_player_response = self._extract_response(
2321 item_id=video_id, ep='player', query=ytm_query,
2322 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2323 default_client=ytm_client,
2324 note='Downloading %sremix player API JSON' % ('mobile ' if force_mobile_client else ''))
2325 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
2326
2327 player_response = None
2328 if webpage:
2329 player_response = self._extract_yt_initial_variable(
2330 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2331 video_id, 'initial player response')
2332
2333 if not player_response or force_mobile_client:
2334 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2335 yt_client = 'WEB'
2336 ytpcfg = ytcfg
2337 ytp_headers = headers
2338 if not sts or force_mobile_client:
2339 # Android client already has signature descrambled
2340 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2341 if not sts:
2342 self.report_warning('Falling back to mobile client for player API.')
2343 yt_client = 'ANDROID'
2344 ytpcfg = {}
2345 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid, yt_client)
2346
2347 yt_query = {'videoId': video_id}
2348 yt_query.update(self._generate_player_context(sts))
2349 player_response = self._extract_response(
2350 item_id=video_id, ep='player', query=yt_query,
2351 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2352 default_client=yt_client,
2353 note='Downloading %splayer API JSON' % ('mobile ' if force_mobile_client else '')
2354 )
2355
2356 # Age-gate workarounds
2357 playability_status = player_response.get('playabilityStatus') or {}
2358 if playability_status.get('reason') in self._AGE_GATE_REASONS:
2359 pr = self._parse_json(try_get(compat_parse_qs(
2360 self._download_webpage(
2361 base_url + 'get_video_info', video_id,
2362 'Refetching age-gated info webpage', 'unable to download video info webpage',
2363 query=self._get_video_info_params(video_id), fatal=False)),
2364 lambda x: x['player_response'][0],
2365 compat_str) or '{}', video_id)
2366 if not pr:
2367 self.report_warning('Falling back to embedded-only age-gate workaround.')
2368 embed_webpage = None
2369 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2370 if sts and not force_mobile_client and 'configs' not in player_skip:
2371 embed_webpage = self._download_webpage(
2372 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2373 video_id=video_id, note='Downloading age-gated embed config')
2374
2375 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2376 # If we extracted the embed webpage, it'll tell us if we can view the video
2377 embedded_pr = self._parse_json(
2378 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2379 video_id=video_id)
2380 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2381 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2382 yt_client = 'WEB_EMBEDDED_PLAYER'
2383 if not sts or force_mobile_client:
2384 # Android client already has signature descrambled
2385 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2386 if not sts:
2387 self.report_warning(
2388 'Falling back to mobile embedded client for player API (note: some formats may be missing).')
2389 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2390 ytcfg_age = {}
2391
2392 ytage_headers = self._generate_api_headers(
2393 ytcfg_age, identity_token, syncid, client=yt_client)
2394 yt_age_query = {'videoId': video_id}
2395 yt_age_query.update(self._generate_player_context(sts))
2396 pr = self._extract_response(
2397 item_id=video_id, ep='player', query=yt_age_query,
2398 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2399 default_client=yt_client,
2400 note='Downloading %sage-gated player API JSON' % ('mobile ' if force_mobile_client else '')
2401 ) or {}
2402
2403 if pr:
2404 player_response = pr
2405
2406 trailer_video_id = try_get(
2407 playability_status,
2408 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2409 compat_str)
2410 if trailer_video_id:
2411 return self.url_result(
2412 trailer_video_id, self.ie_key(), trailer_video_id)
2413
2414 search_meta = (
2415 lambda x: self._html_search_meta(x, webpage, default=None)) \
2416 if webpage else lambda x: None
2417
2418 video_details = player_response.get('videoDetails') or {}
2419 microformat = try_get(
2420 player_response,
2421 lambda x: x['microformat']['playerMicroformatRenderer'],
2422 dict) or {}
2423 video_title = video_details.get('title') \
2424 or get_text(microformat.get('title')) \
2425 or search_meta(['og:title', 'twitter:title', 'title'])
2426 video_description = video_details.get('shortDescription')
2427
2428 if not smuggled_data.get('force_singlefeed', False):
2429 if not self.get_param('noplaylist'):
2430 multifeed_metadata_list = try_get(
2431 player_response,
2432 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2433 compat_str)
2434 if multifeed_metadata_list:
2435 entries = []
2436 feed_ids = []
2437 for feed in multifeed_metadata_list.split(','):
2438 # Unquote should take place before split on comma (,) since textual
2439 # fields may contain comma as well (see
2440 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2441 feed_data = compat_parse_qs(
2442 compat_urllib_parse_unquote_plus(feed))
2443
2444 def feed_entry(name):
2445 return try_get(
2446 feed_data, lambda x: x[name][0], compat_str)
2447
2448 feed_id = feed_entry('id')
2449 if not feed_id:
2450 continue
2451 feed_title = feed_entry('title')
2452 title = video_title
2453 if feed_title:
2454 title += ' (%s)' % feed_title
2455 entries.append({
2456 '_type': 'url_transparent',
2457 'ie_key': 'Youtube',
2458 'url': smuggle_url(
2459 base_url + 'watch?v=' + feed_data['id'][0],
2460 {'force_singlefeed': True}),
2461 'title': title,
2462 })
2463 feed_ids.append(feed_id)
2464 self.to_screen(
2465 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2466 % (', '.join(feed_ids), video_id))
2467 return self.playlist_result(
2468 entries, video_id, video_title, video_description)
2469 else:
2470 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2471
2472 formats, itags, stream_ids = [], [], []
2473 itag_qualities = {}
2474 q = qualities([
2475 # "tiny" is the smallest video-only format. But some audio-only formats
2476 # was also labeled "tiny". It is not clear if such formats still exist
2477 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2478 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2479 ])
2480
2481 streaming_data = player_response.get('streamingData') or {}
2482 streaming_formats = streaming_data.get('formats') or []
2483 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
2484 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2485 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2486
2487 for fmt in streaming_formats:
2488 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2489 continue
2490
2491 itag = str_or_none(fmt.get('itag'))
2492 audio_track = fmt.get('audioTrack') or {}
2493 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2494 if stream_id in stream_ids:
2495 continue
2496
2497 quality = fmt.get('quality')
2498 if quality == 'tiny' or not quality:
2499 quality = fmt.get('audioQuality', '').lower() or quality
2500 if itag and quality:
2501 itag_qualities[itag] = quality
2502 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2503 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2504 # number of fragment that would subsequently requested with (`&sq=N`)
2505 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2506 continue
2507
2508 fmt_url = fmt.get('url')
2509 if not fmt_url:
2510 sc = compat_parse_qs(fmt.get('signatureCipher'))
2511 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2512 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2513 if not (sc and fmt_url and encrypted_sig):
2514 continue
2515 if not player_url:
2516 continue
2517 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2518 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2519 fmt_url += '&' + sp + '=' + signature
2520
2521 if itag:
2522 itags.append(itag)
2523 stream_ids.append(stream_id)
2524
2525 tbr = float_or_none(
2526 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2527 dct = {
2528 'asr': int_or_none(fmt.get('audioSampleRate')),
2529 'filesize': int_or_none(fmt.get('contentLength')),
2530 'format_id': itag,
2531 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
2532 'fps': int_or_none(fmt.get('fps')),
2533 'height': int_or_none(fmt.get('height')),
2534 'quality': q(quality),
2535 'tbr': tbr,
2536 'url': fmt_url,
2537 'width': fmt.get('width'),
2538 'language': audio_track.get('id', '').split('.')[0],
2539 }
2540 mime_mobj = re.match(
2541 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2542 if mime_mobj:
2543 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2544 dct.update(parse_codecs(mime_mobj.group(2)))
2545 # The 3gp format in android client has a quality of "small",
2546 # but is actually worse than all other formats
2547 if dct['ext'] == '3gp':
2548 dct['quality'] = q('tiny')
2549 no_audio = dct.get('acodec') == 'none'
2550 no_video = dct.get('vcodec') == 'none'
2551 if no_audio:
2552 dct['vbr'] = tbr
2553 if no_video:
2554 dct['abr'] = tbr
2555 if no_audio or no_video:
2556 dct['downloader_options'] = {
2557 # Youtube throttles chunks >~10M
2558 'http_chunk_size': 10485760,
2559 }
2560 if dct.get('ext'):
2561 dct['container'] = dct['ext'] + '_dash'
2562 formats.append(dct)
2563
2564 skip_manifests = self._configuration_arg('skip')
2565 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2566 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2567
2568 for sd in (streaming_data, ytm_streaming_data):
2569 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2570 if hls_manifest_url:
2571 for f in self._extract_m3u8_formats(
2572 hls_manifest_url, video_id, 'mp4', fatal=False):
2573 itag = self._search_regex(
2574 r'/itag/(\d+)', f['url'], 'itag', default=None)
2575 if itag:
2576 f['format_id'] = itag
2577 formats.append(f)
2578
2579 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2580 if dash_manifest_url:
2581 for f in self._extract_mpd_formats(
2582 dash_manifest_url, video_id, fatal=False):
2583 itag = f['format_id']
2584 if itag in itags:
2585 continue
2586 if itag in itag_qualities:
2587 f['quality'] = q(itag_qualities[itag])
2588 filesize = int_or_none(self._search_regex(
2589 r'/clen/(\d+)', f.get('fragment_base_url')
2590 or f['url'], 'file size', default=None))
2591 if filesize:
2592 f['filesize'] = filesize
2593 formats.append(f)
2594
2595 if not formats:
2596 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
2597 self.raise_no_formats(
2598 'This video is DRM protected.', expected=True)
2599 pemr = try_get(
2600 playability_status,
2601 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2602 dict) or {}
2603 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2604 subreason = pemr.get('subreason')
2605 if subreason:
2606 subreason = clean_html(get_text(subreason))
2607 if subreason == 'The uploader has not made this video available in your country.':
2608 countries = microformat.get('availableCountries')
2609 if not countries:
2610 regions_allowed = search_meta('regionsAllowed')
2611 countries = regions_allowed.split(',') if regions_allowed else None
2612 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2613 reason += '\n' + subreason
2614 if reason:
2615 self.raise_no_formats(reason, expected=True)
2616
2617 self._sort_formats(formats)
2618
2619 keywords = video_details.get('keywords') or []
2620 if not keywords and webpage:
2621 keywords = [
2622 unescapeHTML(m.group('content'))
2623 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2624 for keyword in keywords:
2625 if keyword.startswith('yt:stretch='):
2626 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2627 if mobj:
2628 # NB: float is intentional for forcing float division
2629 w, h = (float(v) for v in mobj.groups())
2630 if w > 0 and h > 0:
2631 ratio = w / h
2632 for f in formats:
2633 if f.get('vcodec') != 'none':
2634 f['stretched_ratio'] = ratio
2635 break
2636
2637 thumbnails = []
2638 for container in (video_details, microformat):
2639 for thumbnail in (try_get(
2640 container,
2641 lambda x: x['thumbnail']['thumbnails'], list) or []):
2642 thumbnail_url = thumbnail.get('url')
2643 if not thumbnail_url:
2644 continue
2645 # Sometimes youtube gives a wrong thumbnail URL. See:
2646 # https://github.com/yt-dlp/yt-dlp/issues/233
2647 # https://github.com/ytdl-org/youtube-dl/issues/28023
2648 if 'maxresdefault' in thumbnail_url:
2649 thumbnail_url = thumbnail_url.split('?')[0]
2650 thumbnails.append({
2651 'url': thumbnail_url,
2652 'height': int_or_none(thumbnail.get('height')),
2653 'width': int_or_none(thumbnail.get('width')),
2654 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2655 })
2656 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2657 if thumbnail_url:
2658 thumbnails.append({
2659 'url': thumbnail_url,
2660 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2661 })
2662 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2663 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2664 thumbnails.append({
2665 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2666 'preference': 1,
2667 })
2668 self._remove_duplicate_formats(thumbnails)
2669
2670 category = microformat.get('category') or search_meta('genre')
2671 channel_id = video_details.get('channelId') \
2672 or microformat.get('externalChannelId') \
2673 or search_meta('channelId')
2674 duration = int_or_none(
2675 video_details.get('lengthSeconds')
2676 or microformat.get('lengthSeconds')) \
2677 or parse_duration(search_meta('duration'))
2678 is_live = video_details.get('isLive')
2679 is_upcoming = video_details.get('isUpcoming')
2680 owner_profile_url = microformat.get('ownerProfileUrl')
2681
2682 info = {
2683 'id': video_id,
2684 'title': self._live_title(video_title) if is_live else video_title,
2685 'formats': formats,
2686 'thumbnails': thumbnails,
2687 'description': video_description,
2688 'upload_date': unified_strdate(
2689 microformat.get('uploadDate')
2690 or search_meta('uploadDate')),
2691 'uploader': video_details['author'],
2692 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2693 'uploader_url': owner_profile_url,
2694 'channel_id': channel_id,
2695 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2696 'duration': duration,
2697 'view_count': int_or_none(
2698 video_details.get('viewCount')
2699 or microformat.get('viewCount')
2700 or search_meta('interactionCount')),
2701 'average_rating': float_or_none(video_details.get('averageRating')),
2702 'age_limit': 18 if (
2703 microformat.get('isFamilySafe') is False
2704 or search_meta('isFamilyFriendly') == 'false'
2705 or search_meta('og:restrictions:age') == '18+') else 0,
2706 'webpage_url': webpage_url,
2707 'categories': [category] if category else None,
2708 'tags': keywords,
2709 'is_live': is_live,
2710 'playable_in_embed': playability_status.get('playableInEmbed'),
2711 'was_live': video_details.get('isLiveContent'),
2712 }
2713
2714 pctr = try_get(
2715 player_response,
2716 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2717 subtitles = {}
2718 if pctr:
2719 def process_language(container, base_url, lang_code, sub_name, query):
2720 lang_subs = container.setdefault(lang_code, [])
2721 for fmt in self._SUBTITLE_FORMATS:
2722 query.update({
2723 'fmt': fmt,
2724 })
2725 lang_subs.append({
2726 'ext': fmt,
2727 'url': update_url_query(base_url, query),
2728 'name': sub_name,
2729 })
2730
2731 for caption_track in (pctr.get('captionTracks') or []):
2732 base_url = caption_track.get('baseUrl')
2733 if not base_url:
2734 continue
2735 if caption_track.get('kind') != 'asr':
2736 lang_code = (
2737 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2738 or caption_track.get('languageCode'))
2739 if not lang_code:
2740 continue
2741 process_language(
2742 subtitles, base_url, lang_code,
2743 try_get(caption_track, lambda x: x['name']['simpleText']),
2744 {})
2745 continue
2746 automatic_captions = {}
2747 for translation_language in (pctr.get('translationLanguages') or []):
2748 translation_language_code = translation_language.get('languageCode')
2749 if not translation_language_code:
2750 continue
2751 process_language(
2752 automatic_captions, base_url, translation_language_code,
2753 try_get(translation_language, (
2754 lambda x: x['languageName']['simpleText'],
2755 lambda x: x['languageName']['runs'][0]['text'])),
2756 {'tlang': translation_language_code})
2757 info['automatic_captions'] = automatic_captions
2758 info['subtitles'] = subtitles
2759
2760 parsed_url = compat_urllib_parse_urlparse(url)
2761 for component in [parsed_url.fragment, parsed_url.query]:
2762 query = compat_parse_qs(component)
2763 for k, v in query.items():
2764 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2765 d_k += '_time'
2766 if d_k not in info and k in s_ks:
2767 info[d_k] = parse_duration(query[k][0])
2768
2769 # Youtube Music Auto-generated description
2770 if video_description:
2771 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2772 if mobj:
2773 release_year = mobj.group('release_year')
2774 release_date = mobj.group('release_date')
2775 if release_date:
2776 release_date = release_date.replace('-', '')
2777 if not release_year:
2778 release_year = release_date[:4]
2779 info.update({
2780 'album': mobj.group('album'.strip()),
2781 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2782 'track': mobj.group('track').strip(),
2783 'release_date': release_date,
2784 'release_year': int_or_none(release_year),
2785 })
2786
2787 initial_data = None
2788 if webpage:
2789 initial_data = self._extract_yt_initial_variable(
2790 webpage, self._YT_INITIAL_DATA_RE, video_id,
2791 'yt initial data')
2792 if not initial_data:
2793 initial_data = self._extract_response(
2794 item_id=video_id, ep='next', fatal=False,
2795 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2796 note='Downloading initial data API JSON')
2797
2798 try:
2799 # This will error if there is no livechat
2800 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2801 info['subtitles']['live_chat'] = [{
2802 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2803 'video_id': video_id,
2804 'ext': 'json',
2805 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2806 }]
2807 except (KeyError, IndexError, TypeError):
2808 pass
2809
2810 if initial_data:
2811 chapters = self._extract_chapters_from_json(
2812 initial_data, video_id, duration)
2813 if not chapters:
2814 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2815 contents = try_get(
2816 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2817 list)
2818 if not contents:
2819 continue
2820
2821 def chapter_time(mmlir):
2822 return parse_duration(
2823 get_text(mmlir.get('timeDescription')))
2824
2825 chapters = []
2826 for next_num, content in enumerate(contents, start=1):
2827 mmlir = content.get('macroMarkersListItemRenderer') or {}
2828 start_time = chapter_time(mmlir)
2829 end_time = chapter_time(try_get(
2830 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2831 if next_num < len(contents) else duration
2832 if start_time is None or end_time is None:
2833 continue
2834 chapters.append({
2835 'start_time': start_time,
2836 'end_time': end_time,
2837 'title': get_text(mmlir.get('title')),
2838 })
2839 if chapters:
2840 break
2841 if chapters:
2842 info['chapters'] = chapters
2843
2844 contents = try_get(
2845 initial_data,
2846 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2847 list) or []
2848 for content in contents:
2849 vpir = content.get('videoPrimaryInfoRenderer')
2850 if vpir:
2851 stl = vpir.get('superTitleLink')
2852 if stl:
2853 stl = get_text(stl)
2854 if try_get(
2855 vpir,
2856 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2857 info['location'] = stl
2858 else:
2859 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2860 if mobj:
2861 info.update({
2862 'series': mobj.group(1),
2863 'season_number': int(mobj.group(2)),
2864 'episode_number': int(mobj.group(3)),
2865 })
2866 for tlb in (try_get(
2867 vpir,
2868 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2869 list) or []):
2870 tbr = tlb.get('toggleButtonRenderer') or {}
2871 for getter, regex in [(
2872 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2873 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2874 lambda x: x['accessibility'],
2875 lambda x: x['accessibilityData']['accessibilityData'],
2876 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2877 label = (try_get(tbr, getter, dict) or {}).get('label')
2878 if label:
2879 mobj = re.match(regex, label)
2880 if mobj:
2881 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2882 break
2883 sbr_tooltip = try_get(
2884 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2885 if sbr_tooltip:
2886 like_count, dislike_count = sbr_tooltip.split(' / ')
2887 info.update({
2888 'like_count': str_to_int(like_count),
2889 'dislike_count': str_to_int(dislike_count),
2890 })
2891 vsir = content.get('videoSecondaryInfoRenderer')
2892 if vsir:
2893 info['channel'] = get_text(try_get(
2894 vsir,
2895 lambda x: x['owner']['videoOwnerRenderer']['title'],
2896 dict))
2897 rows = try_get(
2898 vsir,
2899 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2900 list) or []
2901 multiple_songs = False
2902 for row in rows:
2903 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2904 multiple_songs = True
2905 break
2906 for row in rows:
2907 mrr = row.get('metadataRowRenderer') or {}
2908 mrr_title = mrr.get('title')
2909 if not mrr_title:
2910 continue
2911 mrr_title = get_text(mrr['title'])
2912 mrr_contents_text = get_text(mrr['contents'][0])
2913 if mrr_title == 'License':
2914 info['license'] = mrr_contents_text
2915 elif not multiple_songs:
2916 if mrr_title == 'Album':
2917 info['album'] = mrr_contents_text
2918 elif mrr_title == 'Artist':
2919 info['artist'] = mrr_contents_text
2920 elif mrr_title == 'Song':
2921 info['track'] = mrr_contents_text
2922
2923 fallbacks = {
2924 'channel': 'uploader',
2925 'channel_id': 'uploader_id',
2926 'channel_url': 'uploader_url',
2927 }
2928 for to, frm in fallbacks.items():
2929 if not info.get(to):
2930 info[to] = info.get(frm)
2931
2932 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2933 v = info.get(s_k)
2934 if v:
2935 info[d_k] = v
2936
2937 is_private = bool_or_none(video_details.get('isPrivate'))
2938 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2939 is_membersonly = None
2940 is_premium = None
2941 if initial_data and is_private is not None:
2942 is_membersonly = False
2943 is_premium = False
2944 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2945 for content in contents or []:
2946 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2947 for badge in badges or []:
2948 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2949 if label.lower() == 'members only':
2950 is_membersonly = True
2951 break
2952 elif label.lower() == 'premium':
2953 is_premium = True
2954 break
2955 if is_membersonly or is_premium:
2956 break
2957
2958 # TODO: Add this for playlists
2959 info['availability'] = self._availability(
2960 is_private=is_private,
2961 needs_premium=is_premium,
2962 needs_subscription=is_membersonly,
2963 needs_auth=info['age_limit'] >= 18,
2964 is_unlisted=None if is_private is None else is_unlisted)
2965
2966 # get xsrf for annotations or comments
2967 get_annotations = self.get_param('writeannotations', False)
2968 get_comments = self.get_param('getcomments', False)
2969 if get_annotations or get_comments:
2970 xsrf_token = None
2971 ytcfg = self._extract_ytcfg(video_id, webpage)
2972 if ytcfg:
2973 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2974 if not xsrf_token:
2975 xsrf_token = self._search_regex(
2976 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2977 webpage, 'xsrf token', group='xsrf_token', fatal=False)
2978
2979 # annotations
2980 if get_annotations:
2981 invideo_url = try_get(
2982 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2983 if xsrf_token and invideo_url:
2984 xsrf_field_name = None
2985 if ytcfg:
2986 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
2987 if not xsrf_field_name:
2988 xsrf_field_name = self._search_regex(
2989 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
2990 webpage, 'xsrf field name',
2991 group='xsrf_field_name', default='session_token')
2992 info['annotations'] = self._download_webpage(
2993 self._proto_relative_url(invideo_url),
2994 video_id, note='Downloading annotations',
2995 errnote='Unable to download video annotations', fatal=False,
2996 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
2997
2998 if get_comments:
2999 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
3000
3001 self.mark_watched(video_id, player_response)
3002
3003 return info
3004
3005
3006 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3007 IE_DESC = 'YouTube.com tab'
3008 _VALID_URL = r'''(?x)
3009 https?://
3010 (?:\w+\.)?
3011 (?:
3012 youtube(?:kids)?\.com|
3013 invidio\.us
3014 )/
3015 (?:
3016 (?P<channel_type>channel|c|user|browse)/|
3017 (?P<not_channel>
3018 feed/|hashtag/|
3019 (?:playlist|watch)\?.*?\blist=
3020 )|
3021 (?!(?:%s)\b) # Direct URLs
3022 )
3023 (?P<id>[^/?\#&]+)
3024 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3025 IE_NAME = 'youtube:tab'
3026
3027 _TESTS = [{
3028 'note': 'playlists, multipage',
3029 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3030 'playlist_mincount': 94,
3031 'info_dict': {
3032 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3033 'title': 'Игорь Клейнер - Playlists',
3034 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3035 'uploader': 'Игорь Клейнер',
3036 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3037 },
3038 }, {
3039 'note': 'playlists, multipage, different order',
3040 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3041 'playlist_mincount': 94,
3042 'info_dict': {
3043 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3044 'title': 'Игорь Клейнер - Playlists',
3045 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3046 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3047 'uploader': 'Игорь Клейнер',
3048 },
3049 }, {
3050 'note': 'playlists, series',
3051 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3052 'playlist_mincount': 5,
3053 'info_dict': {
3054 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3055 'title': '3Blue1Brown - Playlists',
3056 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3057 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3058 'uploader': '3Blue1Brown',
3059 },
3060 }, {
3061 'note': 'playlists, singlepage',
3062 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3063 'playlist_mincount': 4,
3064 'info_dict': {
3065 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3066 'title': 'ThirstForScience - Playlists',
3067 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3068 'uploader': 'ThirstForScience',
3069 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3070 }
3071 }, {
3072 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3073 'only_matching': True,
3074 }, {
3075 'note': 'basic, single video playlist',
3076 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3077 'info_dict': {
3078 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3079 'uploader': 'Sergey M.',
3080 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3081 'title': 'youtube-dl public playlist',
3082 },
3083 'playlist_count': 1,
3084 }, {
3085 'note': 'empty playlist',
3086 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3087 'info_dict': {
3088 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3089 'uploader': 'Sergey M.',
3090 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3091 'title': 'youtube-dl empty playlist',
3092 },
3093 'playlist_count': 0,
3094 }, {
3095 'note': 'Home tab',
3096 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3097 'info_dict': {
3098 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3099 'title': 'lex will - Home',
3100 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3101 'uploader': 'lex will',
3102 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3103 },
3104 'playlist_mincount': 2,
3105 }, {
3106 'note': 'Videos tab',
3107 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3108 'info_dict': {
3109 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3110 'title': 'lex will - Videos',
3111 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3112 'uploader': 'lex will',
3113 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3114 },
3115 'playlist_mincount': 975,
3116 }, {
3117 'note': 'Videos tab, sorted by popular',
3118 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3119 'info_dict': {
3120 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3121 'title': 'lex will - Videos',
3122 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3123 'uploader': 'lex will',
3124 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3125 },
3126 'playlist_mincount': 199,
3127 }, {
3128 'note': 'Playlists tab',
3129 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3130 'info_dict': {
3131 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3132 'title': 'lex will - Playlists',
3133 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3134 'uploader': 'lex will',
3135 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3136 },
3137 'playlist_mincount': 17,
3138 }, {
3139 'note': 'Community tab',
3140 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3141 'info_dict': {
3142 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3143 'title': 'lex will - Community',
3144 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3145 'uploader': 'lex will',
3146 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3147 },
3148 'playlist_mincount': 18,
3149 }, {
3150 'note': 'Channels tab',
3151 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3152 'info_dict': {
3153 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3154 'title': 'lex will - Channels',
3155 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3156 'uploader': 'lex will',
3157 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3158 },
3159 'playlist_mincount': 12,
3160 }, {
3161 'note': 'Search tab',
3162 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3163 'playlist_mincount': 40,
3164 'info_dict': {
3165 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3166 'title': '3Blue1Brown - Search - linear algebra',
3167 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3168 'uploader': '3Blue1Brown',
3169 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3170 },
3171 }, {
3172 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3173 'only_matching': True,
3174 }, {
3175 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3176 'only_matching': True,
3177 }, {
3178 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3179 'only_matching': True,
3180 }, {
3181 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3182 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3183 'info_dict': {
3184 'title': '29C3: Not my department',
3185 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3186 'uploader': 'Christiaan008',
3187 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3188 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3189 },
3190 'playlist_count': 96,
3191 }, {
3192 'note': 'Large playlist',
3193 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3194 'info_dict': {
3195 'title': 'Uploads from Cauchemar',
3196 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3197 'uploader': 'Cauchemar',
3198 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3199 },
3200 'playlist_mincount': 1123,
3201 }, {
3202 'note': 'even larger playlist, 8832 videos',
3203 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3204 'only_matching': True,
3205 }, {
3206 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3207 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3208 'info_dict': {
3209 'title': 'Uploads from Interstellar Movie',
3210 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3211 'uploader': 'Interstellar Movie',
3212 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3213 },
3214 'playlist_mincount': 21,
3215 }, {
3216 'note': 'Playlist with "show unavailable videos" button',
3217 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3218 'info_dict': {
3219 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3220 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3221 'uploader': 'Phim Siêu Nhân Nhật Bản',
3222 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3223 },
3224 'playlist_mincount': 200,
3225 }, {
3226 'note': 'Playlist with unavailable videos in page 7',
3227 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3228 'info_dict': {
3229 'title': 'Uploads from BlankTV',
3230 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3231 'uploader': 'BlankTV',
3232 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3233 },
3234 'playlist_mincount': 1000,
3235 }, {
3236 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3237 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3238 'info_dict': {
3239 'title': 'Data Analysis with Dr Mike Pound',
3240 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3241 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3242 'uploader': 'Computerphile',
3243 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3244 },
3245 'playlist_mincount': 11,
3246 }, {
3247 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3248 'only_matching': True,
3249 }, {
3250 'note': 'Playlist URL that does not actually serve a playlist',
3251 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3252 'info_dict': {
3253 'id': 'FqZTN594JQw',
3254 'ext': 'webm',
3255 'title': "Smiley's People 01 detective, Adventure Series, Action",
3256 'uploader': 'STREEM',
3257 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3258 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3259 'upload_date': '20150526',
3260 'license': 'Standard YouTube License',
3261 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3262 'categories': ['People & Blogs'],
3263 'tags': list,
3264 'view_count': int,
3265 'like_count': int,
3266 'dislike_count': int,
3267 },
3268 'params': {
3269 'skip_download': True,
3270 },
3271 'skip': 'This video is not available.',
3272 'add_ie': [YoutubeIE.ie_key()],
3273 }, {
3274 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3275 'only_matching': True,
3276 }, {
3277 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3278 'only_matching': True,
3279 }, {
3280 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3281 'info_dict': {
3282 'id': 'X1whbWASnNQ', # This will keep changing
3283 'ext': 'mp4',
3284 'title': compat_str,
3285 'uploader': 'Sky News',
3286 'uploader_id': 'skynews',
3287 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3288 'upload_date': r're:\d{8}',
3289 'description': compat_str,
3290 'categories': ['News & Politics'],
3291 'tags': list,
3292 'like_count': int,
3293 'dislike_count': int,
3294 },
3295 'params': {
3296 'skip_download': True,
3297 },
3298 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3299 }, {
3300 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3301 'info_dict': {
3302 'id': 'a48o2S1cPoo',
3303 'ext': 'mp4',
3304 'title': 'The Young Turks - Live Main Show',
3305 'uploader': 'The Young Turks',
3306 'uploader_id': 'TheYoungTurks',
3307 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3308 'upload_date': '20150715',
3309 'license': 'Standard YouTube License',
3310 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3311 'categories': ['News & Politics'],
3312 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3313 'like_count': int,
3314 'dislike_count': int,
3315 },
3316 'params': {
3317 'skip_download': True,
3318 },
3319 'only_matching': True,
3320 }, {
3321 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3322 'only_matching': True,
3323 }, {
3324 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3325 'only_matching': True,
3326 }, {
3327 'note': 'A channel that is not live. Should raise error',
3328 'url': 'https://www.youtube.com/user/numberphile/live',
3329 'only_matching': True,
3330 }, {
3331 'url': 'https://www.youtube.com/feed/trending',
3332 'only_matching': True,
3333 }, {
3334 'url': 'https://www.youtube.com/feed/library',
3335 'only_matching': True,
3336 }, {
3337 'url': 'https://www.youtube.com/feed/history',
3338 'only_matching': True,
3339 }, {
3340 'url': 'https://www.youtube.com/feed/subscriptions',
3341 'only_matching': True,
3342 }, {
3343 'url': 'https://www.youtube.com/feed/watch_later',
3344 'only_matching': True,
3345 }, {
3346 'note': 'Recommended - redirects to home page',
3347 'url': 'https://www.youtube.com/feed/recommended',
3348 'only_matching': True,
3349 }, {
3350 'note': 'inline playlist with not always working continuations',
3351 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3352 'only_matching': True,
3353 }, {
3354 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3355 'only_matching': True,
3356 }, {
3357 'url': 'https://www.youtube.com/course',
3358 'only_matching': True,
3359 }, {
3360 'url': 'https://www.youtube.com/zsecurity',
3361 'only_matching': True,
3362 }, {
3363 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3364 'only_matching': True,
3365 }, {
3366 'url': 'https://www.youtube.com/TheYoungTurks/live',
3367 'only_matching': True,
3368 }, {
3369 'url': 'https://www.youtube.com/hashtag/cctv9',
3370 'info_dict': {
3371 'id': 'cctv9',
3372 'title': '#cctv9',
3373 },
3374 'playlist_mincount': 350,
3375 }, {
3376 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3377 'only_matching': True,
3378 }, {
3379 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3380 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3381 'only_matching': True
3382 }, {
3383 'note': '/browse/ should redirect to /channel/',
3384 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3385 'only_matching': True
3386 }, {
3387 'note': 'VLPL, should redirect to playlist?list=PL...',
3388 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3389 'info_dict': {
3390 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3391 'uploader': 'NoCopyrightSounds',
3392 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3393 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3394 'title': 'NCS Releases',
3395 },
3396 'playlist_mincount': 166,
3397 }, {
3398 'note': 'Topic, should redirect to playlist?list=UU...',
3399 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3400 'info_dict': {
3401 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3402 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3403 'title': 'Uploads from Royalty Free Music - Topic',
3404 'uploader': 'Royalty Free Music - Topic',
3405 },
3406 'expected_warnings': [
3407 'A channel/user page was given',
3408 'The URL does not have a videos tab',
3409 ],
3410 'playlist_mincount': 101,
3411 }, {
3412 'note': 'Topic without a UU playlist',
3413 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3414 'info_dict': {
3415 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3416 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3417 },
3418 'expected_warnings': [
3419 'A channel/user page was given',
3420 'The URL does not have a videos tab',
3421 'Falling back to channel URL',
3422 ],
3423 'playlist_mincount': 9,
3424 }, {
3425 'note': 'Youtube music Album',
3426 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3427 'info_dict': {
3428 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3429 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3430 },
3431 'playlist_count': 50,
3432 }]
3433
3434 @classmethod
3435 def suitable(cls, url):
3436 return False if YoutubeIE.suitable(url) else super(
3437 YoutubeTabIE, cls).suitable(url)
3438
3439 def _extract_channel_id(self, webpage):
3440 channel_id = self._html_search_meta(
3441 'channelId', webpage, 'channel id', default=None)
3442 if channel_id:
3443 return channel_id
3444 channel_url = self._html_search_meta(
3445 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3446 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3447 'twitter:app:url:googleplay'), webpage, 'channel url')
3448 return self._search_regex(
3449 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3450 channel_url, 'channel id')
3451
3452 @staticmethod
3453 def _extract_basic_item_renderer(item):
3454 # Modified from _extract_grid_item_renderer
3455 known_basic_renderers = (
3456 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3457 )
3458 for key, renderer in item.items():
3459 if not isinstance(renderer, dict):
3460 continue
3461 elif key in known_basic_renderers:
3462 return renderer
3463 elif key.startswith('grid') and key.endswith('Renderer'):
3464 return renderer
3465
    def _grid_entries(self, grid_renderer):
        """Yield entries (playlists, videos, channels or generic URLs) from a grid renderer."""
        for item in grid_renderer['items']:
            if not isinstance(item, dict):
                continue
            renderer = self._extract_basic_item_renderer(item)
            if not isinstance(renderer, dict):
                continue
            title = try_get(
                renderer, (lambda x: x['title']['runs'][0]['text'],
                           lambda x: x['title']['simpleText']), compat_str)
            # playlist
            playlist_id = renderer.get('playlistId')
            if playlist_id:
                yield self.url_result(
                    'https://www.youtube.com/playlist?list=%s' % playlist_id,
                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
                    video_title=title)
                continue
            # video
            video_id = renderer.get('videoId')
            if video_id:
                yield self._extract_video(renderer)
                continue
            # channel
            channel_id = renderer.get('channelId')
            if channel_id:
                title = try_get(
                    renderer, lambda x: x['title']['simpleText'], compat_str)
                yield self.url_result(
                    'https://www.youtube.com/channel/%s' % channel_id,
                    ie=YoutubeTabIE.ie_key(), video_title=title)
                continue
            # generic endpoint URL support
            ep_url = urljoin('https://www.youtube.com/', try_get(
                renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
                compat_str))
            if ep_url:
                # delegate to the most specific extractor that accepts the URL
                for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
                    if ie.suitable(ep_url):
                        yield self.url_result(
                            ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
                        break
3508
3509 def _shelf_entries_from_content(self, shelf_renderer):
3510 content = shelf_renderer.get('content')
3511 if not isinstance(content, dict):
3512 return
3513 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3514 if renderer:
3515 # TODO: add support for nested playlists so each shelf is processed
3516 # as separate playlist
3517 # TODO: this includes only first N items
3518 for entry in self._grid_entries(renderer):
3519 yield entry
3520 renderer = content.get('horizontalListRenderer')
3521 if renderer:
3522 # TODO
3523 pass
3524
3525 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3526 ep = try_get(
3527 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3528 compat_str)
3529 shelf_url = urljoin('https://www.youtube.com', ep)
3530 if shelf_url:
3531 # Skipping links to another channels, note that checking for
3532 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3533 # will not work
3534 if skip_channels and '/channels?' in shelf_url:
3535 return
3536 title = try_get(
3537 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
3538 yield self.url_result(shelf_url, video_title=title)
3539 # Shelf may not contain shelf URL, fallback to extraction from content
3540 for entry in self._shelf_entries_from_content(shelf_renderer):
3541 yield entry
3542
3543 def _playlist_entries(self, video_list_renderer):
3544 for content in video_list_renderer['contents']:
3545 if not isinstance(content, dict):
3546 continue
3547 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3548 if not isinstance(renderer, dict):
3549 continue
3550 video_id = renderer.get('videoId')
3551 if not video_id:
3552 continue
3553 yield self._extract_video(renderer)
3554
3555 def _rich_entries(self, rich_grid_renderer):
3556 renderer = try_get(
3557 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3558 video_id = renderer.get('videoId')
3559 if not video_id:
3560 return
3561 yield self._extract_video(renderer)
3562
3563 def _video_entry(self, video_renderer):
3564 video_id = video_renderer.get('videoId')
3565 if video_id:
3566 return self._extract_video(video_renderer)
3567
    def _post_thread_entries(self, post_thread_renderer):
        """Yield entries of a community post: attached video, attached playlist,
        then any inline video links in the post text."""
        post_renderer = try_get(
            post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
        if not post_renderer:
            return
        # video attachment
        video_renderer = try_get(
            post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
        video_id = video_renderer.get('videoId')
        if video_id:
            entry = self._extract_video(video_renderer)
            if entry:
                yield entry
        # playlist attachment
        playlist_id = try_get(
            post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
        if playlist_id:
            yield self.url_result(
                'https://www.youtube.com/playlist?list=%s' % playlist_id,
                ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
        # inline video links
        runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
        for run in runs:
            if not isinstance(run, dict):
                continue
            ep_url = try_get(
                run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
            if not ep_url:
                continue
            if not YoutubeIE.suitable(ep_url):
                continue
            ep_video_id = YoutubeIE._match_id(ep_url)
            # skip links that merely point at the attached video itself
            if video_id == ep_video_id:
                continue
            yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3603
3604 def _post_thread_continuation_entries(self, post_thread_continuation):
3605 contents = post_thread_continuation.get('contents')
3606 if not isinstance(contents, list):
3607 return
3608 for content in contents:
3609 renderer = content.get('backstagePostThreadRenderer')
3610 if not isinstance(renderer, dict):
3611 continue
3612 for entry in self._post_thread_entries(renderer):
3613 yield entry
3614
    r''' # unused
    def _rich_grid_entries(self, contents):
        for content in contents:
            video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
            if video_renderer:
                entry = self._video_entry(video_renderer)
                if entry:
                    yield entry
    '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield every entry of the given tab, transparently following API
        continuations until exhausted.

        identity_token/account_syncid/ytcfg are forwarded to the API request
        headers for continuation requests.
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                    if not continuation_list[0]:
                        continuation_list[0] = self._extract_continuation(is_renderer)

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        context = self._extract_context(ytcfg)
        visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

        for page_num in itertools.count(1):
            if not continuation:
                break
            query = {
                'continuation': continuation['continuation'],
                'clickTracking': {'clickTrackingParams': continuation['itct']}
            }
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=query, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # carry visitor data across requests so pagination stays consistent
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # wrap the flat item list so the regular renderer parsers accept it
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3743
3744 @staticmethod
3745 def _extract_selected_tab(tabs):
3746 for tab in tabs:
3747 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3748 if renderer.get('selected') is True:
3749 return renderer
3750 else:
3751 raise ExtractorError('Unable to find selected tab')
3752
3753 @staticmethod
3754 def _extract_uploader(data):
3755 uploader = {}
3756 sidebar_renderer = try_get(
3757 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3758 if sidebar_renderer:
3759 for item in sidebar_renderer:
3760 if not isinstance(item, dict):
3761 continue
3762 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3763 if not isinstance(renderer, dict):
3764 continue
3765 owner = try_get(
3766 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3767 if owner:
3768 uploader['uploader'] = owner.get('text')
3769 uploader['uploader_id'] = try_get(
3770 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3771 uploader['uploader_url'] = urljoin(
3772 'https://www.youtube.com/',
3773 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3774 return {k: v for k, v in uploader.items() if v is not None}
3775
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build a playlist result from a tab page (channel, playlist or hashtag)."""
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    data,
                    lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        # qualify the title with the selected tab's name, e.g. "Channel - Videos"
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')

        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        if not channel_id:
            # fall back to the sidebar owner info (playlist pages)
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(data),
                self._extract_ytcfg(item_id, webpage)),
            **metadata)
3848
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Yield videos of an 'infinite' Mix playlist, requesting further
        watch-next pages until the panel repeats or runs out."""
        first_id = last_id = None
        ytcfg = self._extract_ytcfg(playlist_id, webpage)
        headers = self._generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(data),
            identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
            visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # resume right after the last video yielded from the previous page
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query,
                ep='next',
                headers=headers,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3887
3888 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
3889 title = playlist.get('title') or try_get(
3890 data, lambda x: x['titleText']['simpleText'], compat_str)
3891 playlist_id = playlist.get('playlistId') or item_id
3892
3893 # Delegating everything except mix playlists to regular tab-based playlist URL
3894 playlist_url = urljoin(url, try_get(
3895 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3896 compat_str))
3897 if playlist_url and playlist_url != url:
3898 return self.url_result(
3899 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3900 video_title=title)
3901
3902 return self.playlist_result(
3903 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
3904 playlist_id=playlist_id, playlist_title=title)
3905
    def _reload_with_unavailable_videos(self, item_id, data, webpage):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.
        """
        sidebar_renderer = try_get(
            data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
        if not sidebar_renderer:
            return
        browse_id = params = None
        for item in sidebar_renderer:
            if not isinstance(item, dict):
                continue
            renderer = item.get('playlistSidebarPrimaryInfoRenderer')
            menu_renderer = try_get(
                renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
            for menu_item in menu_renderer:
                if not isinstance(menu_item, dict):
                    continue
                nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
                text = try_get(
                    nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
                # the button is identified only by its visible (English) label
                if not text or text.lower() != 'show unavailable videos':
                    continue
                browse_endpoint = try_get(
                    nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id = browse_endpoint.get('browseId')
                params = browse_endpoint.get('params')
                break

        ytcfg = self._extract_ytcfg(item_id, webpage)
        headers = self._generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
            identity_token=self._extract_identity_token(webpage, item_id=item_id),
            visitor_data=try_get(
                self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        query = {
            # NOTE(review): the request is made with default params/browseId
            # even when the button was not found — confirm this is intended
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False,
            note='Downloading API JSON with unavailable videos')
3949
3950 def _extract_webpage(self, url, item_id):
3951 retries = self.get_param('extractor_retries', 3)
3952 count = -1
3953 last_error = 'Incomplete yt initial data recieved'
3954 while count < retries:
3955 count += 1
3956 # Sometimes youtube returns a webpage with incomplete ytInitialData
3957 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3958 if count:
3959 self.report_warning('%s. Retrying ...' % last_error)
3960 webpage = self._download_webpage(
3961 url, item_id,
3962 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
3963 data = self._extract_yt_initial_data(item_id, webpage)
3964 if data.get('contents') or data.get('currentVideoEndpoint'):
3965 break
3966 # Extract alerts here only when there is error
3967 self._extract_and_report_alerts(data)
3968 if count >= retries:
3969 raise ExtractorError(last_error)
3970 return webpage, data
3971
3972 @staticmethod
3973 def _smuggle_data(entries, data):
3974 for entry in entries:
3975 if data:
3976 entry['url'] = smuggle_url(entry['url'], data)
3977 yield entry
3978
    def _real_extract(self, url):
        """Unsmuggle the URL, run the real extraction and re-smuggle child entries."""
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = self.__real_extract(url, smuggled_data)
        if info_dict.get('entries'):
            # propagate the smuggled flags onto every child entry URL
            info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
        return info_dict

    # Splits a URL into pre/tab/post; the (?(channel_type)...) conditional
    # only allows a /tab component for channel-style URLs
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
3989
    def __real_extract(self, url, smuggled_data):
        """Core extraction: normalize the URL, then dispatch to tab-, playlist-
        or single-video handling depending on what the page data contains."""
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # missing optional groups become '' so string ops below are safe
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4105
4106
class YoutubePlaylistIE(InfoExtractor):
    """Matches bare playlist IDs and legacy playlist URLs; delegates to YoutubeTabIE."""
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Anything YoutubeTabIE accepts takes precedence over this extractor
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Normalize to a canonical playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4189
4190
class YoutubeYtBeIE(InfoExtractor):
    """Extractor for youtu.be short links that also carry a playlist id."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Pull both capture groups in one call
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        # Delegate to the tab extractor via an equivalent full watch URL
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4229
4230
class YoutubeYtUserIE(InfoExtractor):
    """Extractor for the "ytuser:<name>" shorthand; forwards to the user's channel tab."""
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4244
4245
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    """Extractor for the logged-in user's liked videos (':ytfav' shorthand)."""
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos live in the auto-generated "LL" playlist
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4263
4264
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    """Extractor for the "ytsearch" keyword; pages through the InnerTube search API."""
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # extra 'params' value sent with the search request (None = no filter);
    # subclasses override this to change result ordering/filtering
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n video results for query, fetching pages as needed.

        Stops early when the API returns no usable contents or no
        continuation token is found for the next page.
        """
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0  # number of videos yielded so far, compared against n
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # The first page answers under 'contents'; continuation pages
            # answer under 'onResponseReceivedCommands' — try both paths.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for slr_content in slr_contents:
                # Remember the first continuation token encountered
                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    # Skip non-video renderers (channels, playlists, ads, ...)
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            if not continuation_token:
                break
            # Request the next page on the following iteration
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
4334
4335
class YoutubeSearchDateIE(YoutubeSearchIE):
    """Search extractor variant that orders results newest-first ("ytsearchdate")."""
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # URL-encoded search filter ('CAI=') — presumably selects upload-date
    # ordering, matching IE_DESC; sent as 'params' by YoutubeSearchIE._entries
    _SEARCH_PARAMS = 'CAI%3D'
4341
4342
class YoutubeSearchURLIE(YoutubeSearchIE):
    """Extractor for youtube.com/results search URLs.

    Translates the URL's query string into a keyword search handled by
    YoutubeSearchIE._get_n_results.
    """
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        # Use the module-level parse_qs helper for consistency with the rest
        # of this file instead of re-spelling compat_parse_qs + urlparse here.
        qs = parse_qs(url)
        # _VALID_URL guarantees at least one of search_query/q is present
        query = (qs.get('search_query') or qs.get('q'))[0]
        # Optional 'sp' carries the search filter params; default to no filter
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4368
4369
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the extractor name from the subclass's feed name
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4386
4387
class YoutubeWatchLaterIE(InfoExtractor):
    """Extractor for the ':ytwatchlater' shorthand (authenticated watch-later list)."""
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch-later videos live in the auto-generated "WL" playlist
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4400
4401
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the recommended-videos feed (':ytrec' / youtube.com homepage)."""
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Unlike other feeds, recommendations also work without logging in
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4417
4418
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the subscriptions feed (':ytsubs' shorthand)."""
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4430
4431
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the watch-history feed (':ythis' shorthand)."""
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4440
4441
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catches watch URLs whose 'v=' parameter was lost (usually an unquoted
    '&' in the shell) and fails with an explanatory message."""
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
        attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Nothing extractable here; always fail with a shell-quoting hint
        hint = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(hint, expected=True)
4489
4490
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catches watch URLs whose video id is shorter than the required 11
    characters and fails with an explanatory message."""
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)