]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[RTP] Fix extraction and add subtitles (#497)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import hashlib
9 import itertools
10 import json
11 import os.path
12 import random
13 import re
14 import time
15 import traceback
16
17 from .common import InfoExtractor, SearchInfoExtractor
18 from ..compat import (
19 compat_chr,
20 compat_HTTPError,
21 compat_parse_qs,
22 compat_str,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 )
28 from ..jsinterp import JSInterpreter
29 from ..utils import (
30 bool_or_none,
31 bytes_to_intlist,
32 clean_html,
33 dict_get,
34 datetime_from_str,
35 error_to_compat_str,
36 ExtractorError,
37 format_field,
38 float_or_none,
39 int_or_none,
40 intlist_to_bytes,
41 mimetype2ext,
42 parse_codecs,
43 parse_duration,
44 qualities,
45 remove_start,
46 smuggle_url,
47 str_or_none,
48 str_to_int,
49 try_get,
50 unescapeHTML,
51 unified_strdate,
52 unsmuggle_url,
53 update_url_query,
54 url_or_none,
55 urlencode_postdata,
56 urljoin
57 )
58
59
def parse_qs(url):
    """Return the query-string parameters of *url* as a dict of value lists."""
    parsed_url = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed_url.query)
62
63
class YoutubeBaseInfoExtractor(InfoExtractor):
    """Provide base functions for Youtube extractors"""

    # Google account sign-in endpoints (only used by the currently disabled
    # username/password login flow, see _login)
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Path segments that can never be a channel/user name in a youtube.com URL
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches prefixed playlist IDs plus the special pseudo-playlists
    # (watch later WL, liked LL/LM, personal mix RDMM)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
83
84 def _login(self):
85 """
86 Attempt to log in to YouTube.
87 True is returned if successful or skipped.
88 False is returned if login failed.
89
90 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
91 """
92
93 def warn(message):
94 self.report_warning(message)
95
96 # username+password login is broken
97 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
98 self.raise_login_required(
99 'Login details are needed to download this content', method='cookies')
100 username, password = self._get_login_info()
101 if username:
102 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
103 return
104
105 # Everything below this is broken!
106 r'''
107 # No authentication to be performed
108 if username is None:
109 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
110 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
111 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
112 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
113 return True
114
115 login_page = self._download_webpage(
116 self._LOGIN_URL, None,
117 note='Downloading login page',
118 errnote='unable to fetch login page', fatal=False)
119 if login_page is False:
120 return
121
122 login_form = self._hidden_inputs(login_page)
123
124 def req(url, f_req, note, errnote):
125 data = login_form.copy()
126 data.update({
127 'pstMsg': 1,
128 'checkConnection': 'youtube',
129 'checkedDomains': 'youtube',
130 'hl': 'en',
131 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
132 'f.req': json.dumps(f_req),
133 'flowName': 'GlifWebSignIn',
134 'flowEntry': 'ServiceLogin',
135 # TODO: reverse actual botguard identifier generation algo
136 'bgRequest': '["identifier",""]',
137 })
138 return self._download_json(
139 url, None, note=note, errnote=errnote,
140 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
141 fatal=False,
142 data=urlencode_postdata(data), headers={
143 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
144 'Google-Accounts-XSRF': 1,
145 })
146
147 lookup_req = [
148 username,
149 None, [], None, 'US', None, None, 2, False, True,
150 [
151 None, None,
152 [2, 1, None, 1,
153 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
154 None, [], 4],
155 1, [None, None, []], None, None, None, True
156 ],
157 username,
158 ]
159
160 lookup_results = req(
161 self._LOOKUP_URL, lookup_req,
162 'Looking up account info', 'Unable to look up account info')
163
164 if lookup_results is False:
165 return False
166
167 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
168 if not user_hash:
169 warn('Unable to extract user hash')
170 return False
171
172 challenge_req = [
173 user_hash,
174 None, 1, None, [1, None, None, None, [password, None, True]],
175 [
176 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
177 1, [None, None, []], None, None, None, True
178 ]]
179
180 challenge_results = req(
181 self._CHALLENGE_URL, challenge_req,
182 'Logging in', 'Unable to log in')
183
184 if challenge_results is False:
185 return
186
187 login_res = try_get(challenge_results, lambda x: x[0][5], list)
188 if login_res:
189 login_msg = try_get(login_res, lambda x: x[5], compat_str)
190 warn(
191 'Unable to login: %s' % 'Invalid password'
192 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
193 return False
194
195 res = try_get(challenge_results, lambda x: x[0][-1], list)
196 if not res:
197 warn('Unable to extract result entry')
198 return False
199
200 login_challenge = try_get(res, lambda x: x[0][0], list)
201 if login_challenge:
202 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
203 if challenge_str == 'TWO_STEP_VERIFICATION':
204 # SEND_SUCCESS - TFA code has been successfully sent to phone
205 # QUOTA_EXCEEDED - reached the limit of TFA codes
206 status = try_get(login_challenge, lambda x: x[5], compat_str)
207 if status == 'QUOTA_EXCEEDED':
208 warn('Exceeded the limit of TFA codes, try later')
209 return False
210
211 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
212 if not tl:
213 warn('Unable to extract TL')
214 return False
215
216 tfa_code = self._get_tfa_info('2-step verification code')
217
218 if not tfa_code:
219 warn(
220 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
221 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
222 return False
223
224 tfa_code = remove_start(tfa_code, 'G-')
225
226 tfa_req = [
227 user_hash, None, 2, None,
228 [
229 9, None, None, None, None, None, None, None,
230 [None, tfa_code, True, 2]
231 ]]
232
233 tfa_results = req(
234 self._TFA_URL.format(tl), tfa_req,
235 'Submitting TFA code', 'Unable to submit TFA code')
236
237 if tfa_results is False:
238 return False
239
240 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
241 if tfa_res:
242 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
243 warn(
244 'Unable to finish TFA: %s' % 'Invalid TFA code'
245 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
246 return False
247
248 check_cookie_url = try_get(
249 tfa_results, lambda x: x[0][-1][2], compat_str)
250 else:
251 CHALLENGES = {
252 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
253 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
254 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
255 }
256 challenge = CHALLENGES.get(
257 challenge_str,
258 '%s returned error %s.' % (self.IE_NAME, challenge_str))
259 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
260 return False
261 else:
262 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
263
264 if not check_cookie_url:
265 warn('Unable to extract CheckCookie URL')
266 return False
267
268 check_cookie_results = self._download_webpage(
269 check_cookie_url, None, 'Checking cookie', fatal=False)
270
271 if check_cookie_results is False:
272 return False
273
274 if 'https://myaccount.google.com/' not in check_cookie_results:
275 warn('Unable to log in')
276 return False
277
278 return True
279 '''
280
281 def _initialize_consent(self):
282 cookies = self._get_cookies('https://www.youtube.com/')
283 if cookies.get('__Secure-3PSID'):
284 return
285 consent_id = None
286 consent = cookies.get('CONSENT')
287 if consent:
288 if 'YES' in consent.value:
289 return
290 consent_id = self._search_regex(
291 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
292 if not consent_id:
293 consent_id = random.randint(100, 999)
294 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
295
    def _real_initialize(self):
        # Runs once before any extraction: set the consent cookie, then
        # attempt login (a no-op unless a downloader is attached)
        self._initialize_consent()
        if self._downloader is None:
            return
        if not self._login():
            return
302
    # Regex locating the ytInitialData JSON blob embedded in watch/browse pages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Regex locating the ytInitialPlayerResponse JSON blob
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Tokens that may terminate the JSON assignments matched above
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Hardcoded fallback innertube configuration per client name; used when a
    # page's own ytcfg is unavailable or is missing one of these fields
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID'
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER'
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_MUSIC'
        }
    }

    # innertube API hostname per client; clients not listed use the WEB host
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }
400
401 def _get_default_ytcfg(self, client='WEB'):
402 if client in self._YT_DEFAULT_YTCFGS:
403 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
404 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
405 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
406
    def _get_innertube_host(self, client='WEB'):
        # Map an innertube client name to its API hostname; unknown clients
        # fall back to the WEB host (www.youtube.com)
        return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
409
410 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
411 # try_get but with fallback to default ytcfg client values when present
412 _func = lambda y: try_get(y, getter, expected_type)
413 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
414
    def _extract_client_name(self, ytcfg, default_client='WEB'):
        # INNERTUBE_CLIENT_NAME (e.g. 'WEB'), with fallback to the default client's value
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
417
    def _extract_client_version(self, ytcfg, default_client='WEB'):
        # INNERTUBE_CLIENT_VERSION (e.g. '2.20210622.10.00'), with default-client fallback
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
420
    def _extract_api_key(self, ytcfg=None, default_client='WEB'):
        # INNERTUBE_API_KEY used as the 'key' query parameter, with default-client fallback
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
423
424 def _extract_context(self, ytcfg=None, default_client='WEB'):
425 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
426 context = _get_context(ytcfg)
427 if context:
428 return context
429
430 context = _get_context(self._get_default_ytcfg(default_client))
431 if not ytcfg:
432 return context
433
434 # Recreate the client context (required)
435 context['client'].update({
436 'clientVersion': self._extract_client_version(ytcfg, default_client),
437 'clientName': self._extract_client_name(ytcfg, default_client),
438 })
439 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
440 if visitor_data:
441 context['client']['visitorData'] = visitor_data
442 return context
443
444 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
445 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
446 # See: https://github.com/yt-dlp/yt-dlp/issues/393
447 yt_cookies = self._get_cookies('https://www.youtube.com')
448 sapisid_cookie = dict_get(
449 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
450 if sapisid_cookie is None:
451 return
452 time_now = round(time.time())
453 # SAPISID cookie is required if not already present
454 if not yt_cookies.get('SAPISID'):
455 self._set_cookie(
456 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
457 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
458 sapisidhash = hashlib.sha1(
459 f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
460 return f'SAPISIDHASH {time_now}_{sapisidhash}'
461
462 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
463 note='Downloading API JSON', errnote='Unable to download API page',
464 context=None, api_key=None, api_hostname=None, default_client='WEB'):
465
466 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
467 data.update(query)
468 real_headers = self._generate_api_headers(client=default_client)
469 real_headers.update({'content-type': 'application/json'})
470 if headers:
471 real_headers.update(headers)
472 return self._download_json(
473 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
474 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
475 data=json.dumps(data).encode('utf8'), headers=real_headers,
476 query={'key': api_key or self._extract_api_key()})
477
478 def _extract_yt_initial_data(self, video_id, webpage):
479 return self._parse_json(
480 self._search_regex(
481 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
482 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
483 video_id)
484
485 def _extract_identity_token(self, webpage, item_id):
486 ytcfg = self._extract_ytcfg(item_id, webpage)
487 if ytcfg:
488 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
489 if token:
490 return token
491 return self._search_regex(
492 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
493 'identity token', default=None)
494
    @staticmethod
    def _extract_account_syncid(data):
        """
        Extract syncId required to download private playlists of secondary channels
        @param data Either response or ytcfg
        """
        # datasyncId lives in the response context; DATASYNC_ID in ytcfg
        sync_ids = (try_get(
            data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
                   lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
        if len(sync_ids) >= 2 and sync_ids[1]:
            # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
            # and just "user_syncid||" for primary channel. We only want the channel_syncid
            return sync_ids[0]
        # ytcfg includes channel_syncid if on secondary channel
        # NOTE(review): assumes *data* is a dict here (.get) - a non-dict response
        # without the keys above would raise; confirm against callers
        return data.get('DELEGATED_SESSION_ID')
510
511 def _extract_ytcfg(self, video_id, webpage):
512 if not webpage:
513 return {}
514 return self._parse_json(
515 self._search_regex(
516 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
517 default='{}'), video_id, fatal=False) or {}
518
    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
                              visitor_data=None, api_hostname=None, client='WEB'):
        """
        Build the HTTP headers for an innertube API request.

        @param ytcfg           page ytcfg supplying client name/version/visitor data
        @param identity_token  account ID_TOKEN (sent as X-Youtube-Identity-Token)
        @param account_syncid  secondary-channel syncid (sent as X-Goog-PageId)
        @param visitor_data    explicit visitorData; else taken from the ytcfg context
        @param api_hostname    overrides the client's default innertube host in Origin
        @param client          default client used to fill in missing ytcfg values
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
            'Origin': origin
        }
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        # Cookie-based auth: only attach Authorization/X-Origin when cookies allow it
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers
543
544 @staticmethod
545 def _build_api_continuation_query(continuation, ctp=None):
546 query = {
547 'continuation': continuation
548 }
549 # TODO: Inconsistency with clickTrackingParams.
550 # Currently we have a fixed ctp contained within context (from ytcfg)
551 # and a ctp in root query for continuation.
552 if ctp:
553 query['clickTracking'] = {'clickTrackingParams': ctp}
554 return query
555
556 @classmethod
557 def _continuation_query_ajax_to_api(cls, continuation_query):
558 continuation = dict_get(continuation_query, ('continuation', 'ctoken'))
559 return cls._build_api_continuation_query(continuation, continuation_query.get('itct'))
560
561 @staticmethod
562 def _build_continuation_query(continuation, ctp=None):
563 query = {
564 'ctoken': continuation,
565 'continuation': continuation,
566 }
567 if ctp:
568 query['itct'] = ctp
569 return query
570
571 @classmethod
572 def _extract_next_continuation_data(cls, renderer):
573 next_continuation = try_get(
574 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
575 lambda x: x['continuation']['reloadContinuationData']), dict)
576 if not next_continuation:
577 return
578 continuation = next_continuation.get('continuation')
579 if not continuation:
580 return
581 ctp = next_continuation.get('clickTrackingParams')
582 return cls._build_continuation_query(continuation, ctp)
583
584 @classmethod
585 def _extract_continuation_ep_data(cls, continuation_ep: dict):
586 if isinstance(continuation_ep, dict):
587 continuation = try_get(
588 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
589 if not continuation:
590 return
591 ctp = continuation_ep.get('clickTrackingParams')
592 return cls._build_continuation_query(continuation, ctp)
593
594 @classmethod
595 def _extract_continuation(cls, renderer):
596 next_continuation = cls._extract_next_continuation_data(renderer)
597 if next_continuation:
598 return next_continuation
599 contents = []
600 for key in ('contents', 'items'):
601 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
602 for content in contents:
603 if not isinstance(content, dict):
604 continue
605 continuation_ep = try_get(
606 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
607 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
608 dict)
609 continuation = cls._extract_continuation_ep_data(continuation_ep)
610 if continuation:
611 return continuation
612
613 @staticmethod
614 def _extract_alerts(data):
615 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
616 if not isinstance(alert_dict, dict):
617 continue
618 for alert in alert_dict.values():
619 alert_type = alert.get('type')
620 if not alert_type:
621 continue
622 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
623 if message:
624 yield alert_type, message
625 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
626 message += try_get(run, lambda x: x['text'], compat_str)
627 if message:
628 yield alert_type, message
629
630 def _report_alerts(self, alerts, expected=True):
631 errors = []
632 warnings = []
633 for alert_type, alert_message in alerts:
634 if alert_type.lower() == 'error':
635 errors.append([alert_type, alert_message])
636 else:
637 warnings.append([alert_type, alert_message])
638
639 for alert_type, alert_message in (warnings + errors[:-1]):
640 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
641 if errors:
642 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
643
    def _extract_and_report_alerts(self, data, *args, **kwargs):
        # Convenience wrapper: parse alerts from *data* and report them;
        # extra args are forwarded to _report_alerts (e.g. expected=False)
        return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
646
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """
        Call the innertube API endpoint *ep*, retrying on transient HTTP
        errors (500/503/404) and on incomplete responses.

        @param check_get_keys  keys, at least one of which must be present in
                               the response for it to be considered complete
        @returns the parsed JSON response, or None on non-fatal failure
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return
            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
702
703 @staticmethod
704 def is_music_url(url):
705 return re.match(r'https?://music\.youtube\.com/', url) is not None
706
707 def _extract_video(self, renderer):
708 video_id = renderer.get('videoId')
709 title = try_get(
710 renderer,
711 (lambda x: x['title']['runs'][0]['text'],
712 lambda x: x['title']['simpleText']), compat_str)
713 description = try_get(
714 renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
715 compat_str)
716 duration = parse_duration(try_get(
717 renderer, lambda x: x['lengthText']['simpleText'], compat_str))
718 view_count_text = try_get(
719 renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
720 view_count = str_to_int(self._search_regex(
721 r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
722 'view count', default=None))
723 uploader = try_get(
724 renderer,
725 (lambda x: x['ownerText']['runs'][0]['text'],
726 lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
727 return {
728 '_type': 'url',
729 'ie_key': YoutubeIE.ie_key(),
730 'id': video_id,
731 'url': video_id,
732 'title': title,
733 'description': description,
734 'duration': duration,
735 'view_count': view_count,
736 'uploader': uploader,
737 }
738
739
740 class YoutubeIE(YoutubeBaseInfoExtractor):
741 IE_DESC = 'YouTube.com'
742 _INVIDIOUS_SITES = (
743 # invidious-redirect websites
744 r'(?:www\.)?redirect\.invidious\.io',
745 r'(?:(?:www|dev)\.)?invidio\.us',
746 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
747 r'(?:www\.)?invidious\.pussthecat\.org',
748 r'(?:www\.)?invidious\.zee\.li',
749 r'(?:www\.)?invidious\.ethibox\.fr',
750 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
751 # youtube-dl invidious instances list
752 r'(?:(?:www|no)\.)?invidiou\.sh',
753 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
754 r'(?:www\.)?invidious\.kabi\.tk',
755 r'(?:www\.)?invidious\.mastodon\.host',
756 r'(?:www\.)?invidious\.zapashcanon\.fr',
757 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
758 r'(?:www\.)?invidious\.tinfoil-hat\.net',
759 r'(?:www\.)?invidious\.himiko\.cloud',
760 r'(?:www\.)?invidious\.reallyancient\.tech',
761 r'(?:www\.)?invidious\.tube',
762 r'(?:www\.)?invidiou\.site',
763 r'(?:www\.)?invidious\.site',
764 r'(?:www\.)?invidious\.xyz',
765 r'(?:www\.)?invidious\.nixnet\.xyz',
766 r'(?:www\.)?invidious\.048596\.xyz',
767 r'(?:www\.)?invidious\.drycat\.fr',
768 r'(?:www\.)?inv\.skyn3t\.in',
769 r'(?:www\.)?tube\.poal\.co',
770 r'(?:www\.)?tube\.connect\.cafe',
771 r'(?:www\.)?vid\.wxzm\.sx',
772 r'(?:www\.)?vid\.mint\.lgbt',
773 r'(?:www\.)?vid\.puffyan\.us',
774 r'(?:www\.)?yewtu\.be',
775 r'(?:www\.)?yt\.elukerio\.org',
776 r'(?:www\.)?yt\.lelux\.fi',
777 r'(?:www\.)?invidious\.ggc-project\.de',
778 r'(?:www\.)?yt\.maisputain\.ovh',
779 r'(?:www\.)?ytprivate\.com',
780 r'(?:www\.)?invidious\.13ad\.de',
781 r'(?:www\.)?invidious\.toot\.koeln',
782 r'(?:www\.)?invidious\.fdn\.fr',
783 r'(?:www\.)?watch\.nettohikari\.com',
784 r'(?:www\.)?invidious\.namazso\.eu',
785 r'(?:www\.)?invidious\.silkky\.cloud',
786 r'(?:www\.)?invidious\.exonip\.de',
787 r'(?:www\.)?invidious\.riverside\.rocks',
788 r'(?:www\.)?invidious\.blamefran\.net',
789 r'(?:www\.)?invidious\.moomoo\.de',
790 r'(?:www\.)?ytb\.trom\.tf',
791 r'(?:www\.)?yt\.cyberhost\.uk',
792 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
793 r'(?:www\.)?qklhadlycap4cnod\.onion',
794 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
795 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
796 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
797 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
798 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
799 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
800 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
801 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
802 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
803 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
804 )
805 _VALID_URL = r"""(?x)^
806 (
807 (?:https?://|//) # http(s):// or protocol-independent URL
808 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
809 (?:www\.)?deturl\.com/www\.youtube\.com|
810 (?:www\.)?pwnyoutube\.com|
811 (?:www\.)?hooktube\.com|
812 (?:www\.)?yourepeat\.com|
813 tube\.majestyc\.net|
814 %(invidious)s|
815 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
816 (?:.*?\#/)? # handle anchor (#/) redirect urls
817 (?: # the various things that can precede the ID:
818 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
819 |(?: # or the v= param in all its forms
820 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
821 (?:\?|\#!?) # the params delimiter ? or # or #!
822 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
823 v=
824 )
825 ))
826 |(?:
827 youtu\.be| # just youtu.be/xxxx
828 vid\.plus| # or vid.plus/xxxx
829 zwearz\.com/watch| # or zwearz.com/watch/xxxx
830 %(invidious)s
831 )/
832 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
833 )
834 )? # all until now is optional -> you can pass the naked ID
835 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
836 (?(1).+)? # if we found the ID, everything can follow
837 (?:\#|$)""" % {
838 'invidious': '|'.join(_INVIDIOUS_SITES),
839 }
840 _PLAYER_INFO_RE = (
841 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
842 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
843 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
844 )
    # Hardcoded per-itag format metadata (container, resolution, codecs,
    # bitrate). NOTE(review): values are typical/nominal and can vary per
    # video (see the inline notes on itags 36 and 272) — presumably merged
    # into API-extracted formats; confirm at the use site.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }
    # Subtitle formats this extractor knows how to request
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Messages that identify an age-restricted video
    # NOTE(review): presumably matched against the playability status of the
    # player response — confirm at the use site
    _AGE_GATE_REASONS = (
        'Sign in to confirm your age',
        'This video may be inappropriate for some users.',
        'Sorry, this content is age-restricted.')

    # Opt out of InfoExtractor's generic geo-bypass mechanism
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
960 _TESTS = [
961 {
962 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
963 'info_dict': {
964 'id': 'BaW_jenozKc',
965 'ext': 'mp4',
966 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
967 'uploader': 'Philipp Hagemeister',
968 'uploader_id': 'phihag',
969 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
970 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
971 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
972 'upload_date': '20121002',
973 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
974 'categories': ['Science & Technology'],
975 'tags': ['youtube-dl'],
976 'duration': 10,
977 'view_count': int,
978 'like_count': int,
979 'dislike_count': int,
980 'start_time': 1,
981 'end_time': 9,
982 }
983 },
984 {
985 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
986 'note': 'Embed-only video (#1746)',
987 'info_dict': {
988 'id': 'yZIXLfi8CZQ',
989 'ext': 'mp4',
990 'upload_date': '20120608',
991 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
992 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
993 'uploader': 'SET India',
994 'uploader_id': 'setindia',
995 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
996 'age_limit': 18,
997 },
998 'skip': 'Private video',
999 },
1000 {
1001 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1002 'note': 'Use the first video ID in the URL',
1003 'info_dict': {
1004 'id': 'BaW_jenozKc',
1005 'ext': 'mp4',
1006 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1007 'uploader': 'Philipp Hagemeister',
1008 'uploader_id': 'phihag',
1009 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1010 'upload_date': '20121002',
1011 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1012 'categories': ['Science & Technology'],
1013 'tags': ['youtube-dl'],
1014 'duration': 10,
1015 'view_count': int,
1016 'like_count': int,
1017 'dislike_count': int,
1018 },
1019 'params': {
1020 'skip_download': True,
1021 },
1022 },
1023 {
1024 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1025 'note': '256k DASH audio (format 141) via DASH manifest',
1026 'info_dict': {
1027 'id': 'a9LDPn-MO4I',
1028 'ext': 'm4a',
1029 'upload_date': '20121002',
1030 'uploader_id': '8KVIDEO',
1031 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1032 'description': '',
1033 'uploader': '8KVIDEO',
1034 'title': 'UHDTV TEST 8K VIDEO.mp4'
1035 },
1036 'params': {
1037 'youtube_include_dash_manifest': True,
1038 'format': '141',
1039 },
1040 'skip': 'format 141 not served anymore',
1041 },
1042 # DASH manifest with encrypted signature
1043 {
1044 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1045 'info_dict': {
1046 'id': 'IB3lcPjvWLA',
1047 'ext': 'm4a',
1048 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1049 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1050 'duration': 244,
1051 'uploader': 'AfrojackVEVO',
1052 'uploader_id': 'AfrojackVEVO',
1053 'upload_date': '20131011',
1054 'abr': 129.495,
1055 },
1056 'params': {
1057 'youtube_include_dash_manifest': True,
1058 'format': '141/bestaudio[ext=m4a]',
1059 },
1060 },
1061 # Controversy video
1062 {
1063 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1064 'info_dict': {
1065 'id': 'T4XJQO3qol8',
1066 'ext': 'mp4',
1067 'duration': 219,
1068 'upload_date': '20100909',
1069 'uploader': 'Amazing Atheist',
1070 'uploader_id': 'TheAmazingAtheist',
1071 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
1072 'title': 'Burning Everyone\'s Koran',
1073 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
1074 }
1075 },
1076 # Normal age-gate video (embed allowed)
1077 {
1078 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1079 'info_dict': {
1080 'id': 'HtVdAasjOgU',
1081 'ext': 'mp4',
1082 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1083 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1084 'duration': 142,
1085 'uploader': 'The Witcher',
1086 'uploader_id': 'WitcherGame',
1087 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1088 'upload_date': '20140605',
1089 'age_limit': 18,
1090 },
1091 },
1092 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1093 # YouTube Red ad is not captured for creator
1094 {
1095 'url': '__2ABJjxzNo',
1096 'info_dict': {
1097 'id': '__2ABJjxzNo',
1098 'ext': 'mp4',
1099 'duration': 266,
1100 'upload_date': '20100430',
1101 'uploader_id': 'deadmau5',
1102 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1103 'creator': 'deadmau5',
1104 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1105 'uploader': 'deadmau5',
1106 'title': 'Deadmau5 - Some Chords (HD)',
1107 'alt_title': 'Some Chords',
1108 },
1109 'expected_warnings': [
1110 'DASH manifest missing',
1111 ]
1112 },
1113 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1114 {
1115 'url': 'lqQg6PlCWgI',
1116 'info_dict': {
1117 'id': 'lqQg6PlCWgI',
1118 'ext': 'mp4',
1119 'duration': 6085,
1120 'upload_date': '20150827',
1121 'uploader_id': 'olympic',
1122 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1123 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1124 'uploader': 'Olympic',
1125 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1126 },
1127 'params': {
1128 'skip_download': 'requires avconv',
1129 }
1130 },
1131 # Non-square pixels
1132 {
1133 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1134 'info_dict': {
1135 'id': '_b-2C3KPAM0',
1136 'ext': 'mp4',
1137 'stretched_ratio': 16 / 9.,
1138 'duration': 85,
1139 'upload_date': '20110310',
1140 'uploader_id': 'AllenMeow',
1141 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1142 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1143 'uploader': '孫ᄋᄅ',
1144 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1145 },
1146 },
1147 # url_encoded_fmt_stream_map is empty string
1148 {
1149 'url': 'qEJwOuvDf7I',
1150 'info_dict': {
1151 'id': 'qEJwOuvDf7I',
1152 'ext': 'webm',
1153 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1154 'description': '',
1155 'upload_date': '20150404',
1156 'uploader_id': 'spbelect',
1157 'uploader': 'Наблюдатели Петербурга',
1158 },
1159 'params': {
1160 'skip_download': 'requires avconv',
1161 },
1162 'skip': 'This live event has ended.',
1163 },
1164 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1165 {
1166 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1167 'info_dict': {
1168 'id': 'FIl7x6_3R5Y',
1169 'ext': 'webm',
1170 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1171 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1172 'duration': 220,
1173 'upload_date': '20150625',
1174 'uploader_id': 'dorappi2000',
1175 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1176 'uploader': 'dorappi2000',
1177 'formats': 'mincount:31',
1178 },
1179 'skip': 'not actual anymore',
1180 },
1181 # DASH manifest with segment_list
1182 {
1183 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1184 'md5': '8ce563a1d667b599d21064e982ab9e31',
1185 'info_dict': {
1186 'id': 'CsmdDsKjzN8',
1187 'ext': 'mp4',
1188 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1189 'uploader': 'Airtek',
1190 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1191 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1192 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1193 },
1194 'params': {
1195 'youtube_include_dash_manifest': True,
1196 'format': '135', # bestvideo
1197 },
1198 'skip': 'This live event has ended.',
1199 },
1200 {
1201 # Multifeed videos (multiple cameras), URL is for Main Camera
1202 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1203 'info_dict': {
1204 'id': 'jvGDaLqkpTg',
1205 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1206 'description': 'md5:e03b909557865076822aa169218d6a5d',
1207 },
1208 'playlist': [{
1209 'info_dict': {
1210 'id': 'jvGDaLqkpTg',
1211 'ext': 'mp4',
1212 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1213 'description': 'md5:e03b909557865076822aa169218d6a5d',
1214 'duration': 10643,
1215 'upload_date': '20161111',
1216 'uploader': 'Team PGP',
1217 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1218 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1219 },
1220 }, {
1221 'info_dict': {
1222 'id': '3AKt1R1aDnw',
1223 'ext': 'mp4',
1224 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1225 'description': 'md5:e03b909557865076822aa169218d6a5d',
1226 'duration': 10991,
1227 'upload_date': '20161111',
1228 'uploader': 'Team PGP',
1229 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1230 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1231 },
1232 }, {
1233 'info_dict': {
1234 'id': 'RtAMM00gpVc',
1235 'ext': 'mp4',
1236 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1237 'description': 'md5:e03b909557865076822aa169218d6a5d',
1238 'duration': 10995,
1239 'upload_date': '20161111',
1240 'uploader': 'Team PGP',
1241 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1242 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1243 },
1244 }, {
1245 'info_dict': {
1246 'id': '6N2fdlP3C5U',
1247 'ext': 'mp4',
1248 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1249 'description': 'md5:e03b909557865076822aa169218d6a5d',
1250 'duration': 10990,
1251 'upload_date': '20161111',
1252 'uploader': 'Team PGP',
1253 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1254 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1255 },
1256 }],
1257 'params': {
1258 'skip_download': True,
1259 },
1260 },
1261 {
1262 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1263 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1264 'info_dict': {
1265 'id': 'gVfLd0zydlo',
1266 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1267 },
1268 'playlist_count': 2,
1269 'skip': 'Not multifeed anymore',
1270 },
1271 {
1272 'url': 'https://vid.plus/FlRa-iH7PGw',
1273 'only_matching': True,
1274 },
1275 {
1276 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1277 'only_matching': True,
1278 },
1279 {
1280 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1281 # Also tests cut-off URL expansion in video description (see
1282 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1283 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1284 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1285 'info_dict': {
1286 'id': 'lsguqyKfVQg',
1287 'ext': 'mp4',
1288 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1289 'alt_title': 'Dark Walk - Position Music',
1290 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1291 'duration': 133,
1292 'upload_date': '20151119',
1293 'uploader_id': 'IronSoulElf',
1294 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1295 'uploader': 'IronSoulElf',
1296 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1297 'track': 'Dark Walk - Position Music',
1298 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1299 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1300 },
1301 'params': {
1302 'skip_download': True,
1303 },
1304 },
1305 {
1306 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1307 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1308 'only_matching': True,
1309 },
1310 {
1311 # Video with yt:stretch=17:0
1312 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1313 'info_dict': {
1314 'id': 'Q39EVAstoRM',
1315 'ext': 'mp4',
1316 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1317 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1318 'upload_date': '20151107',
1319 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1320 'uploader': 'CH GAMER DROID',
1321 },
1322 'params': {
1323 'skip_download': True,
1324 },
1325 'skip': 'This video does not exist.',
1326 },
1327 {
1328 # Video with incomplete 'yt:stretch=16:'
1329 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1330 'only_matching': True,
1331 },
1332 {
1333 # Video licensed under Creative Commons
1334 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1335 'info_dict': {
1336 'id': 'M4gD1WSo5mA',
1337 'ext': 'mp4',
1338 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1339 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1340 'duration': 721,
1341 'upload_date': '20150127',
1342 'uploader_id': 'BerkmanCenter',
1343 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1344 'uploader': 'The Berkman Klein Center for Internet & Society',
1345 'license': 'Creative Commons Attribution license (reuse allowed)',
1346 },
1347 'params': {
1348 'skip_download': True,
1349 },
1350 },
1351 {
1352 # Channel-like uploader_url
1353 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1354 'info_dict': {
1355 'id': 'eQcmzGIKrzg',
1356 'ext': 'mp4',
1357 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1358 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1359 'duration': 4060,
1360 'upload_date': '20151119',
1361 'uploader': 'Bernie Sanders',
1362 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1363 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1364 'license': 'Creative Commons Attribution license (reuse allowed)',
1365 },
1366 'params': {
1367 'skip_download': True,
1368 },
1369 },
1370 {
1371 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1372 'only_matching': True,
1373 },
1374 {
1375 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1376 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1377 'only_matching': True,
1378 },
1379 {
1380 # Rental video preview
1381 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1382 'info_dict': {
1383 'id': 'uGpuVWrhIzE',
1384 'ext': 'mp4',
1385 'title': 'Piku - Trailer',
1386 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1387 'upload_date': '20150811',
1388 'uploader': 'FlixMatrix',
1389 'uploader_id': 'FlixMatrixKaravan',
1390 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1391 'license': 'Standard YouTube License',
1392 },
1393 'params': {
1394 'skip_download': True,
1395 },
1396 'skip': 'This video is not available.',
1397 },
1398 {
1399 # YouTube Red video with episode data
1400 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1401 'info_dict': {
1402 'id': 'iqKdEhx-dD4',
1403 'ext': 'mp4',
1404 'title': 'Isolation - Mind Field (Ep 1)',
1405 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1406 'duration': 2085,
1407 'upload_date': '20170118',
1408 'uploader': 'Vsauce',
1409 'uploader_id': 'Vsauce',
1410 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1411 'series': 'Mind Field',
1412 'season_number': 1,
1413 'episode_number': 1,
1414 },
1415 'params': {
1416 'skip_download': True,
1417 },
1418 'expected_warnings': [
1419 'Skipping DASH manifest',
1420 ],
1421 },
1422 {
1423 # The following content has been identified by the YouTube community
1424 # as inappropriate or offensive to some audiences.
1425 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1426 'info_dict': {
1427 'id': '6SJNVb0GnPI',
1428 'ext': 'mp4',
1429 'title': 'Race Differences in Intelligence',
1430 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1431 'duration': 965,
1432 'upload_date': '20140124',
1433 'uploader': 'New Century Foundation',
1434 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1435 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1436 },
1437 'params': {
1438 'skip_download': True,
1439 },
1440 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1441 },
1442 {
1443 # itag 212
1444 'url': '1t24XAntNCY',
1445 'only_matching': True,
1446 },
1447 {
1448 # geo restricted to JP
1449 'url': 'sJL6WA-aGkQ',
1450 'only_matching': True,
1451 },
1452 {
1453 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1454 'only_matching': True,
1455 },
1456 {
1457 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1458 'only_matching': True,
1459 },
1460 {
1461 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1462 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1463 'only_matching': True,
1464 },
1465 {
1466 # DRM protected
1467 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1468 'only_matching': True,
1469 },
1470 {
1471 # Video with unsupported adaptive stream type formats
1472 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1473 'info_dict': {
1474 'id': 'Z4Vy8R84T1U',
1475 'ext': 'mp4',
1476 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1477 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1478 'duration': 433,
1479 'upload_date': '20130923',
1480 'uploader': 'Amelia Putri Harwita',
1481 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1482 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1483 'formats': 'maxcount:10',
1484 },
1485 'params': {
1486 'skip_download': True,
1487 'youtube_include_dash_manifest': False,
1488 },
1489 'skip': 'not actual anymore',
1490 },
1491 {
1492 # Youtube Music Auto-generated description
1493 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1494 'info_dict': {
1495 'id': 'MgNrAu2pzNs',
1496 'ext': 'mp4',
1497 'title': 'Voyeur Girl',
1498 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1499 'upload_date': '20190312',
1500 'uploader': 'Stephen - Topic',
1501 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1502 'artist': 'Stephen',
1503 'track': 'Voyeur Girl',
1504 'album': 'it\'s too much love to know my dear',
1505 'release_date': '20190313',
1506 'release_year': 2019,
1507 },
1508 'params': {
1509 'skip_download': True,
1510 },
1511 },
1512 {
1513 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1514 'only_matching': True,
1515 },
1516 {
1517 # invalid -> valid video id redirection
1518 'url': 'DJztXj2GPfl',
1519 'info_dict': {
1520 'id': 'DJztXj2GPfk',
1521 'ext': 'mp4',
1522 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1523 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1524 'upload_date': '20090125',
1525 'uploader': 'Prochorowka',
1526 'uploader_id': 'Prochorowka',
1527 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1528 'artist': 'Panjabi MC',
1529 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1530 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1531 },
1532 'params': {
1533 'skip_download': True,
1534 },
1535 'skip': 'Video unavailable',
1536 },
1537 {
1538 # empty description results in an empty string
1539 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1540 'info_dict': {
1541 'id': 'x41yOUIvK2k',
1542 'ext': 'mp4',
1543 'title': 'IMG 3456',
1544 'description': '',
1545 'upload_date': '20170613',
1546 'uploader_id': 'ElevageOrVert',
1547 'uploader': 'ElevageOrVert',
1548 },
1549 'params': {
1550 'skip_download': True,
1551 },
1552 },
1553 {
1554 # with '};' inside yt initial data (see [1])
1555 # see [2] for an example with '};' inside ytInitialPlayerResponse
1556 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1557 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1558 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1559 'info_dict': {
1560 'id': 'CHqg6qOn4no',
1561 'ext': 'mp4',
1562 'title': 'Part 77 Sort a list of simple types in c#',
1563 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1564 'upload_date': '20130831',
1565 'uploader_id': 'kudvenkat',
1566 'uploader': 'kudvenkat',
1567 },
1568 'params': {
1569 'skip_download': True,
1570 },
1571 },
1572 {
1573 # another example of '};' in ytInitialData
1574 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1575 'only_matching': True,
1576 },
1577 {
1578 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1579 'only_matching': True,
1580 },
1581 {
1582 # https://github.com/ytdl-org/youtube-dl/pull/28094
1583 'url': 'OtqTfy26tG0',
1584 'info_dict': {
1585 'id': 'OtqTfy26tG0',
1586 'ext': 'mp4',
1587 'title': 'Burn Out',
1588 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1589 'upload_date': '20141120',
1590 'uploader': 'The Cinematic Orchestra - Topic',
1591 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1592 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1593 'artist': 'The Cinematic Orchestra',
1594 'track': 'Burn Out',
1595 'album': 'Every Day',
1596 'release_data': None,
1597 'release_year': None,
1598 },
1599 'params': {
1600 'skip_download': True,
1601 },
1602 },
1603 {
1604 # controversial video, only works with bpctr when authenticated with cookies
1605 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1606 'only_matching': True,
1607 },
1608 {
1609 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1610 'url': 'cBvYw8_A0vQ',
1611 'info_dict': {
1612 'id': 'cBvYw8_A0vQ',
1613 'ext': 'mp4',
1614 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1615 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1616 'upload_date': '20201120',
1617 'uploader': 'Walk around Japan',
1618 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1619 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1620 },
1621 'params': {
1622 'skip_download': True,
1623 },
1624 }, {
1625 # Has multiple audio streams
1626 'url': 'WaOKSUlf4TM',
1627 'only_matching': True
1628 }, {
1629 # Requires Premium: has format 141 when requested using YTM url
1630 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1631 'only_matching': True
1632 }, {
1633 # multiple subtitles with same lang_code
1634 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1635 'only_matching': True,
1636 }, {
1637 # Force use android client fallback
1638 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1639 'info_dict': {
1640 'id': 'YOelRv7fMxY',
1641 'title': 'Digging a Secret Tunnel from my Workshop',
1642 'ext': '3gp',
1643 'upload_date': '20210624',
1644 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1645 'uploader': 'colinfurze',
1646 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1647 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1648 },
1649 'params': {
1650 'format': '17', # 3gp format available on android
1651 'extractor_args': {'youtube': {'player_client': ['android']}},
1652 },
1653 },
1654 {
1655 # Skip download of additional client configs (remix client config in this case)
1656 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1657 'only_matching': True,
1658 'params': {
1659 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1660 },
1661 }
1662 ]
1663
1664 @classmethod
1665 def suitable(cls, url):
1666 # Hack for lazy extractors until more generic solution is implemented
1667 # (see #28780)
1668 from .youtube import parse_qs
1669 qs = parse_qs(url)
1670 if qs.get('list', [None])[0]:
1671 return False
1672 return super(YoutubeIE, cls).suitable(url)
1673
1674 def __init__(self, *args, **kwargs):
1675 super(YoutubeIE, self).__init__(*args, **kwargs)
1676 self._code_cache = {}
1677 self._player_cache = {}
1678
1679 def _extract_player_url(self, ytcfg=None, webpage=None):
1680 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1681 if not player_url:
1682 player_url = self._search_regex(
1683 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1684 webpage, 'player URL', fatal=False)
1685 if player_url.startswith('//'):
1686 player_url = 'https:' + player_url
1687 elif not re.match(r'https?://', player_url):
1688 player_url = compat_urlparse.urljoin(
1689 'https://www.youtube.com', player_url)
1690 return player_url
1691
1692 def _signature_cache_id(self, example_sig):
1693 """ Return a string representation of a signature """
1694 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1695
1696 @classmethod
1697 def _extract_player_info(cls, player_url):
1698 for player_re in cls._PLAYER_INFO_RE:
1699 id_m = re.search(player_re, player_url)
1700 if id_m:
1701 break
1702 else:
1703 raise ExtractorError('Cannot identify player %r' % player_url)
1704 return id_m.group('id')
1705
1706 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1707 player_id = self._extract_player_info(player_url)
1708 if player_id not in self._code_cache:
1709 self._code_cache[player_id] = self._download_webpage(
1710 player_url, video_id, fatal=fatal,
1711 note='Downloading player ' + player_id,
1712 errnote='Download of %s failed' % player_url)
1713 return player_id in self._code_cache
1714
1715 def _extract_signature_function(self, video_id, player_url, example_sig):
1716 player_id = self._extract_player_info(player_url)
1717
1718 # Read from filesystem cache
1719 func_id = 'js_%s_%s' % (
1720 player_id, self._signature_cache_id(example_sig))
1721 assert os.path.basename(func_id) == func_id
1722
1723 cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1724 if cache_spec is not None:
1725 return lambda s: ''.join(s[i] for i in cache_spec)
1726
1727 if self._load_player(video_id, player_url):
1728 code = self._code_cache[player_id]
1729 res = self._parse_sig_js(code)
1730
1731 test_string = ''.join(map(compat_chr, range(len(example_sig))))
1732 cache_res = res(test_string)
1733 cache_spec = [ord(c) for c in cache_res]
1734
1735 self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1736 return res
1737
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Runs func on a probe string, recovers the index permutation and renders
        it as compact slice/index expressions (debug aid for
        --youtube-print-sig-code).
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a Python slice expression, omitting redundant parts
                # (leading 0, unit step, open-ended stop)
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                # Detect runs of consecutive indices (step +1/-1) and emit them
                # as a single slice; isolated indices are emitted one by one
                if step is not None:
                    if i - prev == step:
                        continue
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Flush the final element or the run still in progress
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1776
    def _parse_sig_js(self, jscode):
        """Locate the signature-scrambling function in player JS and wrap it.

        Tries a battery of regexes (newest player layouts first, obsolete ones
        last) to find the function name, then interprets it with JSInterpreter.
        Returns a callable mapping an encrypted signature string to its
        decrypted form.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The extracted function takes its arguments as a list
        return lambda s: initial_function([s])
1800
1801 def _decrypt_signature(self, s, video_id, player_url):
1802 """Turn the encrypted s field into a working signature"""
1803
1804 if player_url is None:
1805 raise ExtractorError('Cannot decrypt signature without player_url')
1806
1807 try:
1808 player_id = (player_url, self._signature_cache_id(s))
1809 if player_id not in self._player_cache:
1810 func = self._extract_signature_function(
1811 video_id, player_url, s
1812 )
1813 self._player_cache[player_id] = func
1814 func = self._player_cache[player_id]
1815 if self.get_param('youtube_print_sig_code'):
1816 self._print_sig_code(func, s)
1817 return func(s)
1818 except Exception as e:
1819 tb = traceback.format_exc()
1820 raise ExtractorError(
1821 'Signature extraction failed: ' + tb, cause=e)
1822
1823 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1824 """
1825 Extract signatureTimestamp (sts)
1826 Required to tell API what sig/player version is in use.
1827 """
1828 sts = None
1829 if isinstance(ytcfg, dict):
1830 sts = int_or_none(ytcfg.get('STS'))
1831
1832 if not sts:
1833 # Attempt to extract from player
1834 if player_url is None:
1835 error_msg = 'Cannot extract signature timestamp without player_url.'
1836 if fatal:
1837 raise ExtractorError(error_msg)
1838 self.report_warning(error_msg)
1839 return
1840 if self._load_player(video_id, player_url, fatal=fatal):
1841 player_id = self._extract_player_info(player_url)
1842 code = self._code_cache[player_id]
1843 sts = int_or_none(self._search_regex(
1844 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1845 'JS player signature timestamp', group='sts', fatal=fatal))
1846 return sts
1847
1848 def _mark_watched(self, video_id, player_response):
1849 playback_url = url_or_none(try_get(
1850 player_response,
1851 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
1852 if not playback_url:
1853 return
1854 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1855 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1856
1857 # cpn generation algorithm is reverse engineered from base.js.
1858 # In fact it works even with dummy cpn.
1859 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1860 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1861
1862 qs.update({
1863 'ver': ['2'],
1864 'cpn': [cpn],
1865 })
1866 playback_url = compat_urlparse.urlunparse(
1867 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1868
1869 self._download_webpage(
1870 playback_url, video_id, 'Marking watched',
1871 'Unable to mark watched', fatal=False)
1872
    @staticmethod
    def _extract_urls(webpage):
        """Return all embedded YouTube URLs/ids found in an arbitrary webpage."""
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
            (?:
                <iframe[^>]+?src=|
                data-video-url=|
                <embed[^>]+?src=|
                embedSWF\(?:\s*|
                <object[^>]+data=|
                new\s+SWFObject\(
            )
            (["\'])
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
            \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        # NOTE: the last capture group of each match is the raw video id
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        entries.extend(m[-1] for m in matches)

        return entries
1904
1905 @staticmethod
1906 def _extract_url(webpage):
1907 urls = YoutubeIE._extract_urls(webpage)
1908 return urls[0] if urls else None
1909
1910 @classmethod
1911 def extract_id(cls, url):
1912 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1913 if mobj is None:
1914 raise ExtractorError('Invalid URL: %s' % url)
1915 video_id = mobj.group(2)
1916 return video_id
1917
1918 def _extract_chapters_from_json(self, data, video_id, duration):
1919 chapters_list = try_get(
1920 data,
1921 lambda x: x['playerOverlays']
1922 ['playerOverlayRenderer']
1923 ['decoratedPlayerBarRenderer']
1924 ['decoratedPlayerBarRenderer']
1925 ['playerBar']
1926 ['chapteredPlayerBarRenderer']
1927 ['chapters'],
1928 list)
1929 if not chapters_list:
1930 return
1931
1932 def chapter_time(chapter):
1933 return float_or_none(
1934 try_get(
1935 chapter,
1936 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1937 int),
1938 scale=1000)
1939 chapters = []
1940 for next_num, chapter in enumerate(chapters_list, start=1):
1941 start_time = chapter_time(chapter)
1942 if start_time is None:
1943 continue
1944 end_time = (chapter_time(chapters_list[next_num])
1945 if next_num < len(chapters_list) else duration)
1946 if end_time is None:
1947 continue
1948 title = try_get(
1949 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1950 compat_str)
1951 chapters.append({
1952 'start_time': start_time,
1953 'end_time': end_time,
1954 'title': title,
1955 })
1956 return chapters
1957
1958 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1959 return self._parse_json(self._search_regex(
1960 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1961 regex), webpage, name, default='{}'), video_id, fatal=False)
1962
1963 @staticmethod
1964 def parse_time_text(time_text):
1965 """
1966 Parse the comment time text
1967 time_text is in the format 'X units ago (edited)'
1968 """
1969 time_text_split = time_text.split(' ')
1970 if len(time_text_split) >= 3:
1971 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1972
1973 @staticmethod
1974 def _join_text_entries(runs):
1975 text = None
1976 for run in runs:
1977 if not isinstance(run, dict):
1978 continue
1979 sub_text = try_get(run, lambda x: x['text'], compat_str)
1980 if sub_text:
1981 if not text:
1982 text = sub_text
1983 continue
1984 text += sub_text
1985 return text
1986
1987 def _extract_comment(self, comment_renderer, parent=None):
1988 comment_id = comment_renderer.get('commentId')
1989 if not comment_id:
1990 return
1991 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1992 text = self._join_text_entries(comment_text_runs) or ''
1993 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1994 time_text = self._join_text_entries(comment_time_text)
1995 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
1996 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1997 author_id = try_get(comment_renderer,
1998 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
1999 votes = str_to_int(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2000 lambda x: x['likeCount']), compat_str)) or 0
2001 author_thumbnail = try_get(comment_renderer,
2002 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2003
2004 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2005 is_favorited = 'creatorHeart' in (try_get(
2006 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2007 return {
2008 'id': comment_id,
2009 'text': text,
2010 'timestamp': timestamp,
2011 'time_text': time_text,
2012 'like_count': votes,
2013 'is_favorited': is_favorited,
2014 'author': author,
2015 'author_id': author_id,
2016 'author_thumbnail': author_thumbnail,
2017 'author_is_uploader': author_is_uploader,
2018 'parent': parent or 'root'
2019 }
2020
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """Generator yielding comment info dicts (and the estimated total as an int).

        Follows API continuations page by page; recurses once for reply
        threads (parent set to the parent comment id). comment_counts is a
        shared 3-element list: [comments yielded, estimated total, reply
        thread number].
        """

        def extract_header(contents):
            # Find the comments header: report the expected total count and
            # pick the continuation matching the requested sort order
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = try_get(comments_header_renderer,
                                                 (lambda x: x['countText']['runs'][0]['text'],
                                                  lambda x: x['commentsCount']['runs'][0]['text']),
                                                 compat_str)
                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in the thread, then recurse into its replies
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['ctoken']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=self._continuation_query_ajax_to_api(continuation),
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Keep the visitor cookie-equivalent stable across paged requests
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                # Current response structure
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                    break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2192
2193 @staticmethod
2194 def _generate_comment_continuation(video_id):
2195 """
2196 Generates initial comment section continuation token from given video id
2197 """
2198 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2199 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2200 new_continuation_intlist = list(itertools.chain.from_iterable(
2201 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2202 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2203
2204 def _extract_comments(self, ytcfg, video_id, contents, webpage):
2205 """Entry for comment extraction"""
2206 def _real_comment_extract(contents):
2207 if isinstance(contents, list):
2208 for entry in contents:
2209 for key, renderer in entry.items():
2210 if key not in known_entry_comment_renderers:
2211 continue
2212 yield from self._comment_entries(
2213 renderer, video_id=video_id, ytcfg=ytcfg,
2214 identity_token=self._extract_identity_token(webpage, item_id=video_id),
2215 account_syncid=self._extract_account_syncid(ytcfg))
2216 break
2217 comments = []
2218 known_entry_comment_renderers = ('itemSectionRenderer',)
2219 estimated_total = 0
2220 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2221
2222 try:
2223 for comment in _real_comment_extract(contents):
2224 if len(comments) >= max_comments:
2225 break
2226 if isinstance(comment, int):
2227 estimated_total = comment
2228 continue
2229 comments.append(comment)
2230 except KeyboardInterrupt:
2231 self.to_screen('Interrupted by user')
2232 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2233 return {
2234 'comments': comments,
2235 'comment_count': len(comments),
2236 }
2237
2238 @staticmethod
2239 def _generate_player_context(sts=None):
2240 context = {
2241 'html5Preference': 'HTML5_PREF_WANTS',
2242 }
2243 if sts is not None:
2244 context['signatureTimestamp'] = sts
2245 return {
2246 'playbackContext': {
2247 'contentPlaybackContext': context
2248 }
2249 }
2250
2251 @staticmethod
2252 def _get_video_info_params(video_id, client='TVHTML5'):
2253 GVI_CLIENTS = {
2254 'ANDROID': {
2255 'c': 'ANDROID',
2256 'cver': '16.20',
2257 },
2258 'TVHTML5': {
2259 'c': 'TVHTML5',
2260 'cver': '6.20180913',
2261 }
2262 }
2263 query = {
2264 'video_id': video_id,
2265 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2266 'html5': '1'
2267 }
2268 query.update(GVI_CLIENTS.get(client))
2269 return query
2270
2271 def _real_extract(self, url):
2272 url, smuggled_data = unsmuggle_url(url, {})
2273 video_id = self._match_id(url)
2274
2275 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2276
2277 base_url = self.http_scheme() + '//www.youtube.com/'
2278 webpage_url = base_url + 'watch?v=' + video_id
2279 webpage = self._download_webpage(
2280 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2281
2282 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2283 identity_token = self._extract_identity_token(webpage, video_id)
2284 syncid = self._extract_account_syncid(ytcfg)
2285 headers = self._generate_api_headers(ytcfg, identity_token, syncid)
2286
2287 player_url = self._extract_player_url(ytcfg, webpage)
2288
2289 player_client = self._configuration_arg('player_client', [''])[0]
2290 if player_client not in ('web', 'android', ''):
2291 self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
2292 force_mobile_client = player_client != 'web'
2293 player_skip = self._configuration_arg('player_skip')
2294
2295 def get_text(x):
2296 if not x:
2297 return
2298 text = x.get('simpleText')
2299 if text and isinstance(text, compat_str):
2300 return text
2301 runs = x.get('runs')
2302 if not isinstance(runs, list):
2303 return
2304 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
2305
2306 ytm_streaming_data = {}
2307 if is_music_url:
2308 ytm_webpage = None
2309 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2310 if sts and not force_mobile_client and 'configs' not in player_skip:
2311 ytm_webpage = self._download_webpage(
2312 'https://music.youtube.com',
2313 video_id, fatal=False, note='Downloading remix client config')
2314
2315 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2316 ytm_client = 'WEB_REMIX'
2317 if not sts or force_mobile_client:
2318 # Android client already has signature descrambled
2319 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2320 if not sts:
2321 self.report_warning('Falling back to android remix client for player API.')
2322 ytm_client = 'ANDROID_MUSIC'
2323 ytm_cfg = {}
2324
2325 ytm_headers = self._generate_api_headers(
2326 ytm_cfg, identity_token, syncid,
2327 client=ytm_client)
2328 ytm_query = {'videoId': video_id}
2329 ytm_query.update(self._generate_player_context(sts))
2330
2331 ytm_player_response = self._extract_response(
2332 item_id=video_id, ep='player', query=ytm_query,
2333 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2334 default_client=ytm_client,
2335 note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
2336 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
2337
2338 player_response = None
2339 if webpage:
2340 player_response = self._extract_yt_initial_variable(
2341 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2342 video_id, 'initial player response')
2343
2344 if not player_response or force_mobile_client:
2345 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2346 yt_client = 'WEB'
2347 ytpcfg = ytcfg
2348 ytp_headers = headers
2349 if not sts or force_mobile_client:
2350 # Android client already has signature descrambled
2351 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2352 if not sts:
2353 self.report_warning('Falling back to android client for player API.')
2354 yt_client = 'ANDROID'
2355 ytpcfg = {}
2356 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid, yt_client)
2357
2358 yt_query = {'videoId': video_id}
2359 yt_query.update(self._generate_player_context(sts))
2360 player_response = self._extract_response(
2361 item_id=video_id, ep='player', query=yt_query,
2362 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2363 default_client=yt_client,
2364 note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
2365 ) or player_response
2366
2367 # Age-gate workarounds
2368 playability_status = player_response.get('playabilityStatus') or {}
2369 if playability_status.get('reason') in self._AGE_GATE_REASONS:
2370 gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
2371 for gvi_client in gvi_clients:
2372 pr = self._parse_json(try_get(compat_parse_qs(
2373 self._download_webpage(
2374 base_url + 'get_video_info', video_id,
2375 'Refetching age-gated %s info webpage' % gvi_client.lower(),
2376 'unable to download video info webpage', fatal=False,
2377 query=self._get_video_info_params(video_id, client=gvi_client))),
2378 lambda x: x['player_response'][0],
2379 compat_str) or '{}', video_id)
2380 if pr:
2381 break
2382 if not pr:
2383 self.report_warning('Falling back to embedded-only age-gate workaround.')
2384 embed_webpage = None
2385 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2386 if sts and not force_mobile_client and 'configs' not in player_skip:
2387 embed_webpage = self._download_webpage(
2388 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2389 video_id=video_id, note='Downloading age-gated embed config')
2390
2391 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2392 # If we extracted the embed webpage, it'll tell us if we can view the video
2393 embedded_pr = self._parse_json(
2394 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2395 video_id=video_id)
2396 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2397 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2398 yt_client = 'WEB_EMBEDDED_PLAYER'
2399 if not sts or force_mobile_client:
2400 # Android client already has signature descrambled
2401 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2402 if not sts:
2403 self.report_warning(
2404 'Falling back to android embedded client for player API (note: some formats may be missing).')
2405 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2406 ytcfg_age = {}
2407
2408 ytage_headers = self._generate_api_headers(
2409 ytcfg_age, identity_token, syncid, client=yt_client)
2410 yt_age_query = {'videoId': video_id}
2411 yt_age_query.update(self._generate_player_context(sts))
2412 pr = self._extract_response(
2413 item_id=video_id, ep='player', query=yt_age_query,
2414 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2415 default_client=yt_client,
2416 note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
2417 ) or {}
2418
2419 if pr:
2420 player_response = pr
2421
2422 trailer_video_id = try_get(
2423 playability_status,
2424 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2425 compat_str)
2426 if trailer_video_id:
2427 return self.url_result(
2428 trailer_video_id, self.ie_key(), trailer_video_id)
2429
2430 search_meta = (
2431 lambda x: self._html_search_meta(x, webpage, default=None)) \
2432 if webpage else lambda x: None
2433
2434 video_details = player_response.get('videoDetails') or {}
2435 microformat = try_get(
2436 player_response,
2437 lambda x: x['microformat']['playerMicroformatRenderer'],
2438 dict) or {}
2439 video_title = video_details.get('title') \
2440 or get_text(microformat.get('title')) \
2441 or search_meta(['og:title', 'twitter:title', 'title'])
2442 video_description = video_details.get('shortDescription')
2443
2444 if not smuggled_data.get('force_singlefeed', False):
2445 if not self.get_param('noplaylist'):
2446 multifeed_metadata_list = try_get(
2447 player_response,
2448 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2449 compat_str)
2450 if multifeed_metadata_list:
2451 entries = []
2452 feed_ids = []
2453 for feed in multifeed_metadata_list.split(','):
2454 # Unquote should take place before split on comma (,) since textual
2455 # fields may contain comma as well (see
2456 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2457 feed_data = compat_parse_qs(
2458 compat_urllib_parse_unquote_plus(feed))
2459
2460 def feed_entry(name):
2461 return try_get(
2462 feed_data, lambda x: x[name][0], compat_str)
2463
2464 feed_id = feed_entry('id')
2465 if not feed_id:
2466 continue
2467 feed_title = feed_entry('title')
2468 title = video_title
2469 if feed_title:
2470 title += ' (%s)' % feed_title
2471 entries.append({
2472 '_type': 'url_transparent',
2473 'ie_key': 'Youtube',
2474 'url': smuggle_url(
2475 base_url + 'watch?v=' + feed_data['id'][0],
2476 {'force_singlefeed': True}),
2477 'title': title,
2478 })
2479 feed_ids.append(feed_id)
2480 self.to_screen(
2481 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2482 % (', '.join(feed_ids), video_id))
2483 return self.playlist_result(
2484 entries, video_id, video_title, video_description)
2485 else:
2486 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2487
2488 formats, itags, stream_ids = [], [], []
2489 itag_qualities = {}
2490 q = qualities([
2491 # "tiny" is the smallest video-only format. But some audio-only formats
2492 # was also labeled "tiny". It is not clear if such formats still exist
2493 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2494 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2495 ])
2496
2497 streaming_data = player_response.get('streamingData') or {}
2498 streaming_formats = streaming_data.get('formats') or []
2499 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
2500 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2501 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2502
2503 for fmt in streaming_formats:
2504 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2505 continue
2506
2507 itag = str_or_none(fmt.get('itag'))
2508 audio_track = fmt.get('audioTrack') or {}
2509 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2510 if stream_id in stream_ids:
2511 continue
2512
2513 quality = fmt.get('quality')
2514 if quality == 'tiny' or not quality:
2515 quality = fmt.get('audioQuality', '').lower() or quality
2516 if itag and quality:
2517 itag_qualities[itag] = quality
2518 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2519 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2520 # number of fragment that would subsequently requested with (`&sq=N`)
2521 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2522 continue
2523
2524 fmt_url = fmt.get('url')
2525 if not fmt_url:
2526 sc = compat_parse_qs(fmt.get('signatureCipher'))
2527 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2528 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2529 if not (sc and fmt_url and encrypted_sig):
2530 continue
2531 if not player_url:
2532 continue
2533 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2534 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2535 fmt_url += '&' + sp + '=' + signature
2536
2537 if itag:
2538 itags.append(itag)
2539 stream_ids.append(stream_id)
2540
2541 tbr = float_or_none(
2542 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2543 dct = {
2544 'asr': int_or_none(fmt.get('audioSampleRate')),
2545 'filesize': int_or_none(fmt.get('contentLength')),
2546 'format_id': itag,
2547 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
2548 'fps': int_or_none(fmt.get('fps')),
2549 'height': int_or_none(fmt.get('height')),
2550 'quality': q(quality),
2551 'tbr': tbr,
2552 'url': fmt_url,
2553 'width': fmt.get('width'),
2554 'language': audio_track.get('id', '').split('.')[0],
2555 }
2556 mime_mobj = re.match(
2557 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2558 if mime_mobj:
2559 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2560 dct.update(parse_codecs(mime_mobj.group(2)))
2561 # The 3gp format in android client has a quality of "small",
2562 # but is actually worse than all other formats
2563 if dct['ext'] == '3gp':
2564 dct['quality'] = q('tiny')
2565 no_audio = dct.get('acodec') == 'none'
2566 no_video = dct.get('vcodec') == 'none'
2567 if no_audio:
2568 dct['vbr'] = tbr
2569 if no_video:
2570 dct['abr'] = tbr
2571 if no_audio or no_video:
2572 dct['downloader_options'] = {
2573 # Youtube throttles chunks >~10M
2574 'http_chunk_size': 10485760,
2575 }
2576 if dct.get('ext'):
2577 dct['container'] = dct['ext'] + '_dash'
2578 formats.append(dct)
2579
2580 skip_manifests = self._configuration_arg('skip')
2581 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2582 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2583
2584 for sd in (streaming_data, ytm_streaming_data):
2585 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2586 if hls_manifest_url:
2587 for f in self._extract_m3u8_formats(
2588 hls_manifest_url, video_id, 'mp4', fatal=False):
2589 itag = self._search_regex(
2590 r'/itag/(\d+)', f['url'], 'itag', default=None)
2591 if itag:
2592 f['format_id'] = itag
2593 formats.append(f)
2594
2595 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2596 if dash_manifest_url:
2597 for f in self._extract_mpd_formats(
2598 dash_manifest_url, video_id, fatal=False):
2599 itag = f['format_id']
2600 if itag in itags:
2601 continue
2602 if itag in itag_qualities:
2603 f['quality'] = q(itag_qualities[itag])
2604 filesize = int_or_none(self._search_regex(
2605 r'/clen/(\d+)', f.get('fragment_base_url')
2606 or f['url'], 'file size', default=None))
2607 if filesize:
2608 f['filesize'] = filesize
2609 formats.append(f)
2610
2611 if not formats:
2612 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
2613 self.raise_no_formats(
2614 'This video is DRM protected.', expected=True)
2615 pemr = try_get(
2616 playability_status,
2617 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2618 dict) or {}
2619 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2620 subreason = pemr.get('subreason')
2621 if subreason:
2622 subreason = clean_html(get_text(subreason))
2623 if subreason == 'The uploader has not made this video available in your country.':
2624 countries = microformat.get('availableCountries')
2625 if not countries:
2626 regions_allowed = search_meta('regionsAllowed')
2627 countries = regions_allowed.split(',') if regions_allowed else None
2628 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2629 reason += '\n' + subreason
2630 if reason:
2631 self.raise_no_formats(reason, expected=True)
2632
2633 self._sort_formats(formats)
2634
2635 keywords = video_details.get('keywords') or []
2636 if not keywords and webpage:
2637 keywords = [
2638 unescapeHTML(m.group('content'))
2639 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2640 for keyword in keywords:
2641 if keyword.startswith('yt:stretch='):
2642 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2643 if mobj:
2644 # NB: float is intentional for forcing float division
2645 w, h = (float(v) for v in mobj.groups())
2646 if w > 0 and h > 0:
2647 ratio = w / h
2648 for f in formats:
2649 if f.get('vcodec') != 'none':
2650 f['stretched_ratio'] = ratio
2651 break
2652
2653 thumbnails = []
2654 for container in (video_details, microformat):
2655 for thumbnail in (try_get(
2656 container,
2657 lambda x: x['thumbnail']['thumbnails'], list) or []):
2658 thumbnail_url = thumbnail.get('url')
2659 if not thumbnail_url:
2660 continue
2661 # Sometimes youtube gives a wrong thumbnail URL. See:
2662 # https://github.com/yt-dlp/yt-dlp/issues/233
2663 # https://github.com/ytdl-org/youtube-dl/issues/28023
2664 if 'maxresdefault' in thumbnail_url:
2665 thumbnail_url = thumbnail_url.split('?')[0]
2666 thumbnails.append({
2667 'url': thumbnail_url,
2668 'height': int_or_none(thumbnail.get('height')),
2669 'width': int_or_none(thumbnail.get('width')),
2670 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2671 })
2672 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2673 if thumbnail_url:
2674 thumbnails.append({
2675 'url': thumbnail_url,
2676 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2677 })
2678 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2679 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2680 thumbnails.append({
2681 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2682 'preference': 1,
2683 })
2684 self._remove_duplicate_formats(thumbnails)
2685
2686 category = microformat.get('category') or search_meta('genre')
2687 channel_id = video_details.get('channelId') \
2688 or microformat.get('externalChannelId') \
2689 or search_meta('channelId')
2690 duration = int_or_none(
2691 video_details.get('lengthSeconds')
2692 or microformat.get('lengthSeconds')) \
2693 or parse_duration(search_meta('duration'))
2694 is_live = video_details.get('isLive')
2695 is_upcoming = video_details.get('isUpcoming')
2696 owner_profile_url = microformat.get('ownerProfileUrl')
2697
2698 info = {
2699 'id': video_id,
2700 'title': self._live_title(video_title) if is_live else video_title,
2701 'formats': formats,
2702 'thumbnails': thumbnails,
2703 'description': video_description,
2704 'upload_date': unified_strdate(
2705 microformat.get('uploadDate')
2706 or search_meta('uploadDate')),
2707 'uploader': video_details['author'],
2708 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2709 'uploader_url': owner_profile_url,
2710 'channel_id': channel_id,
2711 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2712 'duration': duration,
2713 'view_count': int_or_none(
2714 video_details.get('viewCount')
2715 or microformat.get('viewCount')
2716 or search_meta('interactionCount')),
2717 'average_rating': float_or_none(video_details.get('averageRating')),
2718 'age_limit': 18 if (
2719 microformat.get('isFamilySafe') is False
2720 or search_meta('isFamilyFriendly') == 'false'
2721 or search_meta('og:restrictions:age') == '18+') else 0,
2722 'webpage_url': webpage_url,
2723 'categories': [category] if category else None,
2724 'tags': keywords,
2725 'is_live': is_live,
2726 'playable_in_embed': playability_status.get('playableInEmbed'),
2727 'was_live': video_details.get('isLiveContent'),
2728 }
2729
2730 pctr = try_get(
2731 player_response,
2732 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2733 subtitles = {}
2734 if pctr:
2735 def process_language(container, base_url, lang_code, sub_name, query):
2736 lang_subs = container.setdefault(lang_code, [])
2737 for fmt in self._SUBTITLE_FORMATS:
2738 query.update({
2739 'fmt': fmt,
2740 })
2741 lang_subs.append({
2742 'ext': fmt,
2743 'url': update_url_query(base_url, query),
2744 'name': sub_name,
2745 })
2746
2747 for caption_track in (pctr.get('captionTracks') or []):
2748 base_url = caption_track.get('baseUrl')
2749 if not base_url:
2750 continue
2751 if caption_track.get('kind') != 'asr':
2752 lang_code = (
2753 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2754 or caption_track.get('languageCode'))
2755 if not lang_code:
2756 continue
2757 process_language(
2758 subtitles, base_url, lang_code,
2759 try_get(caption_track, lambda x: x['name']['simpleText']),
2760 {})
2761 continue
2762 automatic_captions = {}
2763 for translation_language in (pctr.get('translationLanguages') or []):
2764 translation_language_code = translation_language.get('languageCode')
2765 if not translation_language_code:
2766 continue
2767 process_language(
2768 automatic_captions, base_url, translation_language_code,
2769 try_get(translation_language, (
2770 lambda x: x['languageName']['simpleText'],
2771 lambda x: x['languageName']['runs'][0]['text'])),
2772 {'tlang': translation_language_code})
2773 info['automatic_captions'] = automatic_captions
2774 info['subtitles'] = subtitles
2775
2776 parsed_url = compat_urllib_parse_urlparse(url)
2777 for component in [parsed_url.fragment, parsed_url.query]:
2778 query = compat_parse_qs(component)
2779 for k, v in query.items():
2780 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2781 d_k += '_time'
2782 if d_k not in info and k in s_ks:
2783 info[d_k] = parse_duration(query[k][0])
2784
2785 # Youtube Music Auto-generated description
2786 if video_description:
2787 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2788 if mobj:
2789 release_year = mobj.group('release_year')
2790 release_date = mobj.group('release_date')
2791 if release_date:
2792 release_date = release_date.replace('-', '')
2793 if not release_year:
2794 release_year = release_date[:4]
2795 info.update({
2796 'album': mobj.group('album'.strip()),
2797 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2798 'track': mobj.group('track').strip(),
2799 'release_date': release_date,
2800 'release_year': int_or_none(release_year),
2801 })
2802
2803 initial_data = None
2804 if webpage:
2805 initial_data = self._extract_yt_initial_variable(
2806 webpage, self._YT_INITIAL_DATA_RE, video_id,
2807 'yt initial data')
2808 if not initial_data:
2809 initial_data = self._extract_response(
2810 item_id=video_id, ep='next', fatal=False,
2811 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2812 note='Downloading initial data API JSON')
2813
2814 try:
2815 # This will error if there is no livechat
2816 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2817 info['subtitles']['live_chat'] = [{
2818 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2819 'video_id': video_id,
2820 'ext': 'json',
2821 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2822 }]
2823 except (KeyError, IndexError, TypeError):
2824 pass
2825
2826 if initial_data:
2827 chapters = self._extract_chapters_from_json(
2828 initial_data, video_id, duration)
2829 if not chapters:
2830 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2831 contents = try_get(
2832 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2833 list)
2834 if not contents:
2835 continue
2836
2837 def chapter_time(mmlir):
2838 return parse_duration(
2839 get_text(mmlir.get('timeDescription')))
2840
2841 chapters = []
2842 for next_num, content in enumerate(contents, start=1):
2843 mmlir = content.get('macroMarkersListItemRenderer') or {}
2844 start_time = chapter_time(mmlir)
2845 end_time = chapter_time(try_get(
2846 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2847 if next_num < len(contents) else duration
2848 if start_time is None or end_time is None:
2849 continue
2850 chapters.append({
2851 'start_time': start_time,
2852 'end_time': end_time,
2853 'title': get_text(mmlir.get('title')),
2854 })
2855 if chapters:
2856 break
2857 if chapters:
2858 info['chapters'] = chapters
2859
2860 contents = try_get(
2861 initial_data,
2862 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2863 list) or []
2864 for content in contents:
2865 vpir = content.get('videoPrimaryInfoRenderer')
2866 if vpir:
2867 stl = vpir.get('superTitleLink')
2868 if stl:
2869 stl = get_text(stl)
2870 if try_get(
2871 vpir,
2872 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2873 info['location'] = stl
2874 else:
2875 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2876 if mobj:
2877 info.update({
2878 'series': mobj.group(1),
2879 'season_number': int(mobj.group(2)),
2880 'episode_number': int(mobj.group(3)),
2881 })
2882 for tlb in (try_get(
2883 vpir,
2884 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2885 list) or []):
2886 tbr = tlb.get('toggleButtonRenderer') or {}
2887 for getter, regex in [(
2888 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2889 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2890 lambda x: x['accessibility'],
2891 lambda x: x['accessibilityData']['accessibilityData'],
2892 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2893 label = (try_get(tbr, getter, dict) or {}).get('label')
2894 if label:
2895 mobj = re.match(regex, label)
2896 if mobj:
2897 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2898 break
2899 sbr_tooltip = try_get(
2900 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2901 if sbr_tooltip:
2902 like_count, dislike_count = sbr_tooltip.split(' / ')
2903 info.update({
2904 'like_count': str_to_int(like_count),
2905 'dislike_count': str_to_int(dislike_count),
2906 })
2907 vsir = content.get('videoSecondaryInfoRenderer')
2908 if vsir:
2909 info['channel'] = get_text(try_get(
2910 vsir,
2911 lambda x: x['owner']['videoOwnerRenderer']['title'],
2912 dict))
2913 rows = try_get(
2914 vsir,
2915 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2916 list) or []
2917 multiple_songs = False
2918 for row in rows:
2919 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2920 multiple_songs = True
2921 break
2922 for row in rows:
2923 mrr = row.get('metadataRowRenderer') or {}
2924 mrr_title = mrr.get('title')
2925 if not mrr_title:
2926 continue
2927 mrr_title = get_text(mrr['title'])
2928 mrr_contents_text = get_text(mrr['contents'][0])
2929 if mrr_title == 'License':
2930 info['license'] = mrr_contents_text
2931 elif not multiple_songs:
2932 if mrr_title == 'Album':
2933 info['album'] = mrr_contents_text
2934 elif mrr_title == 'Artist':
2935 info['artist'] = mrr_contents_text
2936 elif mrr_title == 'Song':
2937 info['track'] = mrr_contents_text
2938
2939 fallbacks = {
2940 'channel': 'uploader',
2941 'channel_id': 'uploader_id',
2942 'channel_url': 'uploader_url',
2943 }
2944 for to, frm in fallbacks.items():
2945 if not info.get(to):
2946 info[to] = info.get(frm)
2947
2948 for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2949 v = info.get(s_k)
2950 if v:
2951 info[d_k] = v
2952
2953 is_private = bool_or_none(video_details.get('isPrivate'))
2954 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2955 is_membersonly = None
2956 is_premium = None
2957 if initial_data and is_private is not None:
2958 is_membersonly = False
2959 is_premium = False
2960 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2961 for content in contents or []:
2962 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2963 for badge in badges or []:
2964 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2965 if label.lower() == 'members only':
2966 is_membersonly = True
2967 break
2968 elif label.lower() == 'premium':
2969 is_premium = True
2970 break
2971 if is_membersonly or is_premium:
2972 break
2973
2974 # TODO: Add this for playlists
2975 info['availability'] = self._availability(
2976 is_private=is_private,
2977 needs_premium=is_premium,
2978 needs_subscription=is_membersonly,
2979 needs_auth=info['age_limit'] >= 18,
2980 is_unlisted=None if is_private is None else is_unlisted)
2981
2982 # get xsrf for annotations or comments
2983 get_annotations = self.get_param('writeannotations', False)
2984 get_comments = self.get_param('getcomments', False)
2985 if get_annotations or get_comments:
2986 xsrf_token = None
2987 ytcfg = self._extract_ytcfg(video_id, webpage)
2988 if ytcfg:
2989 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2990 if not xsrf_token:
2991 xsrf_token = self._search_regex(
2992 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2993 webpage, 'xsrf token', group='xsrf_token', fatal=False)
2994
2995 # annotations
2996 if get_annotations:
2997 invideo_url = try_get(
2998 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
2999 if xsrf_token and invideo_url:
3000 xsrf_field_name = None
3001 if ytcfg:
3002 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3003 if not xsrf_field_name:
3004 xsrf_field_name = self._search_regex(
3005 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3006 webpage, 'xsrf field name',
3007 group='xsrf_field_name', default='session_token')
3008 info['annotations'] = self._download_webpage(
3009 self._proto_relative_url(invideo_url),
3010 video_id, note='Downloading annotations',
3011 errnote='Unable to download video annotations', fatal=False,
3012 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3013
3014 if get_comments:
3015 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
3016
3017 self.mark_watched(video_id, player_response)
3018
3019 return info
3020
3021
3022 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3023 IE_DESC = 'YouTube.com tab'
3024 _VALID_URL = r'''(?x)
3025 https?://
3026 (?:\w+\.)?
3027 (?:
3028 youtube(?:kids)?\.com|
3029 invidio\.us
3030 )/
3031 (?:
3032 (?P<channel_type>channel|c|user|browse)/|
3033 (?P<not_channel>
3034 feed/|hashtag/|
3035 (?:playlist|watch)\?.*?\blist=
3036 )|
3037 (?!(?:%s)\b) # Direct URLs
3038 )
3039 (?P<id>[^/?\#&]+)
3040 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3041 IE_NAME = 'youtube:tab'
3042
3043 _TESTS = [{
3044 'note': 'playlists, multipage',
3045 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3046 'playlist_mincount': 94,
3047 'info_dict': {
3048 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3049 'title': 'Игорь Клейнер - Playlists',
3050 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3051 'uploader': 'Игорь Клейнер',
3052 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3053 },
3054 }, {
3055 'note': 'playlists, multipage, different order',
3056 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3057 'playlist_mincount': 94,
3058 'info_dict': {
3059 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3060 'title': 'Игорь Клейнер - Playlists',
3061 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3062 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3063 'uploader': 'Игорь Клейнер',
3064 },
3065 }, {
3066 'note': 'playlists, series',
3067 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3068 'playlist_mincount': 5,
3069 'info_dict': {
3070 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3071 'title': '3Blue1Brown - Playlists',
3072 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3073 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3074 'uploader': '3Blue1Brown',
3075 },
3076 }, {
3077 'note': 'playlists, singlepage',
3078 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3079 'playlist_mincount': 4,
3080 'info_dict': {
3081 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3082 'title': 'ThirstForScience - Playlists',
3083 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3084 'uploader': 'ThirstForScience',
3085 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3086 }
3087 }, {
3088 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3089 'only_matching': True,
3090 }, {
3091 'note': 'basic, single video playlist',
3092 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3093 'info_dict': {
3094 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3095 'uploader': 'Sergey M.',
3096 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3097 'title': 'youtube-dl public playlist',
3098 },
3099 'playlist_count': 1,
3100 }, {
3101 'note': 'empty playlist',
3102 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3103 'info_dict': {
3104 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3105 'uploader': 'Sergey M.',
3106 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3107 'title': 'youtube-dl empty playlist',
3108 },
3109 'playlist_count': 0,
3110 }, {
3111 'note': 'Home tab',
3112 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3113 'info_dict': {
3114 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3115 'title': 'lex will - Home',
3116 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3117 'uploader': 'lex will',
3118 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3119 },
3120 'playlist_mincount': 2,
3121 }, {
3122 'note': 'Videos tab',
3123 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3124 'info_dict': {
3125 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3126 'title': 'lex will - Videos',
3127 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3128 'uploader': 'lex will',
3129 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3130 },
3131 'playlist_mincount': 975,
3132 }, {
3133 'note': 'Videos tab, sorted by popular',
3134 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3135 'info_dict': {
3136 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3137 'title': 'lex will - Videos',
3138 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3139 'uploader': 'lex will',
3140 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3141 },
3142 'playlist_mincount': 199,
3143 }, {
3144 'note': 'Playlists tab',
3145 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3146 'info_dict': {
3147 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3148 'title': 'lex will - Playlists',
3149 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3150 'uploader': 'lex will',
3151 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3152 },
3153 'playlist_mincount': 17,
3154 }, {
3155 'note': 'Community tab',
3156 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3157 'info_dict': {
3158 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3159 'title': 'lex will - Community',
3160 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3161 'uploader': 'lex will',
3162 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3163 },
3164 'playlist_mincount': 18,
3165 }, {
3166 'note': 'Channels tab',
3167 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3168 'info_dict': {
3169 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3170 'title': 'lex will - Channels',
3171 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3172 'uploader': 'lex will',
3173 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3174 },
3175 'playlist_mincount': 12,
3176 }, {
3177 'note': 'Search tab',
3178 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3179 'playlist_mincount': 40,
3180 'info_dict': {
3181 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3182 'title': '3Blue1Brown - Search - linear algebra',
3183 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3184 'uploader': '3Blue1Brown',
3185 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3186 },
3187 }, {
3188 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3189 'only_matching': True,
3190 }, {
3191 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3192 'only_matching': True,
3193 }, {
3194 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3195 'only_matching': True,
3196 }, {
3197 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3198 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3199 'info_dict': {
3200 'title': '29C3: Not my department',
3201 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3202 'uploader': 'Christiaan008',
3203 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3204 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3205 },
3206 'playlist_count': 96,
3207 }, {
3208 'note': 'Large playlist',
3209 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3210 'info_dict': {
3211 'title': 'Uploads from Cauchemar',
3212 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3213 'uploader': 'Cauchemar',
3214 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3215 },
3216 'playlist_mincount': 1123,
3217 }, {
3218 'note': 'even larger playlist, 8832 videos',
3219 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3220 'only_matching': True,
3221 }, {
3222 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3223 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3224 'info_dict': {
3225 'title': 'Uploads from Interstellar Movie',
3226 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3227 'uploader': 'Interstellar Movie',
3228 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3229 },
3230 'playlist_mincount': 21,
3231 }, {
3232 'note': 'Playlist with "show unavailable videos" button',
3233 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3234 'info_dict': {
3235 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3236 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3237 'uploader': 'Phim Siêu Nhân Nhật Bản',
3238 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3239 },
3240 'playlist_mincount': 200,
3241 }, {
3242 'note': 'Playlist with unavailable videos in page 7',
3243 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3244 'info_dict': {
3245 'title': 'Uploads from BlankTV',
3246 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3247 'uploader': 'BlankTV',
3248 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3249 },
3250 'playlist_mincount': 1000,
3251 }, {
3252 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3253 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3254 'info_dict': {
3255 'title': 'Data Analysis with Dr Mike Pound',
3256 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3257 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3258 'uploader': 'Computerphile',
3259 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3260 },
3261 'playlist_mincount': 11,
3262 }, {
3263 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3264 'only_matching': True,
3265 }, {
3266 'note': 'Playlist URL that does not actually serve a playlist',
3267 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3268 'info_dict': {
3269 'id': 'FqZTN594JQw',
3270 'ext': 'webm',
3271 'title': "Smiley's People 01 detective, Adventure Series, Action",
3272 'uploader': 'STREEM',
3273 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3274 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3275 'upload_date': '20150526',
3276 'license': 'Standard YouTube License',
3277 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3278 'categories': ['People & Blogs'],
3279 'tags': list,
3280 'view_count': int,
3281 'like_count': int,
3282 'dislike_count': int,
3283 },
3284 'params': {
3285 'skip_download': True,
3286 },
3287 'skip': 'This video is not available.',
3288 'add_ie': [YoutubeIE.ie_key()],
3289 }, {
3290 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3291 'only_matching': True,
3292 }, {
3293 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3294 'only_matching': True,
3295 }, {
3296 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3297 'info_dict': {
3298 'id': 'X1whbWASnNQ', # This will keep changing
3299 'ext': 'mp4',
3300 'title': compat_str,
3301 'uploader': 'Sky News',
3302 'uploader_id': 'skynews',
3303 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3304 'upload_date': r're:\d{8}',
3305 'description': compat_str,
3306 'categories': ['News & Politics'],
3307 'tags': list,
3308 'like_count': int,
3309 'dislike_count': int,
3310 },
3311 'params': {
3312 'skip_download': True,
3313 },
3314 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3315 }, {
3316 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3317 'info_dict': {
3318 'id': 'a48o2S1cPoo',
3319 'ext': 'mp4',
3320 'title': 'The Young Turks - Live Main Show',
3321 'uploader': 'The Young Turks',
3322 'uploader_id': 'TheYoungTurks',
3323 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3324 'upload_date': '20150715',
3325 'license': 'Standard YouTube License',
3326 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3327 'categories': ['News & Politics'],
3328 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3329 'like_count': int,
3330 'dislike_count': int,
3331 },
3332 'params': {
3333 'skip_download': True,
3334 },
3335 'only_matching': True,
3336 }, {
3337 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3338 'only_matching': True,
3339 }, {
3340 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3341 'only_matching': True,
3342 }, {
3343 'note': 'A channel that is not live. Should raise error',
3344 'url': 'https://www.youtube.com/user/numberphile/live',
3345 'only_matching': True,
3346 }, {
3347 'url': 'https://www.youtube.com/feed/trending',
3348 'only_matching': True,
3349 }, {
3350 'url': 'https://www.youtube.com/feed/library',
3351 'only_matching': True,
3352 }, {
3353 'url': 'https://www.youtube.com/feed/history',
3354 'only_matching': True,
3355 }, {
3356 'url': 'https://www.youtube.com/feed/subscriptions',
3357 'only_matching': True,
3358 }, {
3359 'url': 'https://www.youtube.com/feed/watch_later',
3360 'only_matching': True,
3361 }, {
3362 'note': 'Recommended - redirects to home page',
3363 'url': 'https://www.youtube.com/feed/recommended',
3364 'only_matching': True,
3365 }, {
3366 'note': 'inline playlist with not always working continuations',
3367 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3368 'only_matching': True,
3369 }, {
3370 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3371 'only_matching': True,
3372 }, {
3373 'url': 'https://www.youtube.com/course',
3374 'only_matching': True,
3375 }, {
3376 'url': 'https://www.youtube.com/zsecurity',
3377 'only_matching': True,
3378 }, {
3379 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3380 'only_matching': True,
3381 }, {
3382 'url': 'https://www.youtube.com/TheYoungTurks/live',
3383 'only_matching': True,
3384 }, {
3385 'url': 'https://www.youtube.com/hashtag/cctv9',
3386 'info_dict': {
3387 'id': 'cctv9',
3388 'title': '#cctv9',
3389 },
3390 'playlist_mincount': 350,
3391 }, {
3392 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3393 'only_matching': True,
3394 }, {
3395 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3396 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3397 'only_matching': True
3398 }, {
3399 'note': '/browse/ should redirect to /channel/',
3400 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3401 'only_matching': True
3402 }, {
3403 'note': 'VLPL, should redirect to playlist?list=PL...',
3404 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3405 'info_dict': {
3406 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3407 'uploader': 'NoCopyrightSounds',
3408 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3409 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3410 'title': 'NCS Releases',
3411 },
3412 'playlist_mincount': 166,
3413 }, {
3414 'note': 'Topic, should redirect to playlist?list=UU...',
3415 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3416 'info_dict': {
3417 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3418 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3419 'title': 'Uploads from Royalty Free Music - Topic',
3420 'uploader': 'Royalty Free Music - Topic',
3421 },
3422 'expected_warnings': [
3423 'A channel/user page was given',
3424 'The URL does not have a videos tab',
3425 ],
3426 'playlist_mincount': 101,
3427 }, {
3428 'note': 'Topic without a UU playlist',
3429 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3430 'info_dict': {
3431 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3432 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3433 },
3434 'expected_warnings': [
3435 'A channel/user page was given',
3436 'The URL does not have a videos tab',
3437 'Falling back to channel URL',
3438 ],
3439 'playlist_mincount': 9,
3440 }, {
3441 'note': 'Youtube music Album',
3442 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3443 'info_dict': {
3444 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3445 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3446 },
3447 'playlist_count': 50,
3448 }]
3449
3450 @classmethod
3451 def suitable(cls, url):
3452 return False if YoutubeIE.suitable(url) else super(
3453 YoutubeTabIE, cls).suitable(url)
3454
3455 def _extract_channel_id(self, webpage):
3456 channel_id = self._html_search_meta(
3457 'channelId', webpage, 'channel id', default=None)
3458 if channel_id:
3459 return channel_id
3460 channel_url = self._html_search_meta(
3461 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3462 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3463 'twitter:app:url:googleplay'), webpage, 'channel url')
3464 return self._search_regex(
3465 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3466 channel_url, 'channel id')
3467
3468 @staticmethod
3469 def _extract_basic_item_renderer(item):
3470 # Modified from _extract_grid_item_renderer
3471 known_basic_renderers = (
3472 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3473 )
3474 for key, renderer in item.items():
3475 if not isinstance(renderer, dict):
3476 continue
3477 elif key in known_basic_renderers:
3478 return renderer
3479 elif key.startswith('grid') and key.endswith('Renderer'):
3480 return renderer
3481
3482 def _grid_entries(self, grid_renderer):
3483 for item in grid_renderer['items']:
3484 if not isinstance(item, dict):
3485 continue
3486 renderer = self._extract_basic_item_renderer(item)
3487 if not isinstance(renderer, dict):
3488 continue
3489 title = try_get(
3490 renderer, (lambda x: x['title']['runs'][0]['text'],
3491 lambda x: x['title']['simpleText']), compat_str)
3492 # playlist
3493 playlist_id = renderer.get('playlistId')
3494 if playlist_id:
3495 yield self.url_result(
3496 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3497 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3498 video_title=title)
3499 continue
3500 # video
3501 video_id = renderer.get('videoId')
3502 if video_id:
3503 yield self._extract_video(renderer)
3504 continue
3505 # channel
3506 channel_id = renderer.get('channelId')
3507 if channel_id:
3508 title = try_get(
3509 renderer, lambda x: x['title']['simpleText'], compat_str)
3510 yield self.url_result(
3511 'https://www.youtube.com/channel/%s' % channel_id,
3512 ie=YoutubeTabIE.ie_key(), video_title=title)
3513 continue
3514 # generic endpoint URL support
3515 ep_url = urljoin('https://www.youtube.com/', try_get(
3516 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3517 compat_str))
3518 if ep_url:
3519 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3520 if ie.suitable(ep_url):
3521 yield self.url_result(
3522 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3523 break
3524
3525 def _shelf_entries_from_content(self, shelf_renderer):
3526 content = shelf_renderer.get('content')
3527 if not isinstance(content, dict):
3528 return
3529 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3530 if renderer:
3531 # TODO: add support for nested playlists so each shelf is processed
3532 # as separate playlist
3533 # TODO: this includes only first N items
3534 for entry in self._grid_entries(renderer):
3535 yield entry
3536 renderer = content.get('horizontalListRenderer')
3537 if renderer:
3538 # TODO
3539 pass
3540
3541 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3542 ep = try_get(
3543 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3544 compat_str)
3545 shelf_url = urljoin('https://www.youtube.com', ep)
3546 if shelf_url:
3547 # Skipping links to another channels, note that checking for
3548 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3549 # will not work
3550 if skip_channels and '/channels?' in shelf_url:
3551 return
3552 title = try_get(
3553 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
3554 yield self.url_result(shelf_url, video_title=title)
3555 # Shelf may not contain shelf URL, fallback to extraction from content
3556 for entry in self._shelf_entries_from_content(shelf_renderer):
3557 yield entry
3558
3559 def _playlist_entries(self, video_list_renderer):
3560 for content in video_list_renderer['contents']:
3561 if not isinstance(content, dict):
3562 continue
3563 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3564 if not isinstance(renderer, dict):
3565 continue
3566 video_id = renderer.get('videoId')
3567 if not video_id:
3568 continue
3569 yield self._extract_video(renderer)
3570
3571 def _rich_entries(self, rich_grid_renderer):
3572 renderer = try_get(
3573 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3574 video_id = renderer.get('videoId')
3575 if not video_id:
3576 return
3577 yield self._extract_video(renderer)
3578
3579 def _video_entry(self, video_renderer):
3580 video_id = video_renderer.get('videoId')
3581 if video_id:
3582 return self._extract_video(video_renderer)
3583
3584 def _post_thread_entries(self, post_thread_renderer):
3585 post_renderer = try_get(
3586 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3587 if not post_renderer:
3588 return
3589 # video attachment
3590 video_renderer = try_get(
3591 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3592 video_id = video_renderer.get('videoId')
3593 if video_id:
3594 entry = self._extract_video(video_renderer)
3595 if entry:
3596 yield entry
3597 # playlist attachment
3598 playlist_id = try_get(
3599 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3600 if playlist_id:
3601 yield self.url_result(
3602 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3603 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3604 # inline video links
3605 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3606 for run in runs:
3607 if not isinstance(run, dict):
3608 continue
3609 ep_url = try_get(
3610 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3611 if not ep_url:
3612 continue
3613 if not YoutubeIE.suitable(ep_url):
3614 continue
3615 ep_video_id = YoutubeIE._match_id(ep_url)
3616 if video_id == ep_video_id:
3617 continue
3618 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3619
3620 def _post_thread_continuation_entries(self, post_thread_continuation):
3621 contents = post_thread_continuation.get('contents')
3622 if not isinstance(contents, list):
3623 return
3624 for content in contents:
3625 renderer = content.get('backstagePostThreadRenderer')
3626 if not isinstance(renderer, dict):
3627 continue
3628 for entry in self._post_thread_entries(renderer):
3629 yield entry
3630
    # Dead code kept as reference only (explicitly marked unused);
    # richItemRenderer content is handled by _rich_entries instead.
    r''' # unused
    def _rich_grid_entries(self, contents):
        for content in contents:
            video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
            if video_renderer:
                entry = self._video_entry(video_renderer)
                if entry:
                    yield entry
    '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of a tab, following API continuations until exhausted.

        `continuation_list` is a one-element list used as a writable cell so the
        nested generator can hand the next continuation token back to the outer
        loop without `nonlocal` (kept for Python 2 compatibility).
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Not an item section; may still be a rich grid item
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # Map renderer key -> generator producing its entries
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                # Fall back to the item section's own continuation
                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            # Fall back to the parent renderer's continuation
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        context = self._extract_context(ytcfg)
        visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

        # Page through the browse API while a continuation token remains
        for page_num in itertools.count(1):
            if not continuation:
                break
            query = {
                'continuation': continuation['continuation'],
                'clickTracking': {'clickTrackingParams': continuation['itct']}
            }
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=query, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Carry the visitorData forward so subsequent requests share the session
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Old-style continuation payloads ('continuationContents')
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # New-style continuation payloads ('onResponseReceived*'); the second
            # tuple member names the key the handler expects the items under
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Re-wrap the items so the existing handlers can consume them
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3759
3760 @staticmethod
3761 def _extract_selected_tab(tabs):
3762 for tab in tabs:
3763 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3764 if renderer.get('selected') is True:
3765 return renderer
3766 else:
3767 raise ExtractorError('Unable to find selected tab')
3768
3769 @staticmethod
3770 def _extract_uploader(data):
3771 uploader = {}
3772 sidebar_renderer = try_get(
3773 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3774 if sidebar_renderer:
3775 for item in sidebar_renderer:
3776 if not isinstance(item, dict):
3777 continue
3778 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3779 if not isinstance(renderer, dict):
3780 continue
3781 owner = try_get(
3782 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3783 if owner:
3784 uploader['uploader'] = owner.get('text')
3785 uploader['uploader_id'] = try_get(
3786 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3787 uploader['uploader_url'] = urljoin(
3788 'https://www.youtube.com/',
3789 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3790 return {k: v for k, v in uploader.items() if v is not None}
3791
    def _extract_from_tabs(self, item_id, webpage, data, tabs):
        """Build a playlist result for the selected tab of a channel/playlist page."""
        playlist_id = title = description = channel_url = channel_name = channel_id = None
        # Both names are rebound below; the shared [] is never mutated
        thumbnails_list = tags = []

        selected_tab = self._extract_selected_tab(tabs)
        # Channel pages carry channelMetadataRenderer, playlist pages
        # playlistMetadataRenderer; either feeds the common block below
        renderer = try_get(
            data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
        if renderer:
            channel_name = renderer.get('title')
            channel_url = renderer.get('channelUrl')
            channel_id = renderer.get('externalId')
        else:
            renderer = try_get(
                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)

        if renderer:
            title = renderer.get('title')
            description = renderer.get('description', '')
            playlist_id = channel_id
            tags = renderer.get('keywords', '').split()
            # Prefer the avatar; fall back to the sidebar playlist thumbnail
            thumbnails_list = (
                try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
                or try_get(
                    data,
                    lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
                    list)
                or [])

        thumbnails = []
        for t in thumbnails_list:
            if not isinstance(t, dict):
                continue
            thumbnail_url = url_or_none(t.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'width': int_or_none(t.get('width')),
                'height': int_or_none(t.get('height')),
            })
        if playlist_id is None:
            playlist_id = item_id
        if title is None:
            # Hashtag pages have no metadata renderer; use the header or the id
            title = (
                try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
                or playlist_id)
        title += format_field(selected_tab, 'title', ' - %s')
        title += format_field(selected_tab, 'expandedText', ' - %s')

        metadata = {
            'playlist_id': playlist_id,
            'playlist_title': title,
            'playlist_description': description,
            'uploader': channel_name,
            'uploader_id': channel_id,
            'uploader_url': channel_url,
            'thumbnails': thumbnails,
            'tags': tags,
        }
        # For playlists the uploader comes from the sidebar instead
        if not channel_id:
            metadata.update(self._extract_uploader(data))
        metadata.update({
            'channel': metadata['uploader'],
            'channel_id': metadata['uploader_id'],
            'channel_url': metadata['uploader_url']})
        return self.playlist_result(
            self._entries(
                selected_tab, playlist_id,
                self._extract_identity_token(webpage, item_id),
                self._extract_account_syncid(data),
                self._extract_ytcfg(item_id, webpage)),
            **metadata)
3864
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Yield videos of an 'infinite' mix playlist, stopping when it wraps around.

        Mixes have no real end: each 'next' response overlaps the previous page,
        so entries are de-duplicated against `last_id` and extraction stops when
        the very first video comes around again.
        """
        first_id = last_id = None
        ytcfg = self._extract_ytcfg(playlist_id, webpage)
        headers = self._generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(data),
            identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
            visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # Skip everything up to and including the last video of the previous page
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            # NOTE(review): watch_endpoint may be None if the last renderer has no
            # navigationEndpoint — the .get() calls below would then raise; confirm
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query,
                ep='next',
                headers=headers,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3903
3904 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
3905 title = playlist.get('title') or try_get(
3906 data, lambda x: x['titleText']['simpleText'], compat_str)
3907 playlist_id = playlist.get('playlistId') or item_id
3908
3909 # Delegating everything except mix playlists to regular tab-based playlist URL
3910 playlist_url = urljoin(url, try_get(
3911 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3912 compat_str))
3913 if playlist_url and playlist_url != url:
3914 return self.url_result(
3915 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3916 video_title=title)
3917
3918 return self.playlist_result(
3919 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
3920 playlist_id=playlist_id, playlist_title=title)
3921
    def _reload_with_unavailable_videos(self, item_id, data, webpage):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.
        """
        sidebar_renderer = try_get(
            data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
        if not sidebar_renderer:
            return
        browse_id = params = None
        # Scan the sidebar menu for the 'show unavailable videos' entry
        for item in sidebar_renderer:
            if not isinstance(item, dict):
                continue
            renderer = item.get('playlistSidebarPrimaryInfoRenderer')
            menu_renderer = try_get(
                renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
            for menu_item in menu_renderer:
                if not isinstance(menu_item, dict):
                    continue
                nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
                text = try_get(
                    nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
                if not text or text.lower() != 'show unavailable videos':
                    continue
                browse_endpoint = try_get(
                    nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
                browse_id = browse_endpoint.get('browseId')
                params = browse_endpoint.get('params')
                # Only breaks the inner loop; remaining sidebar items are still scanned
                break

        ytcfg = self._extract_ytcfg(item_id, webpage)
        # NOTE(review): account_syncid is extracted from ytcfg here, while other
        # call sites pass the initial data — confirm both sources are valid
        headers = self._generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
            identity_token=self._extract_identity_token(webpage, item_id=item_id),
            visitor_data=try_get(
                self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        # Defaults reproduce the endpoint of a plain 'VL<playlist id>' browse
        query = {
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False,
            note='Downloading API JSON with unavailable videos')
3965
3966 def _extract_webpage(self, url, item_id):
3967 retries = self.get_param('extractor_retries', 3)
3968 count = -1
3969 last_error = 'Incomplete yt initial data recieved'
3970 while count < retries:
3971 count += 1
3972 # Sometimes youtube returns a webpage with incomplete ytInitialData
3973 # See: https://github.com/yt-dlp/yt-dlp/issues/116
3974 if count:
3975 self.report_warning('%s. Retrying ...' % last_error)
3976 webpage = self._download_webpage(
3977 url, item_id,
3978 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
3979 data = self._extract_yt_initial_data(item_id, webpage)
3980 if data.get('contents') or data.get('currentVideoEndpoint'):
3981 break
3982 # Extract alerts here only when there is error
3983 self._extract_and_report_alerts(data)
3984 if count >= retries:
3985 raise ExtractorError(last_error)
3986 return webpage, data
3987
3988 @staticmethod
3989 def _smuggle_data(entries, data):
3990 for entry in entries:
3991 if data:
3992 entry['url'] = smuggle_url(entry['url'], data)
3993 yield entry
3994
3995 def _real_extract(self, url):
3996 url, smuggled_data = unsmuggle_url(url, {})
3997 if self.is_music_url(url):
3998 smuggled_data['is_music_url'] = True
3999 info_dict = self.__real_extract(url, smuggled_data)
4000 if info_dict.get('entries'):
4001 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4002 return info_dict
4003
    # Splits a URL into pre / tab / post around the channel tab segment,
    # reusing the named groups of _VALID_URL (see get_mobj in __real_extract)
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4005
    def __real_extract(self, url, smuggled_data):
        """Normalize the URL (music redirects, channel tab handling) and dispatch
        to tab, playlist or single-video extraction."""
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Regex groupdict with None values replaced by '' for easy string ops
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Re-match after the rewrites above
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)

        # Dispatch order: tabbed page -> watch-page playlist -> single video
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4121
4122
class YoutubePlaylistIE(InfoExtractor):
    """Thin extractor for bare playlist ids and playlist URLs.

    Does no extraction itself: it normalizes the input to a canonical
    https://www.youtube.com/playlist URL and delegates to YoutubeTabIE.
    """
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Anything YoutubeTabIE already matches must not be claimed here
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        # URLs with a video id belong to YoutubeIE
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite to a canonical playlist URL and hand off to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query string when present so extra params survive
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4205
4206
class YoutubeYtBeIE(InfoExtractor):
    """youtu.be short links that additionally carry a playlist id."""
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Rewrite the short link as a full watch URL so that YoutubeTabIE
        # sees both the video and the playlist.
        match = re.match(self._VALID_URL, url)
        video_id, playlist_id = match.group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4245
4246
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # "ytuser:NAME" is just shorthand for the canonical /user/ page;
        # build that URL and let YoutubeTabIE do the real work.
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4260
4261
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Liked videos live in the special "LL" playlist; delegate to the
        # playlist machinery in YoutubeTabIE.
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4279
4280
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Extra innertube search parameter blob; subclasses override this to
    # apply filters (see YoutubeSearchDateIE).
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to n video results for query, following continuation pages."""
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page nests results under twoColumnSearchResultsRenderer;
            # continuation responses deliver them via
            # onResponseReceivedCommands instead.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for slr_content in slr_contents:
                # Remember the first continuation token encountered; later
                # entries in the same response never override it.
                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    # Skip anything that is not a plain video result with an
                    # id (e.g. shelves, promoted items, channel renderers).
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    if total == n:
                        return

            # No token means this was the last page of results.
            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
4350
4351
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # URL-encoded 'CAI=' — innertube filter blob selecting newest-first order
    # (presumably a protobuf sort flag; see IE_DESC above)
    _SEARCH_PARAMS = 'CAI%3D'
4357
4358
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        return cls._VALID_URL

    def _real_extract(self, url):
        # Pull the search terms (and the optional "sp" filter blob) out of
        # the URL, then reuse the regular ytsearch machinery.
        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        query = (params.get('search_query') or params.get('q'))[0]
        self._SEARCH_PARAMS = params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4384
4385
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """Base class for YouTube feed extractors.

    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Derive the extractor name from the feed slug.
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Every feed maps onto a /feed/<name> page handled by YoutubeTabIE.
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4402
4403
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch-later is exposed as the special playlist "WL".
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4416
4417
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the bare youtube.com homepage as well as the :ytrec shortcut.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    # Override the base class: recommendations are also served anonymously.
    _LOGIN_REQUIRED = False
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4433
4434
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    # Feed slug used by YoutubeFeedsInfoExtractor to build the /feed/ URL.
    _FEED_NAME = 'subscriptions'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4446
4447
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    # Feed slug used by YoutubeFeedsInfoExtractor to build the /feed/ URL.
    _FEED_NAME = 'history'
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4456
4457
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # A watch URL without a v= parameter is almost always the result of
        # an unquoted "&" in the shell, so explain that instead of failing
        # with a cryptic error.
        message = (
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like youtube-dl '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply youtube-dl BaW_jenozKc .')
        raise ExtractorError(message, expected=True)
4505
4506
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    # Valid video ids are exactly 11 characters; 1-10 characters means the
    # URL was most likely cut short when copied or pasted.
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url)
        raise ExtractorError(message, expected=True)