]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/youtube.py
[youtube:tab] Extract playlist availability (#504)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
1 # coding: utf-8
2
3 from __future__ import unicode_literals
4
5 import base64
6 import calendar
7 import copy
8 import hashlib
9 import itertools
10 import json
11 import os.path
12 import random
13 import re
14 import time
15 import traceback
16
17 from .common import InfoExtractor, SearchInfoExtractor
18 from ..compat import (
19 compat_chr,
20 compat_HTTPError,
21 compat_parse_qs,
22 compat_str,
23 compat_urllib_parse_unquote_plus,
24 compat_urllib_parse_urlencode,
25 compat_urllib_parse_urlparse,
26 compat_urlparse,
27 )
28 from ..jsinterp import JSInterpreter
29 from ..utils import (
30 bool_or_none,
31 bytes_to_intlist,
32 clean_html,
33 dict_get,
34 datetime_from_str,
35 error_to_compat_str,
36 ExtractorError,
37 format_field,
38 float_or_none,
39 int_or_none,
40 intlist_to_bytes,
41 mimetype2ext,
42 parse_codecs,
43 parse_count,
44 parse_duration,
45 qualities,
46 remove_start,
47 smuggle_url,
48 str_or_none,
49 str_to_int,
50 try_get,
51 unescapeHTML,
52 unified_strdate,
53 unsmuggle_url,
54 update_url_query,
55 url_or_none,
56 urlencode_postdata,
57 urljoin
58 )
59
60
def parse_qs(url):
    """Parse the query string of *url* into a dict of value lists."""
    parsed_url = compat_urlparse.urlparse(url)
    return compat_urlparse.parse_qs(parsed_url.query)
63
64
65 class YoutubeBaseInfoExtractor(InfoExtractor):
66 """Provide base functions for Youtube extractors"""
    # Legacy Google account login endpoints (the flow itself is broken; see _login)
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Path segments that can never be a channel/user name in a YouTube URL
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Prefixed playlist IDs plus the special lists (RDMM/WL/LL/LM)
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
84
85 def _login(self):
86 """
87 Attempt to log in to YouTube.
88 True is returned if successful or skipped.
89 False is returned if login failed.
90
91 If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
92 """
93
94 def warn(message):
95 self.report_warning(message)
96
97 # username+password login is broken
98 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
99 self.raise_login_required(
100 'Login details are needed to download this content', method='cookies')
101 username, password = self._get_login_info()
102 if username:
103 warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
104 return
105
106 # Everything below this is broken!
107 r'''
108 # No authentication to be performed
109 if username is None:
110 if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
111 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
112 # if self.get_param('cookiefile'): # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
113 # self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
114 return True
115
116 login_page = self._download_webpage(
117 self._LOGIN_URL, None,
118 note='Downloading login page',
119 errnote='unable to fetch login page', fatal=False)
120 if login_page is False:
121 return
122
123 login_form = self._hidden_inputs(login_page)
124
125 def req(url, f_req, note, errnote):
126 data = login_form.copy()
127 data.update({
128 'pstMsg': 1,
129 'checkConnection': 'youtube',
130 'checkedDomains': 'youtube',
131 'hl': 'en',
132 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
133 'f.req': json.dumps(f_req),
134 'flowName': 'GlifWebSignIn',
135 'flowEntry': 'ServiceLogin',
136 # TODO: reverse actual botguard identifier generation algo
137 'bgRequest': '["identifier",""]',
138 })
139 return self._download_json(
140 url, None, note=note, errnote=errnote,
141 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
142 fatal=False,
143 data=urlencode_postdata(data), headers={
144 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
145 'Google-Accounts-XSRF': 1,
146 })
147
148 lookup_req = [
149 username,
150 None, [], None, 'US', None, None, 2, False, True,
151 [
152 None, None,
153 [2, 1, None, 1,
154 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
155 None, [], 4],
156 1, [None, None, []], None, None, None, True
157 ],
158 username,
159 ]
160
161 lookup_results = req(
162 self._LOOKUP_URL, lookup_req,
163 'Looking up account info', 'Unable to look up account info')
164
165 if lookup_results is False:
166 return False
167
168 user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
169 if not user_hash:
170 warn('Unable to extract user hash')
171 return False
172
173 challenge_req = [
174 user_hash,
175 None, 1, None, [1, None, None, None, [password, None, True]],
176 [
177 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
178 1, [None, None, []], None, None, None, True
179 ]]
180
181 challenge_results = req(
182 self._CHALLENGE_URL, challenge_req,
183 'Logging in', 'Unable to log in')
184
185 if challenge_results is False:
186 return
187
188 login_res = try_get(challenge_results, lambda x: x[0][5], list)
189 if login_res:
190 login_msg = try_get(login_res, lambda x: x[5], compat_str)
191 warn(
192 'Unable to login: %s' % 'Invalid password'
193 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
194 return False
195
196 res = try_get(challenge_results, lambda x: x[0][-1], list)
197 if not res:
198 warn('Unable to extract result entry')
199 return False
200
201 login_challenge = try_get(res, lambda x: x[0][0], list)
202 if login_challenge:
203 challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
204 if challenge_str == 'TWO_STEP_VERIFICATION':
205 # SEND_SUCCESS - TFA code has been successfully sent to phone
206 # QUOTA_EXCEEDED - reached the limit of TFA codes
207 status = try_get(login_challenge, lambda x: x[5], compat_str)
208 if status == 'QUOTA_EXCEEDED':
209 warn('Exceeded the limit of TFA codes, try later')
210 return False
211
212 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
213 if not tl:
214 warn('Unable to extract TL')
215 return False
216
217 tfa_code = self._get_tfa_info('2-step verification code')
218
219 if not tfa_code:
220 warn(
221 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
222 '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
223 return False
224
225 tfa_code = remove_start(tfa_code, 'G-')
226
227 tfa_req = [
228 user_hash, None, 2, None,
229 [
230 9, None, None, None, None, None, None, None,
231 [None, tfa_code, True, 2]
232 ]]
233
234 tfa_results = req(
235 self._TFA_URL.format(tl), tfa_req,
236 'Submitting TFA code', 'Unable to submit TFA code')
237
238 if tfa_results is False:
239 return False
240
241 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
242 if tfa_res:
243 tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
244 warn(
245 'Unable to finish TFA: %s' % 'Invalid TFA code'
246 if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
247 return False
248
249 check_cookie_url = try_get(
250 tfa_results, lambda x: x[0][-1][2], compat_str)
251 else:
252 CHALLENGES = {
253 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
254 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
255 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
256 }
257 challenge = CHALLENGES.get(
258 challenge_str,
259 '%s returned error %s.' % (self.IE_NAME, challenge_str))
260 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
261 return False
262 else:
263 check_cookie_url = try_get(res, lambda x: x[2], compat_str)
264
265 if not check_cookie_url:
266 warn('Unable to extract CheckCookie URL')
267 return False
268
269 check_cookie_results = self._download_webpage(
270 check_cookie_url, None, 'Checking cookie', fatal=False)
271
272 if check_cookie_results is False:
273 return False
274
275 if 'https://myaccount.google.com/' not in check_cookie_results:
276 warn('Unable to log in')
277 return False
278
279 return True
280 '''
281
282 def _initialize_consent(self):
283 cookies = self._get_cookies('https://www.youtube.com/')
284 if cookies.get('__Secure-3PSID'):
285 return
286 consent_id = None
287 consent = cookies.get('CONSENT')
288 if consent:
289 if 'YES' in consent.value:
290 return
291 consent_id = self._search_regex(
292 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
293 if not consent_id:
294 consent_id = random.randint(100, 999)
295 self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
296
297 def _real_initialize(self):
298 self._initialize_consent()
299 if self._downloader is None:
300 return
301 if not self._login():
302 return
303
    # Regexes locating the JSON blobs YouTube embeds in its webpages
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Terminator pattern used to bound the above matches (see _extract_yt_initial_data)
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'

    # Built-in fallback ytcfg values per Innertube client, used when the
    # webpage's own ytcfg is unavailable or lacks a key (see _get_default_ytcfg)
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID'
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER'
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_MUSIC'
        }
    }

    # API hostname per Innertube client (see _get_innertube_host)
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }
401
402 def _get_default_ytcfg(self, client='WEB'):
403 if client in self._YT_DEFAULT_YTCFGS:
404 return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
405 self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
406 return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
407
408 def _get_innertube_host(self, client='WEB'):
409 return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
410
411 def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
412 # try_get but with fallback to default ytcfg client values when present
413 _func = lambda y: try_get(y, getter, expected_type)
414 return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
415
    def _extract_client_name(self, ytcfg, default_client='WEB'):
        # Innertube client name (e.g. 'WEB'), with default-client fallback
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)

    def _extract_client_version(self, ytcfg, default_client='WEB'):
        # Innertube client version string, with default-client fallback
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)

    def _extract_api_key(self, ytcfg=None, default_client='WEB'):
        # Innertube API key, with default-client fallback
        return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
424
425 def _extract_context(self, ytcfg=None, default_client='WEB'):
426 _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
427 context = _get_context(ytcfg)
428 if context:
429 return context
430
431 context = _get_context(self._get_default_ytcfg(default_client))
432 if not ytcfg:
433 return context
434
435 # Recreate the client context (required)
436 context['client'].update({
437 'clientVersion': self._extract_client_version(ytcfg, default_client),
438 'clientName': self._extract_client_name(ytcfg, default_client),
439 })
440 visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
441 if visitor_data:
442 context['client']['visitorData'] = visitor_data
443 return context
444
445 def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
446 # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
447 # See: https://github.com/yt-dlp/yt-dlp/issues/393
448 yt_cookies = self._get_cookies('https://www.youtube.com')
449 sapisid_cookie = dict_get(
450 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
451 if sapisid_cookie is None:
452 return
453 time_now = round(time.time())
454 # SAPISID cookie is required if not already present
455 if not yt_cookies.get('SAPISID'):
456 self._set_cookie(
457 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
458 # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
459 sapisidhash = hashlib.sha1(
460 f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
461 return f'SAPISIDHASH {time_now}_{sapisidhash}'
462
463 def _call_api(self, ep, query, video_id, fatal=True, headers=None,
464 note='Downloading API JSON', errnote='Unable to download API page',
465 context=None, api_key=None, api_hostname=None, default_client='WEB'):
466
467 data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
468 data.update(query)
469 real_headers = self._generate_api_headers(client=default_client)
470 real_headers.update({'content-type': 'application/json'})
471 if headers:
472 real_headers.update(headers)
473 return self._download_json(
474 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
475 video_id=video_id, fatal=fatal, note=note, errnote=errnote,
476 data=json.dumps(data).encode('utf8'), headers=real_headers,
477 query={'key': api_key or self._extract_api_key()})
478
479 def _extract_yt_initial_data(self, video_id, webpage):
480 return self._parse_json(
481 self._search_regex(
482 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
483 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
484 video_id)
485
486 def _extract_identity_token(self, webpage, item_id):
487 ytcfg = self._extract_ytcfg(item_id, webpage)
488 if ytcfg:
489 token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
490 if token:
491 return token
492 return self._search_regex(
493 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
494 'identity token', default=None)
495
496 @staticmethod
497 def _extract_account_syncid(data):
498 """
499 Extract syncId required to download private playlists of secondary channels
500 @param data Either response or ytcfg
501 """
502 sync_ids = (try_get(
503 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
504 lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
505 if len(sync_ids) >= 2 and sync_ids[1]:
506 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
507 # and just "user_syncid||" for primary channel. We only want the channel_syncid
508 return sync_ids[0]
509 # ytcfg includes channel_syncid if on secondary channel
510 return data.get('DELEGATED_SESSION_ID')
511
512 def _extract_ytcfg(self, video_id, webpage):
513 if not webpage:
514 return {}
515 return self._parse_json(
516 self._search_regex(
517 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
518 default='{}'), video_id, fatal=False) or {}
519
    def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
                              visitor_data=None, api_hostname=None, client='WEB'):
        """Assemble the HTTP headers for an Innertube API request.

        Client name/version come from *ytcfg* with default-client fallback;
        identity token, account syncid, visitor data and the SAPISIDHASH
        Authorization header are added only when available.
        """
        origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(client))
        headers = {
            'X-YouTube-Client-Name': compat_str(
                self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)),
            'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
            'Origin': origin
        }
        # Derive visitor data from the ytcfg context when not given explicitly
        if not visitor_data and ytcfg:
            visitor_data = try_get(
                self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
        if identity_token:
            headers['X-Youtube-Identity-Token'] = identity_token
        if account_syncid:
            headers['X-Goog-PageId'] = account_syncid
            headers['X-Goog-AuthUser'] = 0
        if visitor_data:
            headers['X-Goog-Visitor-Id'] = visitor_data
        # Cookie-based authorization (None when no SAPISID cookie is present)
        auth = self._generate_sapisidhash_header(origin)
        if auth is not None:
            headers['Authorization'] = auth
            headers['X-Origin'] = origin
        return headers
544
545 @staticmethod
546 def _build_api_continuation_query(continuation, ctp=None):
547 query = {
548 'continuation': continuation
549 }
550 # TODO: Inconsistency with clickTrackingParams.
551 # Currently we have a fixed ctp contained within context (from ytcfg)
552 # and a ctp in root query for continuation.
553 if ctp:
554 query['clickTracking'] = {'clickTrackingParams': ctp}
555 return query
556
557 @classmethod
558 def _continuation_query_ajax_to_api(cls, continuation_query):
559 continuation = dict_get(continuation_query, ('continuation', 'ctoken'))
560 return cls._build_api_continuation_query(continuation, continuation_query.get('itct'))
561
562 @staticmethod
563 def _build_continuation_query(continuation, ctp=None):
564 query = {
565 'ctoken': continuation,
566 'continuation': continuation,
567 }
568 if ctp:
569 query['itct'] = ctp
570 return query
571
572 @classmethod
573 def _extract_next_continuation_data(cls, renderer):
574 next_continuation = try_get(
575 renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
576 lambda x: x['continuation']['reloadContinuationData']), dict)
577 if not next_continuation:
578 return
579 continuation = next_continuation.get('continuation')
580 if not continuation:
581 return
582 ctp = next_continuation.get('clickTrackingParams')
583 return cls._build_continuation_query(continuation, ctp)
584
585 @classmethod
586 def _extract_continuation_ep_data(cls, continuation_ep: dict):
587 if isinstance(continuation_ep, dict):
588 continuation = try_get(
589 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
590 if not continuation:
591 return
592 ctp = continuation_ep.get('clickTrackingParams')
593 return cls._build_continuation_query(continuation, ctp)
594
595 @classmethod
596 def _extract_continuation(cls, renderer):
597 next_continuation = cls._extract_next_continuation_data(renderer)
598 if next_continuation:
599 return next_continuation
600 contents = []
601 for key in ('contents', 'items'):
602 contents.extend(try_get(renderer, lambda x: x[key], list) or [])
603 for content in contents:
604 if not isinstance(content, dict):
605 continue
606 continuation_ep = try_get(
607 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
608 lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
609 dict)
610 continuation = cls._extract_continuation_ep_data(continuation_ep)
611 if continuation:
612 return continuation
613
614 @staticmethod
615 def _extract_alerts(data):
616 for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
617 if not isinstance(alert_dict, dict):
618 continue
619 for alert in alert_dict.values():
620 alert_type = alert.get('type')
621 if not alert_type:
622 continue
623 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
624 if message:
625 yield alert_type, message
626 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
627 message += try_get(run, lambda x: x['text'], compat_str)
628 if message:
629 yield alert_type, message
630
631 def _report_alerts(self, alerts, expected=True):
632 errors = []
633 warnings = []
634 for alert_type, alert_message in alerts:
635 if alert_type.lower() == 'error':
636 errors.append([alert_type, alert_message])
637 else:
638 warnings.append([alert_type, alert_message])
639
640 for alert_type, alert_message in (warnings + errors[:-1]):
641 self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
642 if errors:
643 raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
644
645 def _extract_and_report_alerts(self, data, *args, **kwargs):
646 return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
647
648 def _extract_badges(self, renderer: dict):
649 badges = set()
650 for badge in try_get(renderer, lambda x: x['badges'], list) or []:
651 label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
652 if label:
653 badges.add(label.lower())
654 return badges
655
656 @staticmethod
657 def _join_text_entries(runs):
658 text = None
659 for run in runs:
660 if not isinstance(run, dict):
661 continue
662 sub_text = try_get(run, lambda x: x['text'], compat_str)
663 if sub_text:
664 if not text:
665 text = sub_text
666 continue
667 text += sub_text
668 return text
669
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='WEB'):
        """Call the Innertube API with retries.

        Retries on intermittent HTTP 500/503/404 errors and on responses
        missing all of *check_get_keys* (incomplete data).  Alerts in the
        response are reported; errors raise (or warn+return None when not
        *fatal*).
        """
        response = None
        last_error = None
        count = -1  # incremented before first attempt, so attempt 0 is not a retry
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    last_error = 'HTTP Error %s' % e.cause.code
                    if count < retries:
                        continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                # Youtube may send alerts if there was an issue with the continuation page
                try:
                    self._extract_and_report_alerts(response, expected=False)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
725
726 @staticmethod
727 def is_music_url(url):
728 return re.match(r'https?://music\.youtube\.com/', url) is not None
729
    def _extract_video(self, renderer):
        """Convert a videoRenderer dict into a url-type result for YoutubeIE."""
        video_id = renderer.get('videoId')
        # Title may be a list of runs or a plain simpleText
        title = try_get(
            renderer,
            (lambda x: x['title']['runs'][0]['text'],
             lambda x: x['title']['simpleText']), compat_str)
        description = try_get(
            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
            compat_str)
        duration = parse_duration(try_get(
            renderer, lambda x: x['lengthText']['simpleText'], compat_str))
        view_count_text = try_get(
            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
        # e.g. '1,234 views' -> 1234 (whitespace stripped before matching)
        view_count = str_to_int(self._search_regex(
            r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
            'view count', default=None))
        uploader = try_get(
            renderer,
            (lambda x: x['ownerText']['runs'][0]['text'],
             lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
        return {
            '_type': 'url',
            'ie_key': YoutubeIE.ie_key(),
            'id': video_id,
            'url': video_id,
            'title': title,
            'description': description,
            'duration': duration,
            'view_count': view_count,
            'uploader': uploader,
        }
761
762
763 class YoutubeIE(YoutubeBaseInfoExtractor):
764 IE_DESC = 'YouTube.com'
765 _INVIDIOUS_SITES = (
766 # invidious-redirect websites
767 r'(?:www\.)?redirect\.invidious\.io',
768 r'(?:(?:www|dev)\.)?invidio\.us',
769 # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
770 r'(?:www\.)?invidious\.pussthecat\.org',
771 r'(?:www\.)?invidious\.zee\.li',
772 r'(?:www\.)?invidious\.ethibox\.fr',
773 r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
774 # youtube-dl invidious instances list
775 r'(?:(?:www|no)\.)?invidiou\.sh',
776 r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
777 r'(?:www\.)?invidious\.kabi\.tk',
778 r'(?:www\.)?invidious\.mastodon\.host',
779 r'(?:www\.)?invidious\.zapashcanon\.fr',
780 r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
781 r'(?:www\.)?invidious\.tinfoil-hat\.net',
782 r'(?:www\.)?invidious\.himiko\.cloud',
783 r'(?:www\.)?invidious\.reallyancient\.tech',
784 r'(?:www\.)?invidious\.tube',
785 r'(?:www\.)?invidiou\.site',
786 r'(?:www\.)?invidious\.site',
787 r'(?:www\.)?invidious\.xyz',
788 r'(?:www\.)?invidious\.nixnet\.xyz',
789 r'(?:www\.)?invidious\.048596\.xyz',
790 r'(?:www\.)?invidious\.drycat\.fr',
791 r'(?:www\.)?inv\.skyn3t\.in',
792 r'(?:www\.)?tube\.poal\.co',
793 r'(?:www\.)?tube\.connect\.cafe',
794 r'(?:www\.)?vid\.wxzm\.sx',
795 r'(?:www\.)?vid\.mint\.lgbt',
796 r'(?:www\.)?vid\.puffyan\.us',
797 r'(?:www\.)?yewtu\.be',
798 r'(?:www\.)?yt\.elukerio\.org',
799 r'(?:www\.)?yt\.lelux\.fi',
800 r'(?:www\.)?invidious\.ggc-project\.de',
801 r'(?:www\.)?yt\.maisputain\.ovh',
802 r'(?:www\.)?ytprivate\.com',
803 r'(?:www\.)?invidious\.13ad\.de',
804 r'(?:www\.)?invidious\.toot\.koeln',
805 r'(?:www\.)?invidious\.fdn\.fr',
806 r'(?:www\.)?watch\.nettohikari\.com',
807 r'(?:www\.)?invidious\.namazso\.eu',
808 r'(?:www\.)?invidious\.silkky\.cloud',
809 r'(?:www\.)?invidious\.exonip\.de',
810 r'(?:www\.)?invidious\.riverside\.rocks',
811 r'(?:www\.)?invidious\.blamefran\.net',
812 r'(?:www\.)?invidious\.moomoo\.de',
813 r'(?:www\.)?ytb\.trom\.tf',
814 r'(?:www\.)?yt\.cyberhost\.uk',
815 r'(?:www\.)?kgg2m7yk5aybusll\.onion',
816 r'(?:www\.)?qklhadlycap4cnod\.onion',
817 r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
818 r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
819 r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
820 r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
821 r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
822 r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
823 r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
824 r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
825 r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
826 r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
827 )
828 _VALID_URL = r"""(?x)^
829 (
830 (?:https?://|//) # http(s):// or protocol-independent URL
831 (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
832 (?:www\.)?deturl\.com/www\.youtube\.com|
833 (?:www\.)?pwnyoutube\.com|
834 (?:www\.)?hooktube\.com|
835 (?:www\.)?yourepeat\.com|
836 tube\.majestyc\.net|
837 %(invidious)s|
838 youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains
839 (?:.*?\#/)? # handle anchor (#/) redirect urls
840 (?: # the various things that can precede the ID:
841 (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
842 |(?: # or the v= param in all its forms
843 (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
844 (?:\?|\#!?) # the params delimiter ? or # or #!
845 (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
846 v=
847 )
848 ))
849 |(?:
850 youtu\.be| # just youtu.be/xxxx
851 vid\.plus| # or vid.plus/xxxx
852 zwearz\.com/watch| # or zwearz.com/watch/xxxx
853 %(invidious)s
854 )/
855 |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
856 )
857 )? # all until now is optional -> you can pass the naked ID
858 (?P<id>[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
859 (?(1).+)? # if we found the ID, everything can follow
860 (?:\#|$)""" % {
861 'invidious': '|'.join(_INVIDIOUS_SITES),
862 }
863 _PLAYER_INFO_RE = (
864 r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
865 r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
866 r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
867 )
    # Static metadata for known YouTube itags, keyed by itag as a string.
    # Values supply ext/codec/resolution hints that are merged into formats
    # whose stream data does not carry them. NOTE(review): abr/height values
    # here are historical approximations, not guarantees from the API.
    _formats = {
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
        '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
        '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
        '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
        '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
        '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
        '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
        '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
        '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
        '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
        '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},


        # 3D videos
        '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
        '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
        '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
        '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
        '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},

        # Apple HTTP Live Streaming
        '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
        '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
        '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
        '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},

        # DASH mp4 video
        '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
        '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
        '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
        '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},

        # Dash mp4 audio
        '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
        '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
        '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},

        # Dash webm
        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
        '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
        '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},

        # Dash webm audio
        '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
        '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},

        # Dash webm audio with opus inside
        '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
        '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
        '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},

        # RTMP (unnamed)
        '_rtmp': {'protocol': 'rtmp'},

        # av01 video only formats sometimes served with "unknown" codecs
        '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
        '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
    }
    # Subtitle container formats requested from the timedtext endpoint
    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')

    # Reason strings YouTube returns for age-restricted videos
    # NOTE(review): matching logic lives outside this section — presumably
    # compared against the playability status; verify at the call site
    _AGE_GATE_REASONS = (
        'Sign in to confirm your age',
        'This video may be inappropriate for some users.',
        'Sorry, this content is age-restricted.')

    # Disable automatic geo-bypass for single-video extraction
    _GEO_BYPASS = False

    IE_NAME = 'youtube'
983 _TESTS = [
984 {
985 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
986 'info_dict': {
987 'id': 'BaW_jenozKc',
988 'ext': 'mp4',
989 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
990 'uploader': 'Philipp Hagemeister',
991 'uploader_id': 'phihag',
992 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
993 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
994 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
995 'upload_date': '20121002',
996 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
997 'categories': ['Science & Technology'],
998 'tags': ['youtube-dl'],
999 'duration': 10,
1000 'view_count': int,
1001 'like_count': int,
1002 'dislike_count': int,
1003 'start_time': 1,
1004 'end_time': 9,
1005 }
1006 },
1007 {
1008 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
1009 'note': 'Embed-only video (#1746)',
1010 'info_dict': {
1011 'id': 'yZIXLfi8CZQ',
1012 'ext': 'mp4',
1013 'upload_date': '20120608',
1014 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
1015 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
1016 'uploader': 'SET India',
1017 'uploader_id': 'setindia',
1018 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
1019 'age_limit': 18,
1020 },
1021 'skip': 'Private video',
1022 },
1023 {
1024 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1025 'note': 'Use the first video ID in the URL',
1026 'info_dict': {
1027 'id': 'BaW_jenozKc',
1028 'ext': 'mp4',
1029 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1030 'uploader': 'Philipp Hagemeister',
1031 'uploader_id': 'phihag',
1032 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1033 'upload_date': '20121002',
1034 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1035 'categories': ['Science & Technology'],
1036 'tags': ['youtube-dl'],
1037 'duration': 10,
1038 'view_count': int,
1039 'like_count': int,
1040 'dislike_count': int,
1041 },
1042 'params': {
1043 'skip_download': True,
1044 },
1045 },
1046 {
1047 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1048 'note': '256k DASH audio (format 141) via DASH manifest',
1049 'info_dict': {
1050 'id': 'a9LDPn-MO4I',
1051 'ext': 'm4a',
1052 'upload_date': '20121002',
1053 'uploader_id': '8KVIDEO',
1054 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1055 'description': '',
1056 'uploader': '8KVIDEO',
1057 'title': 'UHDTV TEST 8K VIDEO.mp4'
1058 },
1059 'params': {
1060 'youtube_include_dash_manifest': True,
1061 'format': '141',
1062 },
1063 'skip': 'format 141 not served anymore',
1064 },
1065 # DASH manifest with encrypted signature
1066 {
1067 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1068 'info_dict': {
1069 'id': 'IB3lcPjvWLA',
1070 'ext': 'm4a',
1071 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1072 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1073 'duration': 244,
1074 'uploader': 'AfrojackVEVO',
1075 'uploader_id': 'AfrojackVEVO',
1076 'upload_date': '20131011',
1077 'abr': 129.495,
1078 },
1079 'params': {
1080 'youtube_include_dash_manifest': True,
1081 'format': '141/bestaudio[ext=m4a]',
1082 },
1083 },
1084 # Controversy video
1085 {
1086 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1087 'info_dict': {
1088 'id': 'T4XJQO3qol8',
1089 'ext': 'mp4',
1090 'duration': 219,
1091 'upload_date': '20100909',
1092 'uploader': 'Amazing Atheist',
1093 'uploader_id': 'TheAmazingAtheist',
1094 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
1095 'title': 'Burning Everyone\'s Koran',
1096 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
1097 }
1098 },
1099 # Normal age-gate video (embed allowed)
1100 {
1101 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1102 'info_dict': {
1103 'id': 'HtVdAasjOgU',
1104 'ext': 'mp4',
1105 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1106 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1107 'duration': 142,
1108 'uploader': 'The Witcher',
1109 'uploader_id': 'WitcherGame',
1110 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1111 'upload_date': '20140605',
1112 'age_limit': 18,
1113 },
1114 },
1115 # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1116 # YouTube Red ad is not captured for creator
1117 {
1118 'url': '__2ABJjxzNo',
1119 'info_dict': {
1120 'id': '__2ABJjxzNo',
1121 'ext': 'mp4',
1122 'duration': 266,
1123 'upload_date': '20100430',
1124 'uploader_id': 'deadmau5',
1125 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1126 'creator': 'deadmau5',
1127 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1128 'uploader': 'deadmau5',
1129 'title': 'Deadmau5 - Some Chords (HD)',
1130 'alt_title': 'Some Chords',
1131 },
1132 'expected_warnings': [
1133 'DASH manifest missing',
1134 ]
1135 },
1136 # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1137 {
1138 'url': 'lqQg6PlCWgI',
1139 'info_dict': {
1140 'id': 'lqQg6PlCWgI',
1141 'ext': 'mp4',
1142 'duration': 6085,
1143 'upload_date': '20150827',
1144 'uploader_id': 'olympic',
1145 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1146 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1147 'uploader': 'Olympic',
1148 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
1149 },
1150 'params': {
1151 'skip_download': 'requires avconv',
1152 }
1153 },
1154 # Non-square pixels
1155 {
1156 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1157 'info_dict': {
1158 'id': '_b-2C3KPAM0',
1159 'ext': 'mp4',
1160 'stretched_ratio': 16 / 9.,
1161 'duration': 85,
1162 'upload_date': '20110310',
1163 'uploader_id': 'AllenMeow',
1164 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1165 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1166 'uploader': '孫ᄋᄅ',
1167 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1168 },
1169 },
1170 # url_encoded_fmt_stream_map is empty string
1171 {
1172 'url': 'qEJwOuvDf7I',
1173 'info_dict': {
1174 'id': 'qEJwOuvDf7I',
1175 'ext': 'webm',
1176 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1177 'description': '',
1178 'upload_date': '20150404',
1179 'uploader_id': 'spbelect',
1180 'uploader': 'Наблюдатели Петербурга',
1181 },
1182 'params': {
1183 'skip_download': 'requires avconv',
1184 },
1185 'skip': 'This live event has ended.',
1186 },
1187 # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1188 {
1189 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1190 'info_dict': {
1191 'id': 'FIl7x6_3R5Y',
1192 'ext': 'webm',
1193 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1194 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1195 'duration': 220,
1196 'upload_date': '20150625',
1197 'uploader_id': 'dorappi2000',
1198 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1199 'uploader': 'dorappi2000',
1200 'formats': 'mincount:31',
1201 },
1202 'skip': 'not actual anymore',
1203 },
1204 # DASH manifest with segment_list
1205 {
1206 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1207 'md5': '8ce563a1d667b599d21064e982ab9e31',
1208 'info_dict': {
1209 'id': 'CsmdDsKjzN8',
1210 'ext': 'mp4',
1211 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1212 'uploader': 'Airtek',
1213 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1214 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1215 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1216 },
1217 'params': {
1218 'youtube_include_dash_manifest': True,
1219 'format': '135', # bestvideo
1220 },
1221 'skip': 'This live event has ended.',
1222 },
1223 {
1224 # Multifeed videos (multiple cameras), URL is for Main Camera
1225 'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1226 'info_dict': {
1227 'id': 'jvGDaLqkpTg',
1228 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1229 'description': 'md5:e03b909557865076822aa169218d6a5d',
1230 },
1231 'playlist': [{
1232 'info_dict': {
1233 'id': 'jvGDaLqkpTg',
1234 'ext': 'mp4',
1235 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1236 'description': 'md5:e03b909557865076822aa169218d6a5d',
1237 'duration': 10643,
1238 'upload_date': '20161111',
1239 'uploader': 'Team PGP',
1240 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1241 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1242 },
1243 }, {
1244 'info_dict': {
1245 'id': '3AKt1R1aDnw',
1246 'ext': 'mp4',
1247 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1248 'description': 'md5:e03b909557865076822aa169218d6a5d',
1249 'duration': 10991,
1250 'upload_date': '20161111',
1251 'uploader': 'Team PGP',
1252 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1253 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1254 },
1255 }, {
1256 'info_dict': {
1257 'id': 'RtAMM00gpVc',
1258 'ext': 'mp4',
1259 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1260 'description': 'md5:e03b909557865076822aa169218d6a5d',
1261 'duration': 10995,
1262 'upload_date': '20161111',
1263 'uploader': 'Team PGP',
1264 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1265 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1266 },
1267 }, {
1268 'info_dict': {
1269 'id': '6N2fdlP3C5U',
1270 'ext': 'mp4',
1271 'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1272 'description': 'md5:e03b909557865076822aa169218d6a5d',
1273 'duration': 10990,
1274 'upload_date': '20161111',
1275 'uploader': 'Team PGP',
1276 'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1277 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1278 },
1279 }],
1280 'params': {
1281 'skip_download': True,
1282 },
1283 },
1284 {
1285 # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1286 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1287 'info_dict': {
1288 'id': 'gVfLd0zydlo',
1289 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1290 },
1291 'playlist_count': 2,
1292 'skip': 'Not multifeed anymore',
1293 },
1294 {
1295 'url': 'https://vid.plus/FlRa-iH7PGw',
1296 'only_matching': True,
1297 },
1298 {
1299 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1300 'only_matching': True,
1301 },
1302 {
1303 # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1304 # Also tests cut-off URL expansion in video description (see
1305 # https://github.com/ytdl-org/youtube-dl/issues/1892,
1306 # https://github.com/ytdl-org/youtube-dl/issues/8164)
1307 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1308 'info_dict': {
1309 'id': 'lsguqyKfVQg',
1310 'ext': 'mp4',
1311 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1312 'alt_title': 'Dark Walk - Position Music',
1313 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1314 'duration': 133,
1315 'upload_date': '20151119',
1316 'uploader_id': 'IronSoulElf',
1317 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1318 'uploader': 'IronSoulElf',
1319 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1320 'track': 'Dark Walk - Position Music',
1321 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
1322 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1323 },
1324 'params': {
1325 'skip_download': True,
1326 },
1327 },
1328 {
1329 # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1330 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1331 'only_matching': True,
1332 },
1333 {
1334 # Video with yt:stretch=17:0
1335 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1336 'info_dict': {
1337 'id': 'Q39EVAstoRM',
1338 'ext': 'mp4',
1339 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1340 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1341 'upload_date': '20151107',
1342 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1343 'uploader': 'CH GAMER DROID',
1344 },
1345 'params': {
1346 'skip_download': True,
1347 },
1348 'skip': 'This video does not exist.',
1349 },
1350 {
1351 # Video with incomplete 'yt:stretch=16:'
1352 'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1353 'only_matching': True,
1354 },
1355 {
1356 # Video licensed under Creative Commons
1357 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1358 'info_dict': {
1359 'id': 'M4gD1WSo5mA',
1360 'ext': 'mp4',
1361 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1362 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1363 'duration': 721,
1364 'upload_date': '20150127',
1365 'uploader_id': 'BerkmanCenter',
1366 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1367 'uploader': 'The Berkman Klein Center for Internet & Society',
1368 'license': 'Creative Commons Attribution license (reuse allowed)',
1369 },
1370 'params': {
1371 'skip_download': True,
1372 },
1373 },
1374 {
1375 # Channel-like uploader_url
1376 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1377 'info_dict': {
1378 'id': 'eQcmzGIKrzg',
1379 'ext': 'mp4',
1380 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1381 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1382 'duration': 4060,
1383 'upload_date': '20151119',
1384 'uploader': 'Bernie Sanders',
1385 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1386 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1387 'license': 'Creative Commons Attribution license (reuse allowed)',
1388 },
1389 'params': {
1390 'skip_download': True,
1391 },
1392 },
1393 {
1394 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1395 'only_matching': True,
1396 },
1397 {
1398 # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1399 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1400 'only_matching': True,
1401 },
1402 {
1403 # Rental video preview
1404 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1405 'info_dict': {
1406 'id': 'uGpuVWrhIzE',
1407 'ext': 'mp4',
1408 'title': 'Piku - Trailer',
1409 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1410 'upload_date': '20150811',
1411 'uploader': 'FlixMatrix',
1412 'uploader_id': 'FlixMatrixKaravan',
1413 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1414 'license': 'Standard YouTube License',
1415 },
1416 'params': {
1417 'skip_download': True,
1418 },
1419 'skip': 'This video is not available.',
1420 },
1421 {
1422 # YouTube Red video with episode data
1423 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1424 'info_dict': {
1425 'id': 'iqKdEhx-dD4',
1426 'ext': 'mp4',
1427 'title': 'Isolation - Mind Field (Ep 1)',
1428 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1429 'duration': 2085,
1430 'upload_date': '20170118',
1431 'uploader': 'Vsauce',
1432 'uploader_id': 'Vsauce',
1433 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1434 'series': 'Mind Field',
1435 'season_number': 1,
1436 'episode_number': 1,
1437 },
1438 'params': {
1439 'skip_download': True,
1440 },
1441 'expected_warnings': [
1442 'Skipping DASH manifest',
1443 ],
1444 },
1445 {
1446 # The following content has been identified by the YouTube community
1447 # as inappropriate or offensive to some audiences.
1448 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1449 'info_dict': {
1450 'id': '6SJNVb0GnPI',
1451 'ext': 'mp4',
1452 'title': 'Race Differences in Intelligence',
1453 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1454 'duration': 965,
1455 'upload_date': '20140124',
1456 'uploader': 'New Century Foundation',
1457 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1458 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1459 },
1460 'params': {
1461 'skip_download': True,
1462 },
1463 'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1464 },
1465 {
1466 # itag 212
1467 'url': '1t24XAntNCY',
1468 'only_matching': True,
1469 },
1470 {
1471 # geo restricted to JP
1472 'url': 'sJL6WA-aGkQ',
1473 'only_matching': True,
1474 },
1475 {
1476 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1477 'only_matching': True,
1478 },
1479 {
1480 'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1481 'only_matching': True,
1482 },
1483 {
1484 # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1485 'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1486 'only_matching': True,
1487 },
1488 {
1489 # DRM protected
1490 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1491 'only_matching': True,
1492 },
1493 {
1494 # Video with unsupported adaptive stream type formats
1495 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1496 'info_dict': {
1497 'id': 'Z4Vy8R84T1U',
1498 'ext': 'mp4',
1499 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1500 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1501 'duration': 433,
1502 'upload_date': '20130923',
1503 'uploader': 'Amelia Putri Harwita',
1504 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1505 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1506 'formats': 'maxcount:10',
1507 },
1508 'params': {
1509 'skip_download': True,
1510 'youtube_include_dash_manifest': False,
1511 },
1512 'skip': 'not actual anymore',
1513 },
1514 {
1515 # Youtube Music Auto-generated description
1516 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1517 'info_dict': {
1518 'id': 'MgNrAu2pzNs',
1519 'ext': 'mp4',
1520 'title': 'Voyeur Girl',
1521 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1522 'upload_date': '20190312',
1523 'uploader': 'Stephen - Topic',
1524 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1525 'artist': 'Stephen',
1526 'track': 'Voyeur Girl',
1527 'album': 'it\'s too much love to know my dear',
1528 'release_date': '20190313',
1529 'release_year': 2019,
1530 },
1531 'params': {
1532 'skip_download': True,
1533 },
1534 },
1535 {
1536 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1537 'only_matching': True,
1538 },
1539 {
1540 # invalid -> valid video id redirection
1541 'url': 'DJztXj2GPfl',
1542 'info_dict': {
1543 'id': 'DJztXj2GPfk',
1544 'ext': 'mp4',
1545 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1546 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1547 'upload_date': '20090125',
1548 'uploader': 'Prochorowka',
1549 'uploader_id': 'Prochorowka',
1550 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1551 'artist': 'Panjabi MC',
1552 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1553 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1554 },
1555 'params': {
1556 'skip_download': True,
1557 },
1558 'skip': 'Video unavailable',
1559 },
1560 {
1561 # empty description results in an empty string
1562 'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1563 'info_dict': {
1564 'id': 'x41yOUIvK2k',
1565 'ext': 'mp4',
1566 'title': 'IMG 3456',
1567 'description': '',
1568 'upload_date': '20170613',
1569 'uploader_id': 'ElevageOrVert',
1570 'uploader': 'ElevageOrVert',
1571 },
1572 'params': {
1573 'skip_download': True,
1574 },
1575 },
1576 {
1577 # with '};' inside yt initial data (see [1])
1578 # see [2] for an example with '};' inside ytInitialPlayerResponse
1579 # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1580 # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1581 'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1582 'info_dict': {
1583 'id': 'CHqg6qOn4no',
1584 'ext': 'mp4',
1585 'title': 'Part 77 Sort a list of simple types in c#',
1586 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1587 'upload_date': '20130831',
1588 'uploader_id': 'kudvenkat',
1589 'uploader': 'kudvenkat',
1590 },
1591 'params': {
1592 'skip_download': True,
1593 },
1594 },
1595 {
1596 # another example of '};' in ytInitialData
1597 'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1598 'only_matching': True,
1599 },
1600 {
1601 'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1602 'only_matching': True,
1603 },
1604 {
1605 # https://github.com/ytdl-org/youtube-dl/pull/28094
1606 'url': 'OtqTfy26tG0',
1607 'info_dict': {
1608 'id': 'OtqTfy26tG0',
1609 'ext': 'mp4',
1610 'title': 'Burn Out',
1611 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1612 'upload_date': '20141120',
1613 'uploader': 'The Cinematic Orchestra - Topic',
1614 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1615 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1616 'artist': 'The Cinematic Orchestra',
1617 'track': 'Burn Out',
1618 'album': 'Every Day',
1619 'release_data': None,
1620 'release_year': None,
1621 },
1622 'params': {
1623 'skip_download': True,
1624 },
1625 },
1626 {
1627 # controversial video, only works with bpctr when authenticated with cookies
1628 'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1629 'only_matching': True,
1630 },
1631 {
1632 # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1633 'url': 'cBvYw8_A0vQ',
1634 'info_dict': {
1635 'id': 'cBvYw8_A0vQ',
1636 'ext': 'mp4',
1637 'title': '4K Ueno Okachimachi Street Scenes 上野御徒町歩き',
1638 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1639 'upload_date': '20201120',
1640 'uploader': 'Walk around Japan',
1641 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1642 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1643 },
1644 'params': {
1645 'skip_download': True,
1646 },
1647 }, {
1648 # Has multiple audio streams
1649 'url': 'WaOKSUlf4TM',
1650 'only_matching': True
1651 }, {
1652 # Requires Premium: has format 141 when requested using YTM url
1653 'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1654 'only_matching': True
1655 }, {
1656 # multiple subtitles with same lang_code
1657 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1658 'only_matching': True,
1659 }, {
1660 # Force use android client fallback
1661 'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1662 'info_dict': {
1663 'id': 'YOelRv7fMxY',
1664 'title': 'Digging a Secret Tunnel from my Workshop',
1665 'ext': '3gp',
1666 'upload_date': '20210624',
1667 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1668 'uploader': 'colinfurze',
1669 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1670 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1671 },
1672 'params': {
1673 'format': '17', # 3gp format available on android
1674 'extractor_args': {'youtube': {'player_client': ['android']}},
1675 },
1676 },
1677 {
1678 # Skip download of additional client configs (remix client config in this case)
1679 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1680 'only_matching': True,
1681 'params': {
1682 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1683 },
1684 }
1685 ]
1686
1687 @classmethod
1688 def suitable(cls, url):
1689 # Hack for lazy extractors until more generic solution is implemented
1690 # (see #28780)
1691 from .youtube import parse_qs
1692 qs = parse_qs(url)
1693 if qs.get('list', [None])[0]:
1694 return False
1695 return super(YoutubeIE, cls).suitable(url)
1696
1697 def __init__(self, *args, **kwargs):
1698 super(YoutubeIE, self).__init__(*args, **kwargs)
1699 self._code_cache = {}
1700 self._player_cache = {}
1701
1702 def _extract_player_url(self, ytcfg=None, webpage=None):
1703 player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1704 if not player_url:
1705 player_url = self._search_regex(
1706 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1707 webpage, 'player URL', fatal=False)
1708 if player_url.startswith('//'):
1709 player_url = 'https:' + player_url
1710 elif not re.match(r'https?://', player_url):
1711 player_url = compat_urlparse.urljoin(
1712 'https://www.youtube.com', player_url)
1713 return player_url
1714
1715 def _signature_cache_id(self, example_sig):
1716 """ Return a string representation of a signature """
1717 return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1718
1719 @classmethod
1720 def _extract_player_info(cls, player_url):
1721 for player_re in cls._PLAYER_INFO_RE:
1722 id_m = re.search(player_re, player_url)
1723 if id_m:
1724 break
1725 else:
1726 raise ExtractorError('Cannot identify player %r' % player_url)
1727 return id_m.group('id')
1728
1729 def _load_player(self, video_id, player_url, fatal=True) -> bool:
1730 player_id = self._extract_player_info(player_url)
1731 if player_id not in self._code_cache:
1732 self._code_cache[player_id] = self._download_webpage(
1733 player_url, video_id, fatal=fatal,
1734 note='Downloading player ' + player_id,
1735 errnote='Download of %s failed' % player_url)
1736 return player_id in self._code_cache
1737
    def _extract_signature_function(self, video_id, player_url, example_sig):
        """Return a callable descrambling signatures shaped like example_sig.

        The result is memoized on disk as a permutation table ("cache spec")
        keyed by player id and signature layout.  Implicitly returns None if
        the player JS cannot be loaded (non-fatal download failure).
        """
        player_id = self._extract_player_info(player_url)

        # Read from filesystem cache
        func_id = 'js_%s_%s' % (
            player_id, self._signature_cache_id(example_sig))
        # func_id doubles as a cache filename; it must not contain path separators
        assert os.path.basename(func_id) == func_id

        cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
        if cache_spec is not None:
            # The cached spec is a list of source indices: applying it
            # permutes the scrambled signature into the working one
            return lambda s: ''.join(s[i] for i in cache_spec)

        if self._load_player(video_id, player_url):
            code = self._code_cache[player_id]
            res = self._parse_sig_js(code)

            # Run the extracted JS function on a probe string of distinct
            # characters to record the permutation it performs
            test_string = ''.join(map(compat_chr, range(len(example_sig))))
            cache_res = res(test_string)
            cache_spec = [ord(c) for c in cache_res]

            self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
            return res
1760
    def _print_sig_code(self, func, example_sig):
        """Print Python source equivalent to the extracted signature function.

        Debug helper (--youtube-print-sig-code): runs func on a probe string,
        recovers the character permutation it performs and renders it as a
        sequence of string slices/indices.
        """
        def gen_sig_code(idxs):
            def _genslice(start, end, step):
                # Render a contiguous run of indices as a Python slice s[a:b:c]
                starts = '' if start == 0 else str(start)
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            step = None
            # Quelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        # Run continues with the same stride
                        continue
                    # Run ended at prev; emit it as a slice
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # Start of a new unit-stride run
                    step = i - prev
                    start = prev
                    continue
                else:
                    # Isolated index
                    yield 's[%d]' % prev
            # Flush the final element/run
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe with chr(0..n-1) so each output character reveals its source index
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1799
    def _parse_sig_js(self, jscode):
        """Locate the signature-descrambling function in the player JS.

        Tries a battery of regexes (newest patterns first) to find the
        function name, then returns a Python callable wrapping it through
        JSInterpreter.
        """
        funcname = self._search_regex(
            (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
             r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
             r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
             # Obsolete patterns
             r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
             r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
            jscode, 'Initial JS player signature function name', group='sig')

        jsi = JSInterpreter(jscode)
        initial_function = jsi.extract_function(funcname)
        # The JS function takes the signature as its single argument
        return lambda s: initial_function([s])
1823
1824 def _decrypt_signature(self, s, video_id, player_url):
1825 """Turn the encrypted s field into a working signature"""
1826
1827 if player_url is None:
1828 raise ExtractorError('Cannot decrypt signature without player_url')
1829
1830 try:
1831 player_id = (player_url, self._signature_cache_id(s))
1832 if player_id not in self._player_cache:
1833 func = self._extract_signature_function(
1834 video_id, player_url, s
1835 )
1836 self._player_cache[player_id] = func
1837 func = self._player_cache[player_id]
1838 if self.get_param('youtube_print_sig_code'):
1839 self._print_sig_code(func, s)
1840 return func(s)
1841 except Exception as e:
1842 tb = traceback.format_exc()
1843 raise ExtractorError(
1844 'Signature extraction failed: ' + tb, cause=e)
1845
1846 def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1847 """
1848 Extract signatureTimestamp (sts)
1849 Required to tell API what sig/player version is in use.
1850 """
1851 sts = None
1852 if isinstance(ytcfg, dict):
1853 sts = int_or_none(ytcfg.get('STS'))
1854
1855 if not sts:
1856 # Attempt to extract from player
1857 if player_url is None:
1858 error_msg = 'Cannot extract signature timestamp without player_url.'
1859 if fatal:
1860 raise ExtractorError(error_msg)
1861 self.report_warning(error_msg)
1862 return
1863 if self._load_player(video_id, player_url, fatal=fatal):
1864 player_id = self._extract_player_info(player_url)
1865 code = self._code_cache[player_id]
1866 sts = int_or_none(self._search_regex(
1867 r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1868 'JS player signature timestamp', group='sts', fatal=fatal))
1869 return sts
1870
1871 def _mark_watched(self, video_id, player_response):
1872 playback_url = url_or_none(try_get(
1873 player_response,
1874 lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
1875 if not playback_url:
1876 return
1877 parsed_playback_url = compat_urlparse.urlparse(playback_url)
1878 qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1879
1880 # cpn generation algorithm is reverse engineered from base.js.
1881 # In fact it works even with dummy cpn.
1882 CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1883 cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1884
1885 qs.update({
1886 'ver': ['2'],
1887 'cpn': [cpn],
1888 })
1889 playback_url = compat_urlparse.urlunparse(
1890 parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1891
1892 self._download_webpage(
1893 playback_url, video_id, 'Marking watched',
1894 'Unable to mark watched', fatal=False)
1895
    @staticmethod
    def _extract_urls(webpage):
        """Return a list of YouTube embed URLs/video ids found in an arbitrary webpage."""
        # Embedded YouTube player
        entries = [
            unescapeHTML(mobj.group('url'))
            for mobj in re.finditer(r'''(?x)
                (?:
                    <iframe[^>]+?src=|
                    data-video-url=|
                    <embed[^>]+?src=|
                    embedSWF\(?:\s*|
                    <object[^>]+data=|
                    new\s+SWFObject\(
                )
                (["\'])
                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
                \1''', webpage)]

        # lazyYT YouTube embed
        entries.extend(list(map(
            unescapeHTML,
            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))

        # Wordpress "YouTube Video Importer" plugin
        matches = re.findall(r'''(?x)<div[^>]+
            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
        # m[-1] is the captured video id (last group of each match tuple)
        entries.extend(m[-1] for m in matches)

        return entries
1927
1928 @staticmethod
1929 def _extract_url(webpage):
1930 urls = YoutubeIE._extract_urls(webpage)
1931 return urls[0] if urls else None
1932
1933 @classmethod
1934 def extract_id(cls, url):
1935 mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1936 if mobj is None:
1937 raise ExtractorError('Invalid URL: %s' % url)
1938 video_id = mobj.group(2)
1939 return video_id
1940
1941 def _extract_chapters_from_json(self, data, video_id, duration):
1942 chapters_list = try_get(
1943 data,
1944 lambda x: x['playerOverlays']
1945 ['playerOverlayRenderer']
1946 ['decoratedPlayerBarRenderer']
1947 ['decoratedPlayerBarRenderer']
1948 ['playerBar']
1949 ['chapteredPlayerBarRenderer']
1950 ['chapters'],
1951 list)
1952 if not chapters_list:
1953 return
1954
1955 def chapter_time(chapter):
1956 return float_or_none(
1957 try_get(
1958 chapter,
1959 lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1960 int),
1961 scale=1000)
1962 chapters = []
1963 for next_num, chapter in enumerate(chapters_list, start=1):
1964 start_time = chapter_time(chapter)
1965 if start_time is None:
1966 continue
1967 end_time = (chapter_time(chapters_list[next_num])
1968 if next_num < len(chapters_list) else duration)
1969 if end_time is None:
1970 continue
1971 title = try_get(
1972 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1973 compat_str)
1974 chapters.append({
1975 'start_time': start_time,
1976 'end_time': end_time,
1977 'title': title,
1978 })
1979 return chapters
1980
1981 def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1982 return self._parse_json(self._search_regex(
1983 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1984 regex), webpage, name, default='{}'), video_id, fatal=False)
1985
    @staticmethod
    def parse_time_text(time_text):
        """
        Parse the comment time text
        time_text is in the format 'X units ago (edited)'
        """
        # e.g. '2 years ago' -> ['2', 'years', 'ago'] -> datetime_from_str('now-2years')
        time_text_split = time_text.split(' ')
        if len(time_text_split) >= 3:
            return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
        # NOTE(review): implicitly returns None for shorter inputs, while
        # _extract_comment calls .timetuple() on the result - confirm such
        # inputs cannot occur in practice
1995
1996 def _extract_comment(self, comment_renderer, parent=None):
1997 comment_id = comment_renderer.get('commentId')
1998 if not comment_id:
1999 return
2000 comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
2001 text = self._join_text_entries(comment_text_runs) or ''
2002 comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
2003 time_text = self._join_text_entries(comment_time_text)
2004 # note: timestamp is an estimate calculated from the current time and time_text
2005 timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
2006 author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
2007 author_id = try_get(comment_renderer,
2008 lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2009 votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2010 lambda x: x['likeCount']), compat_str)) or 0
2011 author_thumbnail = try_get(comment_renderer,
2012 lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2013
2014 author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2015 is_favorited = 'creatorHeart' in (try_get(
2016 comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2017 return {
2018 'id': comment_id,
2019 'text': text,
2020 'timestamp': timestamp,
2021 'time_text': time_text,
2022 'like_count': votes,
2023 'is_favorited': is_favorited,
2024 'author': author,
2025 'author_id': author_id,
2026 'author_thumbnail': author_thumbnail,
2027 'author_is_uploader': author_is_uploader,
2028 'parent': parent or 'root'
2029 }
2030
    def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
                         ytcfg, video_id, parent=None, comment_counts=None):
        """Generator yielding comment dicts for a comment section (or, recursively,
        a reply thread when parent is set).

        The estimated total comment count is yielded once as a bare int before
        any comments.  comment_counts is a shared mutable list
        [downloaded so far, estimated total, current reply-thread index] that is
        passed down into recursive calls.
        """

        def extract_header(contents):
            # Parse the commentsHeaderRenderer: total comment count and the
            # continuation matching the requested sort order
            _total_comments = 0
            _continuation = None
            for content in contents:
                comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
                expected_comment_count = try_get(comments_header_renderer,
                                                 (lambda x: x['countText']['runs'][0]['text'],
                                                  lambda x: x['commentsCount']['runs'][0]['text']),
                                                 compat_str)
                if expected_comment_count:
                    comment_counts[1] = str_to_int(expected_comment_count)
                    self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
                    _total_comments = comment_counts[1]
                sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
                comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top

                sort_menu_item = try_get(
                    comments_header_renderer,
                    lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
                sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}

                _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
                if not _continuation:
                    continue

                sort_text = sort_menu_item.get('title')
                if isinstance(sort_text, compat_str):
                    sort_text = sort_text.lower()
                else:
                    sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
                self.to_screen('Sorting comments by %s' % sort_text)
                break
            return _total_comments, _continuation

        def extract_thread(contents):
            # Yield each comment in contents, then recurse into its replies
            if not parent:
                comment_counts[2] = 0
            for content in contents:
                comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
                comment_renderer = try_get(
                    comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
                    content, (lambda x: x['commentRenderer'], dict))

                if not comment_renderer:
                    continue
                comment = self._extract_comment(comment_renderer, parent)
                if not comment:
                    continue
                comment_counts[0] += 1
                yield comment
                # Attempt to get the replies
                comment_replies_renderer = try_get(
                    comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)

                if comment_replies_renderer:
                    comment_counts[2] += 1
                    comment_entries_iter = self._comment_entries(
                        comment_replies_renderer, identity_token, account_syncid, ytcfg,
                        video_id, parent=comment.get('id'), comment_counts=comment_counts)

                    for reply_comment in comment_entries_iter:
                        yield reply_comment

        # YouTube comments have a max depth of 2
        max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
        if max_depth == 1 and parent:
            return
        if not comment_counts:
            # comment so far, est. total comments, current comment thread #
            comment_counts = [0, 0, 0]

        continuation = self._extract_continuation(root_continuation_data)
        if continuation and len(continuation['ctoken']) < 27:
            self.write_debug('Detected old API continuation token. Generating new API compatible token.')
            continuation_token = self._generate_comment_continuation(video_id)
            continuation = self._build_continuation_query(continuation_token, None)

        visitor_data = None
        is_first_continuation = parent is None

        # Page through the comment continuations until exhausted
        for page_num in itertools.count(0):
            if not continuation:
                break
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
            if page_num == 0:
                if is_first_continuation:
                    note_prefix = 'Downloading comment section API JSON'
                else:
                    note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
                        comment_counts[2], comment_prog_str)
            else:
                note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                    '       ' if parent else '', ' replies' if parent else '',
                    page_num, comment_prog_str)

            response = self._extract_response(
                item_id=None, query=self._continuation_query_ajax_to_api(continuation),
                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
                check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
            if not response:
                break
            # Carry the visitorData forward so subsequent requests share the session
            visitor_data = try_get(
                response,
                lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
                compat_str) or visitor_data

            continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))

            continuation = None
            if isinstance(continuation_contents, list):
                for continuation_section in continuation_contents:
                    if not isinstance(continuation_section, dict):
                        continue
                    continuation_items = try_get(
                        continuation_section,
                        (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
                         lambda x: x['appendContinuationItemsAction']['continuationItems']),
                        list) or []
                    if is_first_continuation:
                        total_comments, continuation = extract_header(continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break
                        continue
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_items)):
                        yield entry
                    continuation = self._extract_continuation({'contents': continuation_items})
                    if continuation:
                        # Sometimes YouTube provides a continuation without any comments
                        # In most cases we end up just downloading these with very little comments to come.
                        if count == 0:
                            if not parent:
                                self.report_warning('No comments received - assuming end of comments')
                            continuation = None
                        break

            # Deprecated response structure
            elif isinstance(continuation_contents, dict):
                known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
                for key, continuation_renderer in continuation_contents.items():
                    if key not in known_continuation_renderers:
                        continue
                    if not isinstance(continuation_renderer, dict):
                        continue
                    if is_first_continuation:
                        header_continuation_items = [continuation_renderer.get('header') or {}]
                        total_comments, continuation = extract_header(header_continuation_items)
                        if total_comments:
                            yield total_comments
                        is_first_continuation = False
                        if continuation:
                            break

                    # Sometimes YouTube provides a continuation without any comments
                    # In most cases we end up just downloading these with very little comments to come.
                    count = 0
                    for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
                        yield entry
                    continuation = self._extract_continuation(continuation_renderer)
                    if count == 0:
                        if not parent:
                            self.report_warning('No comments received - assuming end of comments')
                        continuation = None
                    break
2202
2203 @staticmethod
2204 def _generate_comment_continuation(video_id):
2205 """
2206 Generates initial comment section continuation token from given video id
2207 """
2208 b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2209 parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2210 new_continuation_intlist = list(itertools.chain.from_iterable(
2211 [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2212 return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2213
2214 def _extract_comments(self, ytcfg, video_id, contents, webpage):
2215 """Entry for comment extraction"""
2216 def _real_comment_extract(contents):
2217 if isinstance(contents, list):
2218 for entry in contents:
2219 for key, renderer in entry.items():
2220 if key not in known_entry_comment_renderers:
2221 continue
2222 yield from self._comment_entries(
2223 renderer, video_id=video_id, ytcfg=ytcfg,
2224 identity_token=self._extract_identity_token(webpage, item_id=video_id),
2225 account_syncid=self._extract_account_syncid(ytcfg))
2226 break
2227 comments = []
2228 known_entry_comment_renderers = ('itemSectionRenderer',)
2229 estimated_total = 0
2230 max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2231
2232 try:
2233 for comment in _real_comment_extract(contents):
2234 if len(comments) >= max_comments:
2235 break
2236 if isinstance(comment, int):
2237 estimated_total = comment
2238 continue
2239 comments.append(comment)
2240 except KeyboardInterrupt:
2241 self.to_screen('Interrupted by user')
2242 self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2243 return {
2244 'comments': comments,
2245 'comment_count': len(comments),
2246 }
2247
2248 @staticmethod
2249 def _generate_player_context(sts=None):
2250 context = {
2251 'html5Preference': 'HTML5_PREF_WANTS',
2252 }
2253 if sts is not None:
2254 context['signatureTimestamp'] = sts
2255 return {
2256 'playbackContext': {
2257 'contentPlaybackContext': context
2258 }
2259 }
2260
2261 @staticmethod
2262 def _get_video_info_params(video_id, client='TVHTML5'):
2263 GVI_CLIENTS = {
2264 'ANDROID': {
2265 'c': 'ANDROID',
2266 'cver': '16.20',
2267 },
2268 'TVHTML5': {
2269 'c': 'TVHTML5',
2270 'cver': '6.20180913',
2271 }
2272 }
2273 query = {
2274 'video_id': video_id,
2275 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2276 'html5': '1'
2277 }
2278 query.update(GVI_CLIENTS.get(client))
2279 return query
2280
2281 def _real_extract(self, url):
2282 url, smuggled_data = unsmuggle_url(url, {})
2283 video_id = self._match_id(url)
2284
2285 is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2286
2287 base_url = self.http_scheme() + '//www.youtube.com/'
2288 webpage_url = base_url + 'watch?v=' + video_id
2289 webpage = self._download_webpage(
2290 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2291
2292 ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2293 identity_token = self._extract_identity_token(webpage, video_id)
2294 syncid = self._extract_account_syncid(ytcfg)
2295 headers = self._generate_api_headers(ytcfg, identity_token, syncid)
2296
2297 player_url = self._extract_player_url(ytcfg, webpage)
2298
2299 player_client = self._configuration_arg('player_client', [''])[0]
2300 if player_client not in ('web', 'android', ''):
2301 self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
2302 force_mobile_client = player_client != 'web'
2303 player_skip = self._configuration_arg('player_skip')
2304
2305 def get_text(x):
2306 if not x:
2307 return
2308 text = x.get('simpleText')
2309 if text and isinstance(text, compat_str):
2310 return text
2311 runs = x.get('runs')
2312 if not isinstance(runs, list):
2313 return
2314 return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
2315
2316 ytm_streaming_data = {}
2317 if is_music_url:
2318 ytm_webpage = None
2319 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2320 if sts and not force_mobile_client and 'configs' not in player_skip:
2321 ytm_webpage = self._download_webpage(
2322 'https://music.youtube.com',
2323 video_id, fatal=False, note='Downloading remix client config')
2324
2325 ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2326 ytm_client = 'WEB_REMIX'
2327 if not sts or force_mobile_client:
2328 # Android client already has signature descrambled
2329 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2330 if not sts:
2331 self.report_warning('Falling back to android remix client for player API.')
2332 ytm_client = 'ANDROID_MUSIC'
2333 ytm_cfg = {}
2334
2335 ytm_headers = self._generate_api_headers(
2336 ytm_cfg, identity_token, syncid,
2337 client=ytm_client)
2338 ytm_query = {'videoId': video_id}
2339 ytm_query.update(self._generate_player_context(sts))
2340
2341 ytm_player_response = self._extract_response(
2342 item_id=video_id, ep='player', query=ytm_query,
2343 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2344 default_client=ytm_client,
2345 note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
2346 ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
2347
2348 player_response = None
2349 if webpage:
2350 player_response = self._extract_yt_initial_variable(
2351 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2352 video_id, 'initial player response')
2353
2354 if not player_response or force_mobile_client:
2355 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2356 yt_client = 'WEB'
2357 ytpcfg = ytcfg
2358 ytp_headers = headers
2359 if not sts or force_mobile_client:
2360 # Android client already has signature descrambled
2361 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2362 if not sts:
2363 self.report_warning('Falling back to android client for player API.')
2364 yt_client = 'ANDROID'
2365 ytpcfg = {}
2366 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid, yt_client)
2367
2368 yt_query = {'videoId': video_id}
2369 yt_query.update(self._generate_player_context(sts))
2370 player_response = self._extract_response(
2371 item_id=video_id, ep='player', query=yt_query,
2372 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2373 default_client=yt_client,
2374 note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
2375 ) or player_response
2376
2377 # Age-gate workarounds
2378 playability_status = player_response.get('playabilityStatus') or {}
2379 if playability_status.get('reason') in self._AGE_GATE_REASONS:
2380 gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
2381 for gvi_client in gvi_clients:
2382 pr = self._parse_json(try_get(compat_parse_qs(
2383 self._download_webpage(
2384 base_url + 'get_video_info', video_id,
2385 'Refetching age-gated %s info webpage' % gvi_client.lower(),
2386 'unable to download video info webpage', fatal=False,
2387 query=self._get_video_info_params(video_id, client=gvi_client))),
2388 lambda x: x['player_response'][0],
2389 compat_str) or '{}', video_id)
2390 if pr:
2391 break
2392 if not pr:
2393 self.report_warning('Falling back to embedded-only age-gate workaround.')
2394 embed_webpage = None
2395 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2396 if sts and not force_mobile_client and 'configs' not in player_skip:
2397 embed_webpage = self._download_webpage(
2398 'https://www.youtube.com/embed/%s?html5=1' % video_id,
2399 video_id=video_id, note='Downloading age-gated embed config')
2400
2401 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2402 # If we extracted the embed webpage, it'll tell us if we can view the video
2403 embedded_pr = self._parse_json(
2404 try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2405 video_id=video_id)
2406 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2407 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2408 yt_client = 'WEB_EMBEDDED_PLAYER'
2409 if not sts or force_mobile_client:
2410 # Android client already has signature descrambled
2411 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2412 if not sts:
2413 self.report_warning(
2414 'Falling back to android embedded client for player API (note: some formats may be missing).')
2415 yt_client = 'ANDROID_EMBEDDED_PLAYER'
2416 ytcfg_age = {}
2417
2418 ytage_headers = self._generate_api_headers(
2419 ytcfg_age, identity_token, syncid, client=yt_client)
2420 yt_age_query = {'videoId': video_id}
2421 yt_age_query.update(self._generate_player_context(sts))
2422 pr = self._extract_response(
2423 item_id=video_id, ep='player', query=yt_age_query,
2424 ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2425 default_client=yt_client,
2426 note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
2427 ) or {}
2428
2429 if pr:
2430 player_response = pr
2431
2432 trailer_video_id = try_get(
2433 playability_status,
2434 lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2435 compat_str)
2436 if trailer_video_id:
2437 return self.url_result(
2438 trailer_video_id, self.ie_key(), trailer_video_id)
2439
2440 search_meta = (
2441 lambda x: self._html_search_meta(x, webpage, default=None)) \
2442 if webpage else lambda x: None
2443
2444 video_details = player_response.get('videoDetails') or {}
2445 microformat = try_get(
2446 player_response,
2447 lambda x: x['microformat']['playerMicroformatRenderer'],
2448 dict) or {}
2449 video_title = video_details.get('title') \
2450 or get_text(microformat.get('title')) \
2451 or search_meta(['og:title', 'twitter:title', 'title'])
2452 video_description = video_details.get('shortDescription')
2453
2454 if not smuggled_data.get('force_singlefeed', False):
2455 if not self.get_param('noplaylist'):
2456 multifeed_metadata_list = try_get(
2457 player_response,
2458 lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2459 compat_str)
2460 if multifeed_metadata_list:
2461 entries = []
2462 feed_ids = []
2463 for feed in multifeed_metadata_list.split(','):
2464 # Unquote should take place before split on comma (,) since textual
2465 # fields may contain comma as well (see
2466 # https://github.com/ytdl-org/youtube-dl/issues/8536)
2467 feed_data = compat_parse_qs(
2468 compat_urllib_parse_unquote_plus(feed))
2469
2470 def feed_entry(name):
2471 return try_get(
2472 feed_data, lambda x: x[name][0], compat_str)
2473
2474 feed_id = feed_entry('id')
2475 if not feed_id:
2476 continue
2477 feed_title = feed_entry('title')
2478 title = video_title
2479 if feed_title:
2480 title += ' (%s)' % feed_title
2481 entries.append({
2482 '_type': 'url_transparent',
2483 'ie_key': 'Youtube',
2484 'url': smuggle_url(
2485 base_url + 'watch?v=' + feed_data['id'][0],
2486 {'force_singlefeed': True}),
2487 'title': title,
2488 })
2489 feed_ids.append(feed_id)
2490 self.to_screen(
2491 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2492 % (', '.join(feed_ids), video_id))
2493 return self.playlist_result(
2494 entries, video_id, video_title, video_description)
2495 else:
2496 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2497
2498 formats, itags, stream_ids = [], [], []
2499 itag_qualities = {}
2500 q = qualities([
2501 # "tiny" is the smallest video-only format. But some audio-only formats
2502 # was also labeled "tiny". It is not clear if such formats still exist
2503 'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats
2504 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2505 ])
2506
2507 streaming_data = player_response.get('streamingData') or {}
2508 streaming_formats = streaming_data.get('formats') or []
2509 streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
2510 streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2511 streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2512
2513 for fmt in streaming_formats:
2514 if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2515 continue
2516
2517 itag = str_or_none(fmt.get('itag'))
2518 audio_track = fmt.get('audioTrack') or {}
2519 stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2520 if stream_id in stream_ids:
2521 continue
2522
2523 quality = fmt.get('quality')
2524 if quality == 'tiny' or not quality:
2525 quality = fmt.get('audioQuality', '').lower() or quality
2526 if itag and quality:
2527 itag_qualities[itag] = quality
2528 # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2529 # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2530 # number of fragment that would subsequently requested with (`&sq=N`)
2531 if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2532 continue
2533
2534 fmt_url = fmt.get('url')
2535 if not fmt_url:
2536 sc = compat_parse_qs(fmt.get('signatureCipher'))
2537 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2538 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2539 if not (sc and fmt_url and encrypted_sig):
2540 continue
2541 if not player_url:
2542 continue
2543 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2544 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2545 fmt_url += '&' + sp + '=' + signature
2546
2547 if itag:
2548 itags.append(itag)
2549 stream_ids.append(stream_id)
2550
2551 tbr = float_or_none(
2552 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2553 dct = {
2554 'asr': int_or_none(fmt.get('audioSampleRate')),
2555 'filesize': int_or_none(fmt.get('contentLength')),
2556 'format_id': itag,
2557 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
2558 'fps': int_or_none(fmt.get('fps')),
2559 'height': int_or_none(fmt.get('height')),
2560 'quality': q(quality),
2561 'tbr': tbr,
2562 'url': fmt_url,
2563 'width': fmt.get('width'),
2564 'language': audio_track.get('id', '').split('.')[0],
2565 }
2566 mime_mobj = re.match(
2567 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2568 if mime_mobj:
2569 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2570 dct.update(parse_codecs(mime_mobj.group(2)))
2571 # The 3gp format in android client has a quality of "small",
2572 # but is actually worse than all other formats
2573 if dct['ext'] == '3gp':
2574 dct['quality'] = q('tiny')
2575 no_audio = dct.get('acodec') == 'none'
2576 no_video = dct.get('vcodec') == 'none'
2577 if no_audio:
2578 dct['vbr'] = tbr
2579 if no_video:
2580 dct['abr'] = tbr
2581 if no_audio or no_video:
2582 dct['downloader_options'] = {
2583 # Youtube throttles chunks >~10M
2584 'http_chunk_size': 10485760,
2585 }
2586 if dct.get('ext'):
2587 dct['container'] = dct['ext'] + '_dash'
2588 formats.append(dct)
2589
2590 skip_manifests = self._configuration_arg('skip')
2591 get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2592 get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2593
2594 for sd in (streaming_data, ytm_streaming_data):
2595 hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2596 if hls_manifest_url:
2597 for f in self._extract_m3u8_formats(
2598 hls_manifest_url, video_id, 'mp4', fatal=False):
2599 itag = self._search_regex(
2600 r'/itag/(\d+)', f['url'], 'itag', default=None)
2601 if itag:
2602 f['format_id'] = itag
2603 formats.append(f)
2604
2605 dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2606 if dash_manifest_url:
2607 for f in self._extract_mpd_formats(
2608 dash_manifest_url, video_id, fatal=False):
2609 itag = f['format_id']
2610 if itag in itags:
2611 continue
2612 if itag in itag_qualities:
2613 f['quality'] = q(itag_qualities[itag])
2614 filesize = int_or_none(self._search_regex(
2615 r'/clen/(\d+)', f.get('fragment_base_url')
2616 or f['url'], 'file size', default=None))
2617 if filesize:
2618 f['filesize'] = filesize
2619 formats.append(f)
2620
2621 if not formats:
2622 if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
2623 self.raise_no_formats(
2624 'This video is DRM protected.', expected=True)
2625 pemr = try_get(
2626 playability_status,
2627 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2628 dict) or {}
2629 reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2630 subreason = pemr.get('subreason')
2631 if subreason:
2632 subreason = clean_html(get_text(subreason))
2633 if subreason == 'The uploader has not made this video available in your country.':
2634 countries = microformat.get('availableCountries')
2635 if not countries:
2636 regions_allowed = search_meta('regionsAllowed')
2637 countries = regions_allowed.split(',') if regions_allowed else None
2638 self.raise_geo_restricted(subreason, countries, metadata_available=True)
2639 reason += '\n' + subreason
2640 if reason:
2641 self.raise_no_formats(reason, expected=True)
2642
2643 self._sort_formats(formats)
2644
2645 keywords = video_details.get('keywords') or []
2646 if not keywords and webpage:
2647 keywords = [
2648 unescapeHTML(m.group('content'))
2649 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2650 for keyword in keywords:
2651 if keyword.startswith('yt:stretch='):
2652 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2653 if mobj:
2654 # NB: float is intentional for forcing float division
2655 w, h = (float(v) for v in mobj.groups())
2656 if w > 0 and h > 0:
2657 ratio = w / h
2658 for f in formats:
2659 if f.get('vcodec') != 'none':
2660 f['stretched_ratio'] = ratio
2661 break
2662
2663 thumbnails = []
2664 for container in (video_details, microformat):
2665 for thumbnail in (try_get(
2666 container,
2667 lambda x: x['thumbnail']['thumbnails'], list) or []):
2668 thumbnail_url = thumbnail.get('url')
2669 if not thumbnail_url:
2670 continue
2671 # Sometimes youtube gives a wrong thumbnail URL. See:
2672 # https://github.com/yt-dlp/yt-dlp/issues/233
2673 # https://github.com/ytdl-org/youtube-dl/issues/28023
2674 if 'maxresdefault' in thumbnail_url:
2675 thumbnail_url = thumbnail_url.split('?')[0]
2676 thumbnails.append({
2677 'url': thumbnail_url,
2678 'height': int_or_none(thumbnail.get('height')),
2679 'width': int_or_none(thumbnail.get('width')),
2680 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2681 })
2682 thumbnail_url = search_meta(['og:image', 'twitter:image'])
2683 if thumbnail_url:
2684 thumbnails.append({
2685 'url': thumbnail_url,
2686 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2687 })
2688 # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2689 # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2690 thumbnails.append({
2691 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2692 'preference': 1,
2693 })
2694 self._remove_duplicate_formats(thumbnails)
2695
2696 category = microformat.get('category') or search_meta('genre')
2697 channel_id = video_details.get('channelId') \
2698 or microformat.get('externalChannelId') \
2699 or search_meta('channelId')
2700 duration = int_or_none(
2701 video_details.get('lengthSeconds')
2702 or microformat.get('lengthSeconds')) \
2703 or parse_duration(search_meta('duration'))
2704 is_live = video_details.get('isLive')
2705 is_upcoming = video_details.get('isUpcoming')
2706 owner_profile_url = microformat.get('ownerProfileUrl')
2707
2708 info = {
2709 'id': video_id,
2710 'title': self._live_title(video_title) if is_live else video_title,
2711 'formats': formats,
2712 'thumbnails': thumbnails,
2713 'description': video_description,
2714 'upload_date': unified_strdate(
2715 microformat.get('uploadDate')
2716 or search_meta('uploadDate')),
2717 'uploader': video_details['author'],
2718 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2719 'uploader_url': owner_profile_url,
2720 'channel_id': channel_id,
2721 'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2722 'duration': duration,
2723 'view_count': int_or_none(
2724 video_details.get('viewCount')
2725 or microformat.get('viewCount')
2726 or search_meta('interactionCount')),
2727 'average_rating': float_or_none(video_details.get('averageRating')),
2728 'age_limit': 18 if (
2729 microformat.get('isFamilySafe') is False
2730 or search_meta('isFamilyFriendly') == 'false'
2731 or search_meta('og:restrictions:age') == '18+') else 0,
2732 'webpage_url': webpage_url,
2733 'categories': [category] if category else None,
2734 'tags': keywords,
2735 'is_live': is_live,
2736 'playable_in_embed': playability_status.get('playableInEmbed'),
2737 'was_live': video_details.get('isLiveContent'),
2738 }
2739
2740 pctr = try_get(
2741 player_response,
2742 lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2743 subtitles = {}
2744 if pctr:
2745 def process_language(container, base_url, lang_code, sub_name, query):
2746 lang_subs = container.setdefault(lang_code, [])
2747 for fmt in self._SUBTITLE_FORMATS:
2748 query.update({
2749 'fmt': fmt,
2750 })
2751 lang_subs.append({
2752 'ext': fmt,
2753 'url': update_url_query(base_url, query),
2754 'name': sub_name,
2755 })
2756
2757 for caption_track in (pctr.get('captionTracks') or []):
2758 base_url = caption_track.get('baseUrl')
2759 if not base_url:
2760 continue
2761 if caption_track.get('kind') != 'asr':
2762 lang_code = (
2763 remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2764 or caption_track.get('languageCode'))
2765 if not lang_code:
2766 continue
2767 process_language(
2768 subtitles, base_url, lang_code,
2769 try_get(caption_track, lambda x: x['name']['simpleText']),
2770 {})
2771 continue
2772 automatic_captions = {}
2773 for translation_language in (pctr.get('translationLanguages') or []):
2774 translation_language_code = translation_language.get('languageCode')
2775 if not translation_language_code:
2776 continue
2777 process_language(
2778 automatic_captions, base_url, translation_language_code,
2779 try_get(translation_language, (
2780 lambda x: x['languageName']['simpleText'],
2781 lambda x: x['languageName']['runs'][0]['text'])),
2782 {'tlang': translation_language_code})
2783 info['automatic_captions'] = automatic_captions
2784 info['subtitles'] = subtitles
2785
        # Pick up requested start/end times from the URL itself, e.g.
        # ?t=1m30s or #start=90 (the fragment is checked before the query,
        # so a fragment value takes precedence)
        parsed_url = compat_urllib_parse_urlparse(url)
        for component in [parsed_url.fragment, parsed_url.query]:
            query = compat_parse_qs(component)
            for k, v in query.items():
                for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
                    d_k += '_time'
                    # only the first matching component sets each key
                    if d_k not in info and k in s_ks:
                        info[d_k] = parse_duration(query[k][0])
2794
2795 # Youtube Music Auto-generated description
2796 if video_description:
2797 mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2798 if mobj:
2799 release_year = mobj.group('release_year')
2800 release_date = mobj.group('release_date')
2801 if release_date:
2802 release_date = release_date.replace('-', '')
2803 if not release_year:
2804 release_year = release_date[:4]
2805 info.update({
2806 'album': mobj.group('album'.strip()),
2807 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2808 'track': mobj.group('track').strip(),
2809 'release_date': release_date,
2810 'release_year': int_or_none(release_year),
2811 })
2812
2813 initial_data = None
2814 if webpage:
2815 initial_data = self._extract_yt_initial_variable(
2816 webpage, self._YT_INITIAL_DATA_RE, video_id,
2817 'yt initial data')
2818 if not initial_data:
2819 initial_data = self._extract_response(
2820 item_id=video_id, ep='next', fatal=False,
2821 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2822 note='Downloading initial data API JSON')
2823
2824 try:
2825 # This will error if there is no livechat
2826 initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2827 info['subtitles']['live_chat'] = [{
2828 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies
2829 'video_id': video_id,
2830 'ext': 'json',
2831 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2832 }]
2833 except (KeyError, IndexError, TypeError):
2834 pass
2835
2836 if initial_data:
2837 chapters = self._extract_chapters_from_json(
2838 initial_data, video_id, duration)
2839 if not chapters:
2840 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2841 contents = try_get(
2842 engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2843 list)
2844 if not contents:
2845 continue
2846
2847 def chapter_time(mmlir):
2848 return parse_duration(
2849 get_text(mmlir.get('timeDescription')))
2850
2851 chapters = []
2852 for next_num, content in enumerate(contents, start=1):
2853 mmlir = content.get('macroMarkersListItemRenderer') or {}
2854 start_time = chapter_time(mmlir)
2855 end_time = chapter_time(try_get(
2856 contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2857 if next_num < len(contents) else duration
2858 if start_time is None or end_time is None:
2859 continue
2860 chapters.append({
2861 'start_time': start_time,
2862 'end_time': end_time,
2863 'title': get_text(mmlir.get('title')),
2864 })
2865 if chapters:
2866 break
2867 if chapters:
2868 info['chapters'] = chapters
2869
2870 contents = try_get(
2871 initial_data,
2872 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2873 list) or []
2874 for content in contents:
2875 vpir = content.get('videoPrimaryInfoRenderer')
2876 if vpir:
2877 stl = vpir.get('superTitleLink')
2878 if stl:
2879 stl = get_text(stl)
2880 if try_get(
2881 vpir,
2882 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2883 info['location'] = stl
2884 else:
2885 mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2886 if mobj:
2887 info.update({
2888 'series': mobj.group(1),
2889 'season_number': int(mobj.group(2)),
2890 'episode_number': int(mobj.group(3)),
2891 })
2892 for tlb in (try_get(
2893 vpir,
2894 lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2895 list) or []):
2896 tbr = tlb.get('toggleButtonRenderer') or {}
2897 for getter, regex in [(
2898 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2899 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2900 lambda x: x['accessibility'],
2901 lambda x: x['accessibilityData']['accessibilityData'],
2902 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2903 label = (try_get(tbr, getter, dict) or {}).get('label')
2904 if label:
2905 mobj = re.match(regex, label)
2906 if mobj:
2907 info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2908 break
2909 sbr_tooltip = try_get(
2910 vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2911 if sbr_tooltip:
2912 like_count, dislike_count = sbr_tooltip.split(' / ')
2913 info.update({
2914 'like_count': str_to_int(like_count),
2915 'dislike_count': str_to_int(dislike_count),
2916 })
2917 vsir = content.get('videoSecondaryInfoRenderer')
2918 if vsir:
2919 info['channel'] = get_text(try_get(
2920 vsir,
2921 lambda x: x['owner']['videoOwnerRenderer']['title'],
2922 dict))
2923 rows = try_get(
2924 vsir,
2925 lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2926 list) or []
2927 multiple_songs = False
2928 for row in rows:
2929 if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2930 multiple_songs = True
2931 break
2932 for row in rows:
2933 mrr = row.get('metadataRowRenderer') or {}
2934 mrr_title = mrr.get('title')
2935 if not mrr_title:
2936 continue
2937 mrr_title = get_text(mrr['title'])
2938 mrr_contents_text = get_text(mrr['contents'][0])
2939 if mrr_title == 'License':
2940 info['license'] = mrr_contents_text
2941 elif not multiple_songs:
2942 if mrr_title == 'Album':
2943 info['album'] = mrr_contents_text
2944 elif mrr_title == 'Artist':
2945 info['artist'] = mrr_contents_text
2946 elif mrr_title == 'Song':
2947 info['track'] = mrr_contents_text
2948
        # Fall back to the uploader_* fields when the channel_* counterparts
        # could not be extracted
        fallbacks = {
            'channel': 'uploader',
            'channel_id': 'uploader_id',
            'channel_url': 'uploader_url',
        }
        for to, frm in fallbacks.items():
            if not info.get(to):
                info[to] = info.get(frm)

        # Duplicate music metadata into the generic alias fields
        for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
            v = info.get(s_k)
            if v:
                info[d_k] = v
2962
2963 is_private = bool_or_none(video_details.get('isPrivate'))
2964 is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2965 is_membersonly = None
2966 is_premium = None
2967 if initial_data and is_private is not None:
2968 is_membersonly = False
2969 is_premium = False
2970 contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
2971 badge_labels = set()
2972 for content in contents:
2973 if not isinstance(content, dict):
2974 continue
2975 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
2976 for badge_label in badge_labels:
2977 if badge_label.lower() == 'members only':
2978 is_membersonly = True
2979 elif badge_label.lower() == 'premium':
2980 is_premium = True
2981 elif badge_label.lower() == 'unlisted':
2982 is_unlisted = True
2983
2984 info['availability'] = self._availability(
2985 is_private=is_private,
2986 needs_premium=is_premium,
2987 needs_subscription=is_membersonly,
2988 needs_auth=info['age_limit'] >= 18,
2989 is_unlisted=None if is_private is None else is_unlisted)
2990
2991 # get xsrf for annotations or comments
2992 get_annotations = self.get_param('writeannotations', False)
2993 get_comments = self.get_param('getcomments', False)
2994 if get_annotations or get_comments:
2995 xsrf_token = None
2996 ytcfg = self._extract_ytcfg(video_id, webpage)
2997 if ytcfg:
2998 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2999 if not xsrf_token:
3000 xsrf_token = self._search_regex(
3001 r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
3002 webpage, 'xsrf token', group='xsrf_token', fatal=False)
3003
3004 # annotations
3005 if get_annotations:
3006 invideo_url = try_get(
3007 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
3008 if xsrf_token and invideo_url:
3009 xsrf_field_name = None
3010 if ytcfg:
3011 xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3012 if not xsrf_field_name:
3013 xsrf_field_name = self._search_regex(
3014 r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3015 webpage, 'xsrf field name',
3016 group='xsrf_field_name', default='session_token')
3017 info['annotations'] = self._download_webpage(
3018 self._proto_relative_url(invideo_url),
3019 video_id, note='Downloading annotations',
3020 errnote='Unable to download video annotations', fatal=False,
3021 data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3022
3023 if get_comments:
3024 info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
3025
3026 self.mark_watched(video_id, player_response)
3027
3028 return info
3029
3030
3031 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3032 IE_DESC = 'YouTube.com tab'
3033 _VALID_URL = r'''(?x)
3034 https?://
3035 (?:\w+\.)?
3036 (?:
3037 youtube(?:kids)?\.com|
3038 invidio\.us
3039 )/
3040 (?:
3041 (?P<channel_type>channel|c|user|browse)/|
3042 (?P<not_channel>
3043 feed/|hashtag/|
3044 (?:playlist|watch)\?.*?\blist=
3045 )|
3046 (?!(?:%s)\b) # Direct URLs
3047 )
3048 (?P<id>[^/?\#&]+)
3049 ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3050 IE_NAME = 'youtube:tab'
3051
3052 _TESTS = [{
3053 'note': 'playlists, multipage',
3054 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3055 'playlist_mincount': 94,
3056 'info_dict': {
3057 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3058 'title': 'Игорь Клейнер - Playlists',
3059 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3060 'uploader': 'Игорь Клейнер',
3061 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3062 },
3063 }, {
3064 'note': 'playlists, multipage, different order',
3065 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3066 'playlist_mincount': 94,
3067 'info_dict': {
3068 'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3069 'title': 'Игорь Клейнер - Playlists',
3070 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3071 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3072 'uploader': 'Игорь Клейнер',
3073 },
3074 }, {
3075 'note': 'playlists, series',
3076 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3077 'playlist_mincount': 5,
3078 'info_dict': {
3079 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3080 'title': '3Blue1Brown - Playlists',
3081 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3082 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3083 'uploader': '3Blue1Brown',
3084 },
3085 }, {
3086 'note': 'playlists, singlepage',
3087 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3088 'playlist_mincount': 4,
3089 'info_dict': {
3090 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3091 'title': 'ThirstForScience - Playlists',
3092 'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3093 'uploader': 'ThirstForScience',
3094 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3095 }
3096 }, {
3097 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3098 'only_matching': True,
3099 }, {
3100 'note': 'basic, single video playlist',
3101 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3102 'info_dict': {
3103 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3104 'uploader': 'Sergey M.',
3105 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3106 'title': 'youtube-dl public playlist',
3107 },
3108 'playlist_count': 1,
3109 }, {
3110 'note': 'empty playlist',
3111 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3112 'info_dict': {
3113 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3114 'uploader': 'Sergey M.',
3115 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3116 'title': 'youtube-dl empty playlist',
3117 },
3118 'playlist_count': 0,
3119 }, {
3120 'note': 'Home tab',
3121 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3122 'info_dict': {
3123 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3124 'title': 'lex will - Home',
3125 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3126 'uploader': 'lex will',
3127 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3128 },
3129 'playlist_mincount': 2,
3130 }, {
3131 'note': 'Videos tab',
3132 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3133 'info_dict': {
3134 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3135 'title': 'lex will - Videos',
3136 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3137 'uploader': 'lex will',
3138 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3139 },
3140 'playlist_mincount': 975,
3141 }, {
3142 'note': 'Videos tab, sorted by popular',
3143 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3144 'info_dict': {
3145 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3146 'title': 'lex will - Videos',
3147 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3148 'uploader': 'lex will',
3149 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3150 },
3151 'playlist_mincount': 199,
3152 }, {
3153 'note': 'Playlists tab',
3154 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3155 'info_dict': {
3156 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3157 'title': 'lex will - Playlists',
3158 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3159 'uploader': 'lex will',
3160 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3161 },
3162 'playlist_mincount': 17,
3163 }, {
3164 'note': 'Community tab',
3165 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3166 'info_dict': {
3167 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3168 'title': 'lex will - Community',
3169 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3170 'uploader': 'lex will',
3171 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3172 },
3173 'playlist_mincount': 18,
3174 }, {
3175 'note': 'Channels tab',
3176 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3177 'info_dict': {
3178 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3179 'title': 'lex will - Channels',
3180 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3181 'uploader': 'lex will',
3182 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3183 },
3184 'playlist_mincount': 12,
3185 }, {
3186 'note': 'Search tab',
3187 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3188 'playlist_mincount': 40,
3189 'info_dict': {
3190 'id': 'UCYO_jab_esuFRV4b17AJtAw',
3191 'title': '3Blue1Brown - Search - linear algebra',
3192 'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3193 'uploader': '3Blue1Brown',
3194 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3195 },
3196 }, {
3197 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3198 'only_matching': True,
3199 }, {
3200 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3201 'only_matching': True,
3202 }, {
3203 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3204 'only_matching': True,
3205 }, {
3206 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3207 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3208 'info_dict': {
3209 'title': '29C3: Not my department',
3210 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3211 'uploader': 'Christiaan008',
3212 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3213 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3214 },
3215 'playlist_count': 96,
3216 }, {
3217 'note': 'Large playlist',
3218 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3219 'info_dict': {
3220 'title': 'Uploads from Cauchemar',
3221 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3222 'uploader': 'Cauchemar',
3223 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3224 },
3225 'playlist_mincount': 1123,
3226 }, {
3227 'note': 'even larger playlist, 8832 videos',
3228 'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3229 'only_matching': True,
3230 }, {
3231 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3232 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3233 'info_dict': {
3234 'title': 'Uploads from Interstellar Movie',
3235 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3236 'uploader': 'Interstellar Movie',
3237 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3238 },
3239 'playlist_mincount': 21,
3240 }, {
3241 'note': 'Playlist with "show unavailable videos" button',
3242 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3243 'info_dict': {
3244 'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3245 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3246 'uploader': 'Phim Siêu Nhân Nhật Bản',
3247 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3248 },
3249 'playlist_mincount': 200,
3250 }, {
3251 'note': 'Playlist with unavailable videos in page 7',
3252 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3253 'info_dict': {
3254 'title': 'Uploads from BlankTV',
3255 'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3256 'uploader': 'BlankTV',
3257 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3258 },
3259 'playlist_mincount': 1000,
3260 }, {
3261 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3262 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3263 'info_dict': {
3264 'title': 'Data Analysis with Dr Mike Pound',
3265 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3266 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3267 'uploader': 'Computerphile',
3268 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3269 },
3270 'playlist_mincount': 11,
3271 }, {
3272 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3273 'only_matching': True,
3274 }, {
3275 'note': 'Playlist URL that does not actually serve a playlist',
3276 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3277 'info_dict': {
3278 'id': 'FqZTN594JQw',
3279 'ext': 'webm',
3280 'title': "Smiley's People 01 detective, Adventure Series, Action",
3281 'uploader': 'STREEM',
3282 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3283 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3284 'upload_date': '20150526',
3285 'license': 'Standard YouTube License',
3286 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3287 'categories': ['People & Blogs'],
3288 'tags': list,
3289 'view_count': int,
3290 'like_count': int,
3291 'dislike_count': int,
3292 },
3293 'params': {
3294 'skip_download': True,
3295 },
3296 'skip': 'This video is not available.',
3297 'add_ie': [YoutubeIE.ie_key()],
3298 }, {
3299 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3300 'only_matching': True,
3301 }, {
3302 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3303 'only_matching': True,
3304 }, {
3305 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3306 'info_dict': {
3307 'id': 'X1whbWASnNQ', # This will keep changing
3308 'ext': 'mp4',
3309 'title': compat_str,
3310 'uploader': 'Sky News',
3311 'uploader_id': 'skynews',
3312 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3313 'upload_date': r're:\d{8}',
3314 'description': compat_str,
3315 'categories': ['News & Politics'],
3316 'tags': list,
3317 'like_count': int,
3318 'dislike_count': int,
3319 },
3320 'params': {
3321 'skip_download': True,
3322 },
3323 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3324 }, {
3325 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3326 'info_dict': {
3327 'id': 'a48o2S1cPoo',
3328 'ext': 'mp4',
3329 'title': 'The Young Turks - Live Main Show',
3330 'uploader': 'The Young Turks',
3331 'uploader_id': 'TheYoungTurks',
3332 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3333 'upload_date': '20150715',
3334 'license': 'Standard YouTube License',
3335 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3336 'categories': ['News & Politics'],
3337 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3338 'like_count': int,
3339 'dislike_count': int,
3340 },
3341 'params': {
3342 'skip_download': True,
3343 },
3344 'only_matching': True,
3345 }, {
3346 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3347 'only_matching': True,
3348 }, {
3349 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3350 'only_matching': True,
3351 }, {
3352 'note': 'A channel that is not live. Should raise error',
3353 'url': 'https://www.youtube.com/user/numberphile/live',
3354 'only_matching': True,
3355 }, {
3356 'url': 'https://www.youtube.com/feed/trending',
3357 'only_matching': True,
3358 }, {
3359 'url': 'https://www.youtube.com/feed/library',
3360 'only_matching': True,
3361 }, {
3362 'url': 'https://www.youtube.com/feed/history',
3363 'only_matching': True,
3364 }, {
3365 'url': 'https://www.youtube.com/feed/subscriptions',
3366 'only_matching': True,
3367 }, {
3368 'url': 'https://www.youtube.com/feed/watch_later',
3369 'only_matching': True,
3370 }, {
3371 'note': 'Recommended - redirects to home page',
3372 'url': 'https://www.youtube.com/feed/recommended',
3373 'only_matching': True,
3374 }, {
3375 'note': 'inline playlist with not always working continuations',
3376 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3377 'only_matching': True,
3378 }, {
3379 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3380 'only_matching': True,
3381 }, {
3382 'url': 'https://www.youtube.com/course',
3383 'only_matching': True,
3384 }, {
3385 'url': 'https://www.youtube.com/zsecurity',
3386 'only_matching': True,
3387 }, {
3388 'url': 'http://www.youtube.com/NASAgovVideo/videos',
3389 'only_matching': True,
3390 }, {
3391 'url': 'https://www.youtube.com/TheYoungTurks/live',
3392 'only_matching': True,
3393 }, {
3394 'url': 'https://www.youtube.com/hashtag/cctv9',
3395 'info_dict': {
3396 'id': 'cctv9',
3397 'title': '#cctv9',
3398 },
3399 'playlist_mincount': 350,
3400 }, {
3401 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3402 'only_matching': True,
3403 }, {
3404 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3405 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3406 'only_matching': True
3407 }, {
3408 'note': '/browse/ should redirect to /channel/',
3409 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3410 'only_matching': True
3411 }, {
3412 'note': 'VLPL, should redirect to playlist?list=PL...',
3413 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3414 'info_dict': {
3415 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3416 'uploader': 'NoCopyrightSounds',
3417 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3418 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3419 'title': 'NCS Releases',
3420 },
3421 'playlist_mincount': 166,
3422 }, {
3423 'note': 'Topic, should redirect to playlist?list=UU...',
3424 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3425 'info_dict': {
3426 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3427 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3428 'title': 'Uploads from Royalty Free Music - Topic',
3429 'uploader': 'Royalty Free Music - Topic',
3430 },
3431 'expected_warnings': [
3432 'A channel/user page was given',
3433 'The URL does not have a videos tab',
3434 ],
3435 'playlist_mincount': 101,
3436 }, {
3437 'note': 'Topic without a UU playlist',
3438 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3439 'info_dict': {
3440 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3441 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3442 },
3443 'expected_warnings': [
3444 'A channel/user page was given',
3445 'The URL does not have a videos tab',
3446 'Falling back to channel URL',
3447 ],
3448 'playlist_mincount': 9,
3449 }, {
3450 'note': 'Youtube music Album',
3451 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3452 'info_dict': {
3453 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3454 'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3455 },
3456 'playlist_count': 50,
3457 }, {
3458 'note': 'unlisted single video playlist',
3459 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3460 'info_dict': {
3461 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
3462 'uploader': 'colethedj',
3463 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
3464 'title': 'yt-dlp unlisted playlist test',
3465 'availability': 'unlisted'
3466 },
3467 'playlist_count': 1,
3468 }]
3469
3470 @classmethod
3471 def suitable(cls, url):
3472 return False if YoutubeIE.suitable(url) else super(
3473 YoutubeTabIE, cls).suitable(url)
3474
3475 def _extract_channel_id(self, webpage):
3476 channel_id = self._html_search_meta(
3477 'channelId', webpage, 'channel id', default=None)
3478 if channel_id:
3479 return channel_id
3480 channel_url = self._html_search_meta(
3481 ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3482 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3483 'twitter:app:url:googleplay'), webpage, 'channel url')
3484 return self._search_regex(
3485 r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3486 channel_url, 'channel id')
3487
3488 @staticmethod
3489 def _extract_basic_item_renderer(item):
3490 # Modified from _extract_grid_item_renderer
3491 known_basic_renderers = (
3492 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3493 )
3494 for key, renderer in item.items():
3495 if not isinstance(renderer, dict):
3496 continue
3497 elif key in known_basic_renderers:
3498 return renderer
3499 elif key.startswith('grid') and key.endswith('Renderer'):
3500 return renderer
3501
3502 def _grid_entries(self, grid_renderer):
3503 for item in grid_renderer['items']:
3504 if not isinstance(item, dict):
3505 continue
3506 renderer = self._extract_basic_item_renderer(item)
3507 if not isinstance(renderer, dict):
3508 continue
3509 title = try_get(
3510 renderer, (lambda x: x['title']['runs'][0]['text'],
3511 lambda x: x['title']['simpleText']), compat_str)
3512 # playlist
3513 playlist_id = renderer.get('playlistId')
3514 if playlist_id:
3515 yield self.url_result(
3516 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3517 ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3518 video_title=title)
3519 continue
3520 # video
3521 video_id = renderer.get('videoId')
3522 if video_id:
3523 yield self._extract_video(renderer)
3524 continue
3525 # channel
3526 channel_id = renderer.get('channelId')
3527 if channel_id:
3528 title = try_get(
3529 renderer, lambda x: x['title']['simpleText'], compat_str)
3530 yield self.url_result(
3531 'https://www.youtube.com/channel/%s' % channel_id,
3532 ie=YoutubeTabIE.ie_key(), video_title=title)
3533 continue
3534 # generic endpoint URL support
3535 ep_url = urljoin('https://www.youtube.com/', try_get(
3536 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3537 compat_str))
3538 if ep_url:
3539 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3540 if ie.suitable(ep_url):
3541 yield self.url_result(
3542 ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3543 break
3544
3545 def _shelf_entries_from_content(self, shelf_renderer):
3546 content = shelf_renderer.get('content')
3547 if not isinstance(content, dict):
3548 return
3549 renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3550 if renderer:
3551 # TODO: add support for nested playlists so each shelf is processed
3552 # as separate playlist
3553 # TODO: this includes only first N items
3554 for entry in self._grid_entries(renderer):
3555 yield entry
3556 renderer = content.get('horizontalListRenderer')
3557 if renderer:
3558 # TODO
3559 pass
3560
3561 def _shelf_entries(self, shelf_renderer, skip_channels=False):
3562 ep = try_get(
3563 shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3564 compat_str)
3565 shelf_url = urljoin('https://www.youtube.com', ep)
3566 if shelf_url:
3567 # Skipping links to another channels, note that checking for
3568 # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3569 # will not work
3570 if skip_channels and '/channels?' in shelf_url:
3571 return
3572 title = try_get(
3573 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
3574 yield self.url_result(shelf_url, video_title=title)
3575 # Shelf may not contain shelf URL, fallback to extraction from content
3576 for entry in self._shelf_entries_from_content(shelf_renderer):
3577 yield entry
3578
3579 def _playlist_entries(self, video_list_renderer):
3580 for content in video_list_renderer['contents']:
3581 if not isinstance(content, dict):
3582 continue
3583 renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3584 if not isinstance(renderer, dict):
3585 continue
3586 video_id = renderer.get('videoId')
3587 if not video_id:
3588 continue
3589 yield self._extract_video(renderer)
3590
3591 def _rich_entries(self, rich_grid_renderer):
3592 renderer = try_get(
3593 rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3594 video_id = renderer.get('videoId')
3595 if not video_id:
3596 return
3597 yield self._extract_video(renderer)
3598
3599 def _video_entry(self, video_renderer):
3600 video_id = video_renderer.get('videoId')
3601 if video_id:
3602 return self._extract_video(video_renderer)
3603
3604 def _post_thread_entries(self, post_thread_renderer):
3605 post_renderer = try_get(
3606 post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3607 if not post_renderer:
3608 return
3609 # video attachment
3610 video_renderer = try_get(
3611 post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3612 video_id = video_renderer.get('videoId')
3613 if video_id:
3614 entry = self._extract_video(video_renderer)
3615 if entry:
3616 yield entry
3617 # playlist attachment
3618 playlist_id = try_get(
3619 post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3620 if playlist_id:
3621 yield self.url_result(
3622 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3623 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3624 # inline video links
3625 runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3626 for run in runs:
3627 if not isinstance(run, dict):
3628 continue
3629 ep_url = try_get(
3630 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3631 if not ep_url:
3632 continue
3633 if not YoutubeIE.suitable(ep_url):
3634 continue
3635 ep_video_id = YoutubeIE._match_id(ep_url)
3636 if video_id == ep_video_id:
3637 continue
3638 yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3639
3640 def _post_thread_continuation_entries(self, post_thread_continuation):
3641 contents = post_thread_continuation.get('contents')
3642 if not isinstance(contents, list):
3643 return
3644 for content in contents:
3645 renderer = content.get('backstagePostThreadRenderer')
3646 if not isinstance(renderer, dict):
3647 continue
3648 for entry in self._post_thread_entries(renderer):
3649 yield entry
3650
3651 r''' # unused
3652 def _rich_grid_entries(self, contents):
3653 for content in contents:
3654 video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3655 if video_renderer:
3656 entry = self._video_entry(video_renderer)
3657 if entry:
3658 yield entry
3659 '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of the selected tab, transparently following API continuations.

        @param tab:            selected tab renderer (from _extract_selected_tab)
        @param item_id:        id used in progress/error messages
        @param identity_token: auth token for API requests (may be None)
        @param account_syncid: multi-channel account id (may be None)
        @param ytcfg:          parsed ytcfg (provides API context and headers)
        """

        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # rich-grid layout item (home page / hashtag pages)
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # map of renderer key -> callable yielding its entries
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        break

                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # single-item list used as a writable cell: Python 2 does not support nonlocal
        continuation_list = [None]
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        context = self._extract_context(ytcfg)
        visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

        # follow continuation tokens until the API stops returning new pages
        for page_num in itertools.count(1):
            if not continuation:
                break
            query = {
                'continuation': continuation['continuation'],
                'clickTracking': {'clickTrackingParams': continuation['itct']}
            }
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=query, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # carry visitorData forward so follow-up requests stay in the same session
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # older API shape: continuation data under 'continuationContents'
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # newer API shape: items arrive via onResponseReceivedActions/Endpoints;
            # the value is the key under which the items are re-wrapped below
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            break
3779
3780 @staticmethod
3781 def _extract_selected_tab(tabs):
3782 for tab in tabs:
3783 renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3784 if renderer.get('selected') is True:
3785 return renderer
3786 else:
3787 raise ExtractorError('Unable to find selected tab')
3788
3789 @classmethod
3790 def _extract_uploader(cls, data):
3791 uploader = {}
3792 renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3793 owner = try_get(
3794 renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3795 if owner:
3796 uploader['uploader'] = owner.get('text')
3797 uploader['uploader_id'] = try_get(
3798 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3799 uploader['uploader_url'] = urljoin(
3800 'https://www.youtube.com/',
3801 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3802 return {k: v for k, v in uploader.items() if v is not None}
3803
3804 def _extract_from_tabs(self, item_id, webpage, data, tabs):
3805 playlist_id = title = description = channel_url = channel_name = channel_id = None
3806 thumbnails_list = tags = []
3807
3808 selected_tab = self._extract_selected_tab(tabs)
3809 renderer = try_get(
3810 data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3811 if renderer:
3812 channel_name = renderer.get('title')
3813 channel_url = renderer.get('channelUrl')
3814 channel_id = renderer.get('externalId')
3815 else:
3816 renderer = try_get(
3817 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3818
3819 if renderer:
3820 title = renderer.get('title')
3821 description = renderer.get('description', '')
3822 playlist_id = channel_id
3823 tags = renderer.get('keywords', '').split()
3824 thumbnails_list = (
3825 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
3826 or try_get(
3827 self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3828 lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
3829 list)
3830 or [])
3831
3832 thumbnails = []
3833 for t in thumbnails_list:
3834 if not isinstance(t, dict):
3835 continue
3836 thumbnail_url = url_or_none(t.get('url'))
3837 if not thumbnail_url:
3838 continue
3839 thumbnails.append({
3840 'url': thumbnail_url,
3841 'width': int_or_none(t.get('width')),
3842 'height': int_or_none(t.get('height')),
3843 })
3844 if playlist_id is None:
3845 playlist_id = item_id
3846 if title is None:
3847 title = (
3848 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3849 or playlist_id)
3850 title += format_field(selected_tab, 'title', ' - %s')
3851 title += format_field(selected_tab, 'expandedText', ' - %s')
3852 metadata = {
3853 'playlist_id': playlist_id,
3854 'playlist_title': title,
3855 'playlist_description': description,
3856 'uploader': channel_name,
3857 'uploader_id': channel_id,
3858 'uploader_url': channel_url,
3859 'thumbnails': thumbnails,
3860 'tags': tags,
3861 }
3862 availability = self._extract_availability(data)
3863 if availability:
3864 metadata['availability'] = availability
3865 if not channel_id:
3866 metadata.update(self._extract_uploader(data))
3867 metadata.update({
3868 'channel': metadata['uploader'],
3869 'channel_id': metadata['uploader_id'],
3870 'channel_url': metadata['uploader_url']})
3871 return self.playlist_result(
3872 self._entries(
3873 selected_tab, playlist_id,
3874 self._extract_identity_token(webpage, item_id),
3875 self._extract_account_syncid(data),
3876 self._extract_ytcfg(item_id, webpage)),
3877 **metadata)
3878
    def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
        """Yield videos of an endless Mix playlist, paging via the 'next' API.

        Stops when a page contributes no new videos or when the first video
        reappears (Mixes loop forever).
        """
        first_id = last_id = None
        ytcfg = self._extract_ytcfg(playlist_id, webpage)
        headers = self._generate_api_headers(
            ytcfg, account_syncid=self._extract_account_syncid(data),
            identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
            visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        for page_num in itertools.count(1):
            videos = list(self._playlist_entries(playlist))
            if not videos:
                return
            # consecutive pages overlap: resume right after the last id already yielded
            start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
            if start >= len(videos):
                return
            for video in videos[start:]:
                if video['id'] == first_id:
                    self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
                    return
                yield video
            first_id = first_id or videos[0]['id']
            last_id = videos[-1]['id']
            watch_endpoint = try_get(
                playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
            # NOTE(review): watch_endpoint may be None if the renderer shape changes;
            # the .get() calls below would then raise AttributeError — confirm upstream
            query = {
                'playlistId': playlist_id,
                'videoId': watch_endpoint.get('videoId') or last_id,
                'index': watch_endpoint.get('index') or len(videos),
                'params': watch_endpoint.get('params') or 'OAE%3D'
            }
            response = self._extract_response(
                item_id='%s page %d' % (playlist_id, page_num),
                query=query,
                ep='next',
                headers=headers,
                check_get_keys='contents'
            )
            playlist = try_get(
                response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3917
3918 def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
3919 title = playlist.get('title') or try_get(
3920 data, lambda x: x['titleText']['simpleText'], compat_str)
3921 playlist_id = playlist.get('playlistId') or item_id
3922
3923 # Delegating everything except mix playlists to regular tab-based playlist URL
3924 playlist_url = urljoin(url, try_get(
3925 playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3926 compat_str))
3927 if playlist_url and playlist_url != url:
3928 return self.url_result(
3929 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3930 video_title=title)
3931
3932 return self.playlist_result(
3933 self._extract_mix_playlist(playlist, playlist_id, data, webpage),
3934 playlist_id=playlist_id, playlist_title=title)
3935
3936 def _extract_availability(self, data):
3937 """
3938 Gets the availability of a given playlist/tab.
3939 Note: Unless YouTube tells us explicitly, we do not assume it is public
3940 @param data: response
3941 """
3942 is_private = is_unlisted = None
3943 renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3944 badge_labels = self._extract_badges(renderer)
3945
3946 # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3947 privacy_dropdown_entries = try_get(
3948 renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3949 for renderer_dict in privacy_dropdown_entries:
3950 is_selected = try_get(
3951 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3952 if not is_selected:
3953 continue
3954 label = self._join_text_entries(
3955 try_get(renderer_dict, lambda x: x['privacyDropdownItemRenderer']['label']['runs'], list) or [])
3956 if label:
3957 badge_labels.add(label.lower())
3958 break
3959
3960 for badge_label in badge_labels:
3961 if badge_label == 'unlisted':
3962 is_unlisted = True
3963 elif badge_label == 'private':
3964 is_private = True
3965 elif badge_label == 'public':
3966 is_unlisted = is_private = False
3967 return self._availability(is_private, False, False, False, is_unlisted)
3968
3969 @staticmethod
3970 def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
3971 sidebar_renderer = try_get(
3972 data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
3973 for item in sidebar_renderer:
3974 renderer = try_get(item, lambda x: x[info_renderer], expected_type)
3975 if renderer:
3976 return renderer
3977
    def _reload_with_unavailable_videos(self, item_id, data, webpage):
        """
        Get playlist with unavailable videos if the 'show unavailable videos' button exists.
        Re-requests the playlist via the browse API; when the menu button is not
        found, default params still requesting unavailable videos are used.
        Returns the new API response, or None when there is no sidebar renderer
        or the (non-fatal) request fails.
        """
        browse_id = params = None
        renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
        if not renderer:
            return
        menu_renderer = try_get(
            renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
        for menu_item in menu_renderer:
            if not isinstance(menu_item, dict):
                continue
            nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
            text = try_get(
                nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
            if not text or text.lower() != 'show unavailable videos':
                continue
            browse_endpoint = try_get(
                nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
            browse_id = browse_endpoint.get('browseId')
            params = browse_endpoint.get('params')
            break

        ytcfg = self._extract_ytcfg(item_id, webpage)
        headers = self._generate_api_headers(
            # NOTE(review): sibling call sites pass ytInitialData to
            # _extract_account_syncid; here ytcfg is passed — confirm intended
            ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
            identity_token=self._extract_identity_token(webpage, item_id=item_id),
            visitor_data=try_get(
                self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
        query = {
            # default param requests the playlist including unavailable videos
            'params': params or 'wgYCCAA=',
            'browseId': browse_id or 'VL%s' % item_id
        }
        return self._extract_response(
            item_id=item_id, headers=headers, query=query,
            check_get_keys='contents', fatal=False,
            note='Downloading API JSON with unavailable videos')
4016
4017 def _extract_webpage(self, url, item_id):
4018 retries = self.get_param('extractor_retries', 3)
4019 count = -1
4020 last_error = 'Incomplete yt initial data recieved'
4021 while count < retries:
4022 count += 1
4023 # Sometimes youtube returns a webpage with incomplete ytInitialData
4024 # See: https://github.com/yt-dlp/yt-dlp/issues/116
4025 if count:
4026 self.report_warning('%s. Retrying ...' % last_error)
4027 webpage = self._download_webpage(
4028 url, item_id,
4029 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
4030 data = self._extract_yt_initial_data(item_id, webpage)
4031 if data.get('contents') or data.get('currentVideoEndpoint'):
4032 break
4033 # Extract alerts here only when there is error
4034 self._extract_and_report_alerts(data)
4035 if count >= retries:
4036 raise ExtractorError(last_error)
4037 return webpage, data
4038
4039 @staticmethod
4040 def _smuggle_data(entries, data):
4041 for entry in entries:
4042 if data:
4043 entry['url'] = smuggle_url(entry['url'], data)
4044 yield entry
4045
4046 def _real_extract(self, url):
4047 url, smuggled_data = unsmuggle_url(url, {})
4048 if self.is_music_url(url):
4049 smuggled_data['is_music_url'] = True
4050 info_dict = self.__real_extract(url, smuggled_data)
4051 if info_dict.get('entries'):
4052 info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4053 return info_dict
4054
    # Matches a _VALID_URL prefix ('pre'), then — only when the 'channel_type'
    # group participated in the match — an optional '/<tab>' segment, then the
    # remainder ('post'). Consumed by get_mobj() in __real_extract.
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4056
    def __real_extract(self, url, smuggled_data):
        """Core extraction for tab/playlist/watch URLs.

        Normalizes the URL (www host, lowercase tab name, music-URL redirects),
        then dispatches to tab-based, playlist-based or single-video extraction.
        """
        item_id = self._match_id(url)
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # groupdict of _url_re with None groups normalized to ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4171
4172
class YoutubePlaylistIE(InfoExtractor):
    IE_DESC = 'YouTube.com playlists'
    # Matches playlist URLs on youtube/youtubekids/invidious hosts as well as
    # bare playlist IDs (no scheme/host at all)
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    IE_NAME = 'youtube:playlist'
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        # Any URL YoutubeTabIE claims (channels, tabs, full playlist pages)
        # is handled there, not here
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        qs = parse_qs(url)
        # URLs carrying a video ID (?v=...) belong to the watch extractor
        if qs.get('v', [None])[0]:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Normalize any matched URL/bare ID to a canonical playlist URL
        and delegate extraction to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Rebuild as https://www.youtube.com/playlist?...; a bare ID has no
        # query string, so fall back to {'list': playlist_id}
        url = update_url_query(
            'https://www.youtube.com/playlist',
            parse_qs(url) or {'list': playlist_id})
        if is_music_url:
            # Carry the music-service origin to YoutubeTabIE via smuggled data
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4255
4256
class YoutubeYtBeIE(InfoExtractor):
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Rewrite a youtu.be short link carrying a list= parameter into a
        regular watch URL and hand it to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4295
4296
class YoutubeYtUserIE(InfoExtractor):
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Map "ytuser:NAME" onto the corresponding /user/ channel page."""
        user_id = self._match_id(url)
        channel_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            channel_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4310
4311
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # "Liked videos" is the auto-generated playlist "LL"; delegate to
        # the tab/playlist extractor
        return self.url_result(
            'https://www.youtube.com/playlist?list=LL',
            ie=YoutubeTabIE.ie_key())
4329
4330
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None  # extra search filter params; set by subclasses
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* extracted video results for *query*.

        Pages through the InnerTube ``search`` endpoint, following
        continuation tokens until *n* results have been produced or no
        further continuation is available.
        """
        data = {'query': query}
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                break
            # First page nests results differently from continuation pages
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            continuation_token = None
            for slr_content in slr_contents:
                if continuation_token is None:
                    continuation_token = try_get(
                        slr_content,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                isr_contents = try_get(
                    slr_content,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                if not isr_contents:
                    continue
                for content in isr_contents:
                    if not isinstance(content, dict):
                        continue
                    video = content.get('videoRenderer')
                    if not isinstance(video, dict):
                        continue
                    video_id = video.get('videoId')
                    if not video_id:
                        continue

                    yield self._extract_video(video)
                    total += 1
                    # ">=" rather than "==": a non-integral or non-positive
                    # limit must not let the generator page forever
                    if total >= n:
                        return

            if not continuation_token:
                break
            data['continuation'] = continuation_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)
4400
4401
class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    # URL-encoded search filter ('CAI=') selecting newest-first ordering
    _SEARCH_PARAMS = 'CAI%3D'
4407
4408
class YoutubeSearchURLIE(YoutubeSearchIE):
    IE_DESC = 'YouTube.com search URLs'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        # SearchInfoExtractor derives its URL pattern from the search key;
        # override to match real /results URLs instead
        return cls._VALID_URL

    def _real_extract(self, url):
        """Run a search for the query carried in a /results URL.

        The search term comes from ``search_query`` (preferred) or ``q``;
        an optional ``sp`` parameter carries the encoded search filters.
        """
        # Use the module-level parse_qs helper, consistent with the rest
        # of this file
        qs = parse_qs(url)
        # _VALID_URL guarantees at least one of the two keys is present
        query = (qs.get('search_query') or qs.get('q'))[0]
        self._SEARCH_PARAMS = qs.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4434
4435
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Base class for feed extractors
    Subclasses must define the _FEED_NAME property.
    """
    _LOGIN_REQUIRED = True
    _TESTS = []

    @property
    def IE_NAME(self):
        # Computed per subclass so each feed is listed as "youtube:<feed>"
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        # Every feed lives at /feed/<name>; YoutubeTabIE does the actual work
        return self.url_result(
            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
            ie=YoutubeTabIE.ie_key())
4452
4453
class YoutubeWatchLaterIE(InfoExtractor):
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # Watch Later is the auto-generated playlist "WL"; delegate to the
        # tab/playlist extractor
        return self.url_result(
            'https://www.youtube.com/playlist?list=WL', ie=YoutubeTabIE.ie_key())
4466
4467
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    # Matches the bare youtube.com front page as well as the :ytrec keyword
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _FEED_NAME = 'recommended'
    _LOGIN_REQUIRED = False  # unlike other feeds, the home page works anonymously
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4483
4484
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _FEED_NAME = 'subscriptions'  # resolves to /feed/subscriptions via the base class
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4496
4497
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _VALID_URL = r':ythis(?:tory)?'
    _FEED_NAME = 'history'  # resolves to /feed/history via the base class
    _TESTS = [{
        'url': ':ythistory',
        'only_matching': True,
    }]
4506
4507
class YoutubeTruncatedURLIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    # Matches watch/attribution URLs whose query string was cut off at an
    # unquoted "&" in the shell, i.e. URLs lacking the v= parameter
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
        attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise a helpful error: such URLs carry no video ID."""
        # This is yt-dlp, so the suggested command must not say "youtube-dl"
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply yt-dlp BaW_jenozKc .',
            expected=True)
4555
4556
class YoutubeTruncatedIDIE(InfoExtractor):
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always raise: a matched URL carries a video ID shorter than the
        11 characters a real YouTube ID has."""
        truncated_id = self._match_id(url)
        raise ExtractorError(
            'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url),
            expected=True)