yt_dlp/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import base64
   6 import calendar
   7 import copy
   8 import hashlib
   9 import itertools
  10 import json
  11 import os.path
  12 import random
  13 import re
  14 import time
  15 import traceback
  16
  17 from .common import InfoExtractor, SearchInfoExtractor
  18 from ..compat import (
  19     compat_chr,
  20     compat_HTTPError,
  21     compat_parse_qs,
  22     compat_str,
  23     compat_urllib_parse_unquote_plus,
  24     compat_urllib_parse_urlencode,
  25     compat_urllib_parse_urlparse,
  26     compat_urlparse,
  27 )
  28 from ..jsinterp import JSInterpreter
  29 from ..utils import (
  30     bool_or_none,
  31     bytes_to_intlist,
  32     clean_html,
  33     dict_get,
  34     datetime_from_str,
  35     error_to_compat_str,
  36     ExtractorError,
  37     format_field,
  38     float_or_none,
  39     int_or_none,
  40     intlist_to_bytes,
  41     mimetype2ext,
  42     parse_codecs,
  43     parse_count,
  44     parse_duration,
  45     qualities,
  46     remove_start,
  47     smuggle_url,
  48     str_or_none,
  49     str_to_int,
  50     try_get,
  51     unescapeHTML,
  52     unified_strdate,
  53     unsmuggle_url,
  54     update_url_query,
  55     url_or_none,
  56     urlencode_postdata,
  57     urljoin
  58 )
  59
  60
  61 def parse_qs(url):
  62     return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
  63
  64
  65 class YoutubeBaseInfoExtractor(InfoExtractor):
  66     """Provide base functions for Youtube extractors"""
    # Google account endpoints related to signing in
    _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
    _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'

    _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
    _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
    # `{0}` is a str.format() placeholder filled into the TL query parameter
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    # Regex alternation (without an enclosing group) of path names
    # NOTE(review): presumably the first path components that cannot be a channel/user name - confirm against users
    _RESERVED_NAMES = (
        r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|'
        r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False

    # Matches either a known prefix followed by at least 10 id characters,
    # or one of the fixed ids RDMM, WL, LL, LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
  84
  85     def _login(self):
  86         """
  87         Attempt to log in to YouTube.
  88         True is returned if successful or skipped.
  89         False is returned if login failed.
  90
  91         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
  92         """
  93
  94         def warn(message):
  95             self.report_warning(message)
  96
  97         # username+password login is broken
  98         if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
  99             self.raise_login_required(
 100                 'Login details are needed to download this content', method='cookies')
 101         username, password = self._get_login_info()
 102         if username:
 103             warn('Logging in using username and password is broken. %s' % self._LOGIN_HINTS['cookies'])
 104         return
 105
 106         # Everything below this is broken!
 107         r'''
 108         # No authentication to be performed
 109         if username is None:
 110             if self._LOGIN_REQUIRED and self.get_param('cookiefile') is None:
 111                 raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
 112             # if self.get_param('cookiefile'):  # TODO remove 'and False' later - too many people using outdated cookies and open issues, remind them.
 113             #     self.to_screen('[Cookies] Reminder - Make sure to always use up to date cookies!')
 114             return True
 115
 116         login_page = self._download_webpage(
 117             self._LOGIN_URL, None,
 118             note='Downloading login page',
 119             errnote='unable to fetch login page', fatal=False)
 120         if login_page is False:
 121             return
 122
 123         login_form = self._hidden_inputs(login_page)
 124
 125         def req(url, f_req, note, errnote):
 126             data = login_form.copy()
 127             data.update({
 128                 'pstMsg': 1,
 129                 'checkConnection': 'youtube',
 130                 'checkedDomains': 'youtube',
 131                 'hl': 'en',
 132                 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
 133                 'f.req': json.dumps(f_req),
 134                 'flowName': 'GlifWebSignIn',
 135                 'flowEntry': 'ServiceLogin',
 136                 # TODO: reverse actual botguard identifier generation algo
 137                 'bgRequest': '["identifier",""]',
 138             })
 139             return self._download_json(
 140                 url, None, note=note, errnote=errnote,
 141                 transform_source=lambda s: re.sub(r'^[^[]*', '', s),
 142                 fatal=False,
 143                 data=urlencode_postdata(data), headers={
 144                     'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
 145                     'Google-Accounts-XSRF': 1,
 146                 })
 147
 148         lookup_req = [
 149             username,
 150             None, [], None, 'US', None, None, 2, False, True,
 151             [
 152                 None, None,
 153                 [2, 1, None, 1,
 154                  'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
 155                  None, [], 4],
 156                 1, [None, None, []], None, None, None, True
 157             ],
 158             username,
 159         ]
 160
 161         lookup_results = req(
 162             self._LOOKUP_URL, lookup_req,
 163             'Looking up account info', 'Unable to look up account info')
 164
 165         if lookup_results is False:
 166             return False
 167
 168         user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
 169         if not user_hash:
 170             warn('Unable to extract user hash')
 171             return False
 172
 173         challenge_req = [
 174             user_hash,
 175             None, 1, None, [1, None, None, None, [password, None, True]],
 176             [
 177                 None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
 178                 1, [None, None, []], None, None, None, True
 179             ]]
 180
 181         challenge_results = req(
 182             self._CHALLENGE_URL, challenge_req,
 183             'Logging in', 'Unable to log in')
 184
 185         if challenge_results is False:
 186             return
 187
 188         login_res = try_get(challenge_results, lambda x: x[0][5], list)
 189         if login_res:
 190             login_msg = try_get(login_res, lambda x: x[5], compat_str)
 191             warn(
 192                 'Unable to login: %s' % 'Invalid password'
 193                 if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
 194             return False
 195
 196         res = try_get(challenge_results, lambda x: x[0][-1], list)
 197         if not res:
 198             warn('Unable to extract result entry')
 199             return False
 200
 201         login_challenge = try_get(res, lambda x: x[0][0], list)
 202         if login_challenge:
 203             challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
 204             if challenge_str == 'TWO_STEP_VERIFICATION':
 205                 # SEND_SUCCESS - TFA code has been successfully sent to phone
 206                 # QUOTA_EXCEEDED - reached the limit of TFA codes
 207                 status = try_get(login_challenge, lambda x: x[5], compat_str)
 208                 if status == 'QUOTA_EXCEEDED':
 209                     warn('Exceeded the limit of TFA codes, try later')
 210                     return False
 211
 212                 tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
 213                 if not tl:
 214                     warn('Unable to extract TL')
 215                     return False
 216
 217                 tfa_code = self._get_tfa_info('2-step verification code')
 218
 219                 if not tfa_code:
 220                     warn(
 221                         'Two-factor authentication required. Provide it either interactively or with --twofactor <code>'
 222                         '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
 223                     return False
 224
 225                 tfa_code = remove_start(tfa_code, 'G-')
 226
 227                 tfa_req = [
 228                     user_hash, None, 2, None,
 229                     [
 230                         9, None, None, None, None, None, None, None,
 231                         [None, tfa_code, True, 2]
 232                     ]]
 233
 234                 tfa_results = req(
 235                     self._TFA_URL.format(tl), tfa_req,
 236                     'Submitting TFA code', 'Unable to submit TFA code')
 237
 238                 if tfa_results is False:
 239                     return False
 240
 241                 tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
 242                 if tfa_res:
 243                     tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
 244                     warn(
 245                         'Unable to finish TFA: %s' % 'Invalid TFA code'
 246                         if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
 247                     return False
 248
 249                 check_cookie_url = try_get(
 250                     tfa_results, lambda x: x[0][-1][2], compat_str)
 251             else:
 252                 CHALLENGES = {
 253                     'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
 254                     'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
 255                     'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
 256                 }
 257                 challenge = CHALLENGES.get(
 258                     challenge_str,
 259                     '%s returned error %s.' % (self.IE_NAME, challenge_str))
 260                 warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
 261                 return False
 262         else:
 263             check_cookie_url = try_get(res, lambda x: x[2], compat_str)
 264
 265         if not check_cookie_url:
 266             warn('Unable to extract CheckCookie URL')
 267             return False
 268
 269         check_cookie_results = self._download_webpage(
 270             check_cookie_url, None, 'Checking cookie', fatal=False)
 271
 272         if check_cookie_results is False:
 273             return False
 274
 275         if 'https://myaccount.google.com/' not in check_cookie_results:
 276             warn('Unable to log in')
 277             return False
 278
 279         return True
 280         '''
 281
 282     def _initialize_consent(self):
 283         cookies = self._get_cookies('https://www.youtube.com/')
 284         if cookies.get('__Secure-3PSID'):
 285             return
 286         consent_id = None
 287         consent = cookies.get('CONSENT')
 288         if consent:
 289             if 'YES' in consent.value:
 290                 return
 291             consent_id = self._search_regex(
 292                 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
 293         if not consent_id:
 294             consent_id = random.randint(100, 999)
 295         self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
 296
 297     def _real_initialize(self):
 298         self._initialize_consent()
 299         if self._downloader is None:
 300             return
 301         if not self._login():
 302             return
 303
    # Captures (non-greedily) the `{...}` object assigned to `ytInitialData`
    # or `window["ytInitialData"]`, up to a following `;`
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Same idea for the object assigned to `ytInitialPlayerResponse`
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Text expected right after the assignment; _extract_yt_initial_data appends
    # it to _YT_INITIAL_DATA_RE for its first (stricter) search pattern
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
 307
    # Default ytcfg values per Innertube client name.
    # Used as fallback (via _get_default_ytcfg) when a value is not available
    # from a ytcfg extracted elsewhere.
    # NOTE(review): INNERTUBE_CONTEXT_CLIENT_NAME is an integer for the web
    # clients but a string for the Android clients - confirm this is intended
    _YT_DEFAULT_YTCFGS = {
        'WEB': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB',
            'INNERTUBE_CLIENT_VERSION': '2.20210622.10.00',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20210622.10.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 1
        },
        'WEB_REMIX': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_REMIX',
            'INNERTUBE_CLIENT_VERSION': '1.20210621.00.00',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_REMIX',
                    'clientVersion': '1.20210621.00.00',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 67
        },
        'WEB_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'WEB_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '1.20210620.0.1',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'WEB_EMBEDDED_PLAYER',
                    'clientVersion': '1.20210620.0.1',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 56
        },
        'ANDROID': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID'
        },
        'ANDROID_EMBEDDED_PLAYER': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER',
            'INNERTUBE_CLIENT_VERSION': '16.20',
            'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_EMBEDDED_PLAYER',
                    'clientVersion': '16.20',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_EMBEDDED_PLAYER'
        },
        'ANDROID_MUSIC': {
            'INNERTUBE_API_VERSION': 'v1',
            'INNERTUBE_CLIENT_NAME': 'ANDROID_MUSIC',
            'INNERTUBE_CLIENT_VERSION': '4.32',
            'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
            'INNERTUBE_CONTEXT': {
                'client': {
                    'clientName': 'ANDROID_MUSIC',
                    'clientVersion': '4.32',
                    'hl': 'en',
                }
            },
            'INNERTUBE_CONTEXT_CLIENT_NAME': 'ANDROID_MUSIC'
        }
    }
 394
    # API hostname per client name. _get_innertube_host falls back to the
    # 'WEB' entry for clients that are not listed here.
    _YT_DEFAULT_INNERTUBE_HOSTS = {
        'DIRECT': 'youtubei.googleapis.com',
        'WEB': 'www.youtube.com',
        'WEB_REMIX': 'music.youtube.com',
        'ANDROID_MUSIC': 'music.youtube.com'
    }
 401
 402     def _get_default_ytcfg(self, client='WEB'):
 403         if client in self._YT_DEFAULT_YTCFGS:
 404             return copy.deepcopy(self._YT_DEFAULT_YTCFGS[client])
 405         self.write_debug(f'INNERTUBE default client {client} does not exist - falling back to WEB client.')
 406         return copy.deepcopy(self._YT_DEFAULT_YTCFGS['WEB'])
 407
 408     def _get_innertube_host(self, client='WEB'):
 409         return dict_get(self._YT_DEFAULT_INNERTUBE_HOSTS, (client, 'WEB'))
 410
 411     def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='WEB'):
 412         # try_get but with fallback to default ytcfg client values when present
 413         _func = lambda y: try_get(y, getter, expected_type)
 414         return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
 415
 416     def _extract_client_name(self, ytcfg, default_client='WEB'):
 417         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str, default_client)
 418
 419     def _extract_client_version(self, ytcfg, default_client='WEB'):
 420         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str, default_client)
 421
 422     def _extract_api_key(self, ytcfg=None, default_client='WEB'):
 423         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
 424
 425     def _extract_context(self, ytcfg=None, default_client='WEB'):
 426         _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
 427         context = _get_context(ytcfg)
 428         if context:
 429             return context
 430
 431         context = _get_context(self._get_default_ytcfg(default_client))
 432         if not ytcfg:
 433             return context
 434
 435         # Recreate the client context (required)
 436         context['client'].update({
 437             'clientVersion': self._extract_client_version(ytcfg, default_client),
 438             'clientName': self._extract_client_name(ytcfg, default_client),
 439         })
 440         visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
 441         if visitor_data:
 442             context['client']['visitorData'] = visitor_data
 443         return context
 444
 445     def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
 446         # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
 447         # See: https://github.com/yt-dlp/yt-dlp/issues/393
 448         yt_cookies = self._get_cookies('https://www.youtube.com')
 449         sapisid_cookie = dict_get(
 450             yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
 451         if sapisid_cookie is None:
 452             return
 453         time_now = round(time.time())
 454         # SAPISID cookie is required if not already present
 455         if not yt_cookies.get('SAPISID'):
 456             self._set_cookie(
 457                 '.youtube.com', 'SAPISID', sapisid_cookie.value, secure=True, expire_time=time_now + 3600)
 458         # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
 459         sapisidhash = hashlib.sha1(
 460             f'{time_now} {sapisid_cookie.value} {origin}'.encode('utf-8')).hexdigest()
 461         return f'SAPISIDHASH {time_now}_{sapisidhash}'
 462
 463     def _call_api(self, ep, query, video_id, fatal=True, headers=None,
 464                   note='Downloading API JSON', errnote='Unable to download API page',
 465                   context=None, api_key=None, api_hostname=None, default_client='WEB'):
 466
 467         data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
 468         data.update(query)
 469         real_headers = self._generate_api_headers(client=default_client)
 470         real_headers.update({'content-type': 'application/json'})
 471         if headers:
 472             real_headers.update(headers)
 473         return self._download_json(
 474             'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
 475             video_id=video_id, fatal=fatal, note=note, errnote=errnote,
 476             data=json.dumps(data).encode('utf8'), headers=real_headers,
 477             query={'key': api_key or self._extract_api_key()})
 478
 479     def _extract_yt_initial_data(self, video_id, webpage):
 480         return self._parse_json(
 481             self._search_regex(
 482                 (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
 483                  self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
 484             video_id)
 485
 486     def _extract_identity_token(self, webpage, item_id):
 487         ytcfg = self._extract_ytcfg(item_id, webpage)
 488         if ytcfg:
 489             token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
 490             if token:
 491                 return token
 492         return self._search_regex(
 493             r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
 494             'identity token', default=None)
 495
 496     @staticmethod
 497     def _extract_account_syncid(data):
 498         """
 499         Extract syncId required to download private playlists of secondary channels
 500         @param data Either response or ytcfg
 501         """
 502         sync_ids = (try_get(
 503             data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
 504                    lambda x: x['DATASYNC_ID']), compat_str) or '').split("||")
 505         if len(sync_ids) >= 2 and sync_ids[1]:
 506             # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
 507             # and just "user_syncid||" for primary channel. We only want the channel_syncid
 508             return sync_ids[0]
 509         # ytcfg includes channel_syncid if on secondary channel
 510         return data.get('DELEGATED_SESSION_ID')
 511
 512     def _extract_ytcfg(self, video_id, webpage):
 513         if not webpage:
 514             return {}
 515         return self._parse_json(
 516             self._search_regex(
 517                 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
 518                 default='{}'), video_id, fatal=False) or {}
 519
 520     def _generate_api_headers(self, ytcfg=None, identity_token=None, account_syncid=None,
 521                               visitor_data=None, api_hostname=None, client='WEB'):
 522         origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(client))
 523         headers = {
 524             'X-YouTube-Client-Name': compat_str(
 525                 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=client)),
 526             'X-YouTube-Client-Version': self._extract_client_version(ytcfg, client),
 527             'Origin': origin
 528         }
 529         if not visitor_data and ytcfg:
 530             visitor_data = try_get(
 531                 self._extract_context(ytcfg, client), lambda x: x['client']['visitorData'], compat_str)
 532         if identity_token:
 533             headers['X-Youtube-Identity-Token'] = identity_token
 534         if account_syncid:
 535             headers['X-Goog-PageId'] = account_syncid
 536             headers['X-Goog-AuthUser'] = 0
 537         if visitor_data:
 538             headers['X-Goog-Visitor-Id'] = visitor_data
 539         auth = self._generate_sapisidhash_header(origin)
 540         if auth is not None:
 541             headers['Authorization'] = auth
 542             headers['X-Origin'] = origin
 543         return headers
 544
 545     @staticmethod
 546     def _build_api_continuation_query(continuation, ctp=None):
 547         query = {
 548             'continuation': continuation
 549         }
 550         # TODO: Inconsistency with clickTrackingParams.
 551         # Currently we have a fixed ctp contained within context (from ytcfg)
 552         # and a ctp in root query for continuation.
 553         if ctp:
 554             query['clickTracking'] = {'clickTrackingParams': ctp}
 555         return query
 556
 557     @classmethod
 558     def _continuation_query_ajax_to_api(cls, continuation_query):
 559         continuation = dict_get(continuation_query, ('continuation', 'ctoken'))
 560         return cls._build_api_continuation_query(continuation, continuation_query.get('itct'))
 561
 562     @staticmethod
 563     def _build_continuation_query(continuation, ctp=None):
 564         query = {
 565             'ctoken': continuation,
 566             'continuation': continuation,
 567         }
 568         if ctp:
 569             query['itct'] = ctp
 570         return query
 571
 572     @classmethod
 573     def _extract_next_continuation_data(cls, renderer):
 574         next_continuation = try_get(
 575             renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
 576                        lambda x: x['continuation']['reloadContinuationData']), dict)
 577         if not next_continuation:
 578             return
 579         continuation = next_continuation.get('continuation')
 580         if not continuation:
 581             return
 582         ctp = next_continuation.get('clickTrackingParams')
 583         return cls._build_continuation_query(continuation, ctp)
 584
 585     @classmethod
 586     def _extract_continuation_ep_data(cls, continuation_ep: dict):
 587         if isinstance(continuation_ep, dict):
 588             continuation = try_get(
 589                 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
 590             if not continuation:
 591                 return
 592             ctp = continuation_ep.get('clickTrackingParams')
 593             return cls._build_continuation_query(continuation, ctp)
 594
 595     @classmethod
 596     def _extract_continuation(cls, renderer):
 597         next_continuation = cls._extract_next_continuation_data(renderer)
 598         if next_continuation:
 599             return next_continuation
 600         contents = []
 601         for key in ('contents', 'items'):
 602             contents.extend(try_get(renderer, lambda x: x[key], list) or [])
 603         for content in contents:
 604             if not isinstance(content, dict):
 605                 continue
 606             continuation_ep = try_get(
 607                 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
 608                           lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
 609                 dict)
 610             continuation = cls._extract_continuation_ep_data(continuation_ep)
 611             if continuation:
 612                 return continuation
 613
 614     @staticmethod
 615     def _extract_alerts(data):
 616         for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
 617             if not isinstance(alert_dict, dict):
 618                 continue
 619             for alert in alert_dict.values():
 620                 alert_type = alert.get('type')
 621                 if not alert_type:
 622                     continue
 623                 message = try_get(alert, lambda x: x['text']['simpleText'], compat_str) or ''
 624                 if message:
 625                     yield alert_type, message
 626                 for run in try_get(alert, lambda x: x['text']['runs'], list) or []:
 627                     message += try_get(run, lambda x: x['text'], compat_str)
 628                 if message:
 629                     yield alert_type, message
 630
 631     def _report_alerts(self, alerts, expected=True):
 632         errors = []
 633         warnings = []
 634         for alert_type, alert_message in alerts:
 635             if alert_type.lower() == 'error':
 636                 errors.append([alert_type, alert_message])
 637             else:
 638                 warnings.append([alert_type, alert_message])
 639
 640         for alert_type, alert_message in (warnings + errors[:-1]):
 641             self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message))
 642         if errors:
 643             raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
 644
 645     def _extract_and_report_alerts(self, data, *args, **kwargs):
 646         return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
 647
 648     def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
 649                           ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
 650                           default_client='WEB'):
 651         response = None
 652         last_error = None
 653         count = -1
 654         retries = self.get_param('extractor_retries', 3)
 655         if check_get_keys is None:
 656             check_get_keys = []
 657         while count < retries:
 658             count += 1
 659             if last_error:
 660                 self.report_warning('%s. Retrying ...' % last_error)
 661             try:
 662                 response = self._call_api(
 663                     ep=ep, fatal=True, headers=headers,
 664                     video_id=item_id, query=query,
 665                     context=self._extract_context(ytcfg, default_client),
 666                     api_key=self._extract_api_key(ytcfg, default_client),
 667                     api_hostname=api_hostname, default_client=default_client,
 668                     note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
 669             except ExtractorError as e:
 670                 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503, 404):
 671                     # Downloading page may result in intermittent 5xx HTTP error
 672                     # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
 673                     last_error = 'HTTP Error %s' % e.cause.code
 674                     if count < retries:
 675                         continue
 676                 if fatal:
 677                     raise
 678                 else:
 679                     self.report_warning(error_to_compat_str(e))
 680                     return
 681
 682             else:
 683                 # Youtube may send alerts if there was an issue with the continuation page
 684                 try:
 685                     self._extract_and_report_alerts(response, expected=False)
 686                 except ExtractorError as e:
 687                     if fatal:
 688                         raise
 689                     self.report_warning(error_to_compat_str(e))
 690                     return
 691                 if not check_get_keys or dict_get(response, check_get_keys):
 692                     break
 693                 # Youtube sometimes sends incomplete data
 694                 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
 695                 last_error = 'Incomplete data received'
 696                 if count >= retries:
 697                     if fatal:
 698                         raise ExtractorError(last_error)
 699                     else:
 700                         self.report_warning(last_error)
 701                         return
 702         return response
 703
 704     @staticmethod
 705     def is_music_url(url):
 706         return re.match(r'https?://music\.youtube\.com/', url) is not None
 707
 708     def _extract_video(self, renderer):
 709         video_id = renderer.get('videoId')
 710         title = try_get(
 711             renderer,
 712             (lambda x: x['title']['runs'][0]['text'],
 713              lambda x: x['title']['simpleText']), compat_str)
 714         description = try_get(
 715             renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
 716             compat_str)
 717         duration = parse_duration(try_get(
 718             renderer, lambda x: x['lengthText']['simpleText'], compat_str))
 719         view_count_text = try_get(
 720             renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
 721         view_count = str_to_int(self._search_regex(
 722             r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
 723             'view count', default=None))
 724         uploader = try_get(
 725             renderer,
 726             (lambda x: x['ownerText']['runs'][0]['text'],
 727              lambda x: x['shortBylineText']['runs'][0]['text']), compat_str)
 728         return {
 729             '_type': 'url',
 730             'ie_key': YoutubeIE.ie_key(),
 731             'id': video_id,
 732             'url': video_id,
 733             'title': title,
 734             'description': description,
 735             'duration': duration,
 736             'view_count': view_count,
 737             'uploader': uploader,
 738         }
 739
 740
 741 class YoutubeIE(YoutubeBaseInfoExtractor):
 742     IE_DESC = 'YouTube.com'
 743     _INVIDIOUS_SITES = (
 744         # invidious-redirect websites
 745         r'(?:www\.)?redirect\.invidious\.io',
 746         r'(?:(?:www|dev)\.)?invidio\.us',
 747         # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
 748         r'(?:www\.)?invidious\.pussthecat\.org',
 749         r'(?:www\.)?invidious\.zee\.li',
 750         r'(?:www\.)?invidious\.ethibox\.fr',
 751         r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
 752         # youtube-dl invidious instances list
 753         r'(?:(?:www|no)\.)?invidiou\.sh',
 754         r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
 755         r'(?:www\.)?invidious\.kabi\.tk',
 756         r'(?:www\.)?invidious\.mastodon\.host',
 757         r'(?:www\.)?invidious\.zapashcanon\.fr',
 758         r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
 759         r'(?:www\.)?invidious\.tinfoil-hat\.net',
 760         r'(?:www\.)?invidious\.himiko\.cloud',
 761         r'(?:www\.)?invidious\.reallyancient\.tech',
 762         r'(?:www\.)?invidious\.tube',
 763         r'(?:www\.)?invidiou\.site',
 764         r'(?:www\.)?invidious\.site',
 765         r'(?:www\.)?invidious\.xyz',
 766         r'(?:www\.)?invidious\.nixnet\.xyz',
 767         r'(?:www\.)?invidious\.048596\.xyz',
 768         r'(?:www\.)?invidious\.drycat\.fr',
 769         r'(?:www\.)?inv\.skyn3t\.in',
 770         r'(?:www\.)?tube\.poal\.co',
 771         r'(?:www\.)?tube\.connect\.cafe',
 772         r'(?:www\.)?vid\.wxzm\.sx',
 773         r'(?:www\.)?vid\.mint\.lgbt',
 774         r'(?:www\.)?vid\.puffyan\.us',
 775         r'(?:www\.)?yewtu\.be',
 776         r'(?:www\.)?yt\.elukerio\.org',
 777         r'(?:www\.)?yt\.lelux\.fi',
 778         r'(?:www\.)?invidious\.ggc-project\.de',
 779         r'(?:www\.)?yt\.maisputain\.ovh',
 780         r'(?:www\.)?ytprivate\.com',
 781         r'(?:www\.)?invidious\.13ad\.de',
 782         r'(?:www\.)?invidious\.toot\.koeln',
 783         r'(?:www\.)?invidious\.fdn\.fr',
 784         r'(?:www\.)?watch\.nettohikari\.com',
 785         r'(?:www\.)?invidious\.namazso\.eu',
 786         r'(?:www\.)?invidious\.silkky\.cloud',
 787         r'(?:www\.)?invidious\.exonip\.de',
 788         r'(?:www\.)?invidious\.riverside\.rocks',
 789         r'(?:www\.)?invidious\.blamefran\.net',
 790         r'(?:www\.)?invidious\.moomoo\.de',
 791         r'(?:www\.)?ytb\.trom\.tf',
 792         r'(?:www\.)?yt\.cyberhost\.uk',
 793         r'(?:www\.)?kgg2m7yk5aybusll\.onion',
 794         r'(?:www\.)?qklhadlycap4cnod\.onion',
 795         r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
 796         r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
 797         r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
 798         r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
 799         r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
 800         r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
 801         r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
 802         r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
 803         r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
 804         r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
 805     )
 806     _VALID_URL = r"""(?x)^
 807                      (
 808                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 809                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
 810                             (?:www\.)?deturl\.com/www\.youtube\.com|
 811                             (?:www\.)?pwnyoutube\.com|
 812                             (?:www\.)?hooktube\.com|
 813                             (?:www\.)?yourepeat\.com|
 814                             tube\.majestyc\.net|
 815                             %(invidious)s|
 816                             youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
 817                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 818                          (?:                                                  # the various things that can precede the ID:
 819                              (?:(?:v|embed|e)/(?!videoseries))                # v/ or embed/ or e/
 820                              |(?:                                             # or the v= param in all its forms
 821                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 822                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 823                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 824                                  v=
 825                              )
 826                          ))
 827                          |(?:
 828                             youtu\.be|                                        # just youtu.be/xxxx
 829                             vid\.plus|                                        # or vid.plus/xxxx
 830                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 831                             %(invidious)s
 832                          )/
 833                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 834                          )
 835                      )?                                                       # all until now is optional -> you can pass the naked ID
 836                      (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
 837                      (?(1).+)?                                                # if we found the ID, everything can follow
 838                      (?:\#|$)""" % {
 839         'invidious': '|'.join(_INVIDIOUS_SITES),
 840     }
 841     _PLAYER_INFO_RE = (
 842         r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
 843         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
 844         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
 845     )
 846     _formats = {
 847         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 848         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 849         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 850         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 851         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 852         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 853         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 854         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 855         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 856         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 857         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 858         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 859         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 860         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 861         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 862         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 863         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 864         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 865
 866
 867         # 3D videos
 868         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 869         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 870         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 871         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 872         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 873         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 874         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 875
 876         # Apple HTTP Live Streaming
 877         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 878         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 879         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 880         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 881         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 882         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 883         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 884         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 885
 886         # DASH mp4 video
 887         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 888         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 889         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 890         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 891         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 892         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 893         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 894         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 895         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 896         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 897         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 898         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 899
 900         # Dash mp4 audio
 901         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 902         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 903         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 904         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 905         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 906         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 907         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 908
 909         # Dash webm
 910         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 911         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 912         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 913         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 914         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 915         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 916         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 917         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 918         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 919         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 920         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 921         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 922         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 923         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 924         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 925         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 926         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 927         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 928         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 929         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 930         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 931         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 932
 933         # Dash webm audio
 934         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 935         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 936
 937         # Dash webm audio with opus inside
 938         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 939         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 940         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 941
 942         # RTMP (unnamed)
 943         '_rtmp': {'protocol': 'rtmp'},
 944
 945         # av01 video only formats sometimes served with "unknown" codecs
 946         '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 947         '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 948         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 949         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
 950     }
 951     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 952
 953     _AGE_GATE_REASONS = (
 954         'Sign in to confirm your age',
 955         'This video may be inappropriate for some users.',
 956         'Sorry, this content is age-restricted.')
 957
 958     _GEO_BYPASS = False
 959
 960     IE_NAME = 'youtube'
 961     _TESTS = [
 962         {
 963             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 964             'info_dict': {
 965                 'id': 'BaW_jenozKc',
 966                 'ext': 'mp4',
 967                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 968                 'uploader': 'Philipp Hagemeister',
 969                 'uploader_id': 'phihag',
 970                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 971                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
 972                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
 973                 'upload_date': '20121002',
 974                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 975                 'categories': ['Science & Technology'],
 976                 'tags': ['youtube-dl'],
 977                 'duration': 10,
 978                 'view_count': int,
 979                 'like_count': int,
 980                 'dislike_count': int,
 981                 'start_time': 1,
 982                 'end_time': 9,
 983             }
 984         },
 985         {
 986             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 987             'note': 'Embed-only video (#1746)',
 988             'info_dict': {
 989                 'id': 'yZIXLfi8CZQ',
 990                 'ext': 'mp4',
 991                 'upload_date': '20120608',
 992                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 993                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 994                 'uploader': 'SET India',
 995                 'uploader_id': 'setindia',
 996                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 997                 'age_limit': 18,
 998             },
 999             'skip': 'Private video',
1000         },
1001         {
1002             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
1003             'note': 'Use the first video ID in the URL',
1004             'info_dict': {
1005                 'id': 'BaW_jenozKc',
1006                 'ext': 'mp4',
1007                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
1008                 'uploader': 'Philipp Hagemeister',
1009                 'uploader_id': 'phihag',
1010                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
1011                 'upload_date': '20121002',
1012                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
1013                 'categories': ['Science & Technology'],
1014                 'tags': ['youtube-dl'],
1015                 'duration': 10,
1016                 'view_count': int,
1017                 'like_count': int,
1018                 'dislike_count': int,
1019             },
1020             'params': {
1021                 'skip_download': True,
1022             },
1023         },
1024         {
1025             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
1026             'note': '256k DASH audio (format 141) via DASH manifest',
1027             'info_dict': {
1028                 'id': 'a9LDPn-MO4I',
1029                 'ext': 'm4a',
1030                 'upload_date': '20121002',
1031                 'uploader_id': '8KVIDEO',
1032                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1033                 'description': '',
1034                 'uploader': '8KVIDEO',
1035                 'title': 'UHDTV TEST 8K VIDEO.mp4'
1036             },
1037             'params': {
1038                 'youtube_include_dash_manifest': True,
1039                 'format': '141',
1040             },
1041             'skip': 'format 141 not served anymore',
1042         },
1043         # DASH manifest with encrypted signature
1044         {
1045             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1046             'info_dict': {
1047                 'id': 'IB3lcPjvWLA',
1048                 'ext': 'm4a',
1049                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1050                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1051                 'duration': 244,
1052                 'uploader': 'AfrojackVEVO',
1053                 'uploader_id': 'AfrojackVEVO',
1054                 'upload_date': '20131011',
1055                 'abr': 129.495,
1056             },
1057             'params': {
1058                 'youtube_include_dash_manifest': True,
1059                 'format': '141/bestaudio[ext=m4a]',
1060             },
1061         },
1062         # Controversy video
1063         {
1064             'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
1065             'info_dict': {
1066                 'id': 'T4XJQO3qol8',
1067                 'ext': 'mp4',
1068                 'duration': 219,
1069                 'upload_date': '20100909',
1070                 'uploader': 'Amazing Atheist',
1071                 'uploader_id': 'TheAmazingAtheist',
1072                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
1073                 'title': 'Burning Everyone\'s Koran',
1074                 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms \r\n\r\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
1075             }
1076         },
1077         # Normal age-gate video (embed allowed)
1078         {
1079             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1080             'info_dict': {
1081                 'id': 'HtVdAasjOgU',
1082                 'ext': 'mp4',
1083                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1084                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1085                 'duration': 142,
1086                 'uploader': 'The Witcher',
1087                 'uploader_id': 'WitcherGame',
1088                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1089                 'upload_date': '20140605',
1090                 'age_limit': 18,
1091             },
1092         },
1093         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1094         # YouTube Red ad is not captured for creator
1095         {
1096             'url': '__2ABJjxzNo',
1097             'info_dict': {
1098                 'id': '__2ABJjxzNo',
1099                 'ext': 'mp4',
1100                 'duration': 266,
1101                 'upload_date': '20100430',
1102                 'uploader_id': 'deadmau5',
1103                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1104                 'creator': 'deadmau5',
1105                 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1106                 'uploader': 'deadmau5',
1107                 'title': 'Deadmau5 - Some Chords (HD)',
1108                 'alt_title': 'Some Chords',
1109             },
1110             'expected_warnings': [
1111                 'DASH manifest missing',
1112             ]
1113         },
1114         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1115         {
1116             'url': 'lqQg6PlCWgI',
1117             'info_dict': {
1118                 'id': 'lqQg6PlCWgI',
1119                 'ext': 'mp4',
1120                 'duration': 6085,
1121                 'upload_date': '20150827',
1122                 'uploader_id': 'olympic',
1123                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1124                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1125                 'uploader': 'Olympic',
1126                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
1127             },
1128             'params': {
1129                 'skip_download': 'requires avconv',
1130             }
1131         },
1132         # Non-square pixels
1133         {
1134             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1135             'info_dict': {
1136                 'id': '_b-2C3KPAM0',
1137                 'ext': 'mp4',
1138                 'stretched_ratio': 16 / 9.,
1139                 'duration': 85,
1140                 'upload_date': '20110310',
1141                 'uploader_id': 'AllenMeow',
1142                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1143                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1144                 'uploader': '孫ᄋᄅ',
1145                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1146             },
1147         },
1148         # url_encoded_fmt_stream_map is empty string
1149         {
1150             'url': 'qEJwOuvDf7I',
1151             'info_dict': {
1152                 'id': 'qEJwOuvDf7I',
1153                 'ext': 'webm',
1154                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1155                 'description': '',
1156                 'upload_date': '20150404',
1157                 'uploader_id': 'spbelect',
1158                 'uploader': 'Наблюдатели Петербурга',
1159             },
1160             'params': {
1161                 'skip_download': 'requires avconv',
1162             },
1163             'skip': 'This live event has ended.',
1164         },
1165         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1166         {
1167             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1168             'info_dict': {
1169                 'id': 'FIl7x6_3R5Y',
1170                 'ext': 'webm',
1171                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1172                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1173                 'duration': 220,
1174                 'upload_date': '20150625',
1175                 'uploader_id': 'dorappi2000',
1176                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1177                 'uploader': 'dorappi2000',
1178                 'formats': 'mincount:31',
1179             },
1180             'skip': 'not actual anymore',
1181         },
1182         # DASH manifest with segment_list
1183         {
1184             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1185             'md5': '8ce563a1d667b599d21064e982ab9e31',
1186             'info_dict': {
1187                 'id': 'CsmdDsKjzN8',
1188                 'ext': 'mp4',
1189                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1190                 'uploader': 'Airtek',
1191                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1192                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1193                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1194             },
1195             'params': {
1196                 'youtube_include_dash_manifest': True,
1197                 'format': '135',  # bestvideo
1198             },
1199             'skip': 'This live event has ended.',
1200         },
1201         {
1202             # Multifeed videos (multiple cameras), URL is for Main Camera
1203             'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1204             'info_dict': {
1205                 'id': 'jvGDaLqkpTg',
1206                 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1207                 'description': 'md5:e03b909557865076822aa169218d6a5d',
1208             },
1209             'playlist': [{
1210                 'info_dict': {
1211                     'id': 'jvGDaLqkpTg',
1212                     'ext': 'mp4',
1213                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1214                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1215                     'duration': 10643,
1216                     'upload_date': '20161111',
1217                     'uploader': 'Team PGP',
1218                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1219                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1220                 },
1221             }, {
1222                 'info_dict': {
1223                     'id': '3AKt1R1aDnw',
1224                     'ext': 'mp4',
1225                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1226                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1227                     'duration': 10991,
1228                     'upload_date': '20161111',
1229                     'uploader': 'Team PGP',
1230                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1231                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1232                 },
1233             }, {
1234                 'info_dict': {
1235                     'id': 'RtAMM00gpVc',
1236                     'ext': 'mp4',
1237                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1238                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1239                     'duration': 10995,
1240                     'upload_date': '20161111',
1241                     'uploader': 'Team PGP',
1242                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1243                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1244                 },
1245             }, {
1246                 'info_dict': {
1247                     'id': '6N2fdlP3C5U',
1248                     'ext': 'mp4',
1249                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1250                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1251                     'duration': 10990,
1252                     'upload_date': '20161111',
1253                     'uploader': 'Team PGP',
1254                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1255                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1256                 },
1257             }],
1258             'params': {
1259                 'skip_download': True,
1260             },
1261         },
1262         {
1263             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1264             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1265             'info_dict': {
1266                 'id': 'gVfLd0zydlo',
1267                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1268             },
1269             'playlist_count': 2,
1270             'skip': 'Not multifeed anymore',
1271         },
1272         {
1273             'url': 'https://vid.plus/FlRa-iH7PGw',
1274             'only_matching': True,
1275         },
1276         {
1277             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1278             'only_matching': True,
1279         },
1280         {
1281             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1282             # Also tests cut-off URL expansion in video description (see
1283             # https://github.com/ytdl-org/youtube-dl/issues/1892,
1284             # https://github.com/ytdl-org/youtube-dl/issues/8164)
1285             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1286             'info_dict': {
1287                 'id': 'lsguqyKfVQg',
1288                 'ext': 'mp4',
1289                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1290                 'alt_title': 'Dark Walk - Position Music',
1291                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1292                 'duration': 133,
1293                 'upload_date': '20151119',
1294                 'uploader_id': 'IronSoulElf',
1295                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1296                 'uploader': 'IronSoulElf',
1297                 'creator': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
1298                 'track': 'Dark Walk - Position Music',
1299                 'artist': 'Todd Haberman,  Daniel Law Heath and Aaron Kaplan',
1300                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1301             },
1302             'params': {
1303                 'skip_download': True,
1304             },
1305         },
1306         {
1307             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1308             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1309             'only_matching': True,
1310         },
1311         {
1312             # Video with yt:stretch=17:0
1313             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1314             'info_dict': {
1315                 'id': 'Q39EVAstoRM',
1316                 'ext': 'mp4',
1317                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1318                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1319                 'upload_date': '20151107',
1320                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1321                 'uploader': 'CH GAMER DROID',
1322             },
1323             'params': {
1324                 'skip_download': True,
1325             },
1326             'skip': 'This video does not exist.',
1327         },
1328         {
1329             # Video with incomplete 'yt:stretch=16:'
1330             'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1331             'only_matching': True,
1332         },
1333         {
1334             # Video licensed under Creative Commons
1335             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1336             'info_dict': {
1337                 'id': 'M4gD1WSo5mA',
1338                 'ext': 'mp4',
1339                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1340                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1341                 'duration': 721,
1342                 'upload_date': '20150127',
1343                 'uploader_id': 'BerkmanCenter',
1344                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1345                 'uploader': 'The Berkman Klein Center for Internet & Society',
1346                 'license': 'Creative Commons Attribution license (reuse allowed)',
1347             },
1348             'params': {
1349                 'skip_download': True,
1350             },
1351         },
1352         {
1353             # Channel-like uploader_url
1354             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1355             'info_dict': {
1356                 'id': 'eQcmzGIKrzg',
1357                 'ext': 'mp4',
1358                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1359                 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1360                 'duration': 4060,
1361                 'upload_date': '20151119',
1362                 'uploader': 'Bernie Sanders',
1363                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1364                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1365                 'license': 'Creative Commons Attribution license (reuse allowed)',
1366             },
1367             'params': {
1368                 'skip_download': True,
1369             },
1370         },
1371         {
1372             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1373             'only_matching': True,
1374         },
1375         {
1376             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1377             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1378             'only_matching': True,
1379         },
1380         {
1381             # Rental video preview
1382             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1383             'info_dict': {
1384                 'id': 'uGpuVWrhIzE',
1385                 'ext': 'mp4',
1386                 'title': 'Piku - Trailer',
1387                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1388                 'upload_date': '20150811',
1389                 'uploader': 'FlixMatrix',
1390                 'uploader_id': 'FlixMatrixKaravan',
1391                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1392                 'license': 'Standard YouTube License',
1393             },
1394             'params': {
1395                 'skip_download': True,
1396             },
1397             'skip': 'This video is not available.',
1398         },
1399         {
1400             # YouTube Red video with episode data
1401             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1402             'info_dict': {
1403                 'id': 'iqKdEhx-dD4',
1404                 'ext': 'mp4',
1405                 'title': 'Isolation - Mind Field (Ep 1)',
1406                 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1407                 'duration': 2085,
1408                 'upload_date': '20170118',
1409                 'uploader': 'Vsauce',
1410                 'uploader_id': 'Vsauce',
1411                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1412                 'series': 'Mind Field',
1413                 'season_number': 1,
1414                 'episode_number': 1,
1415             },
1416             'params': {
1417                 'skip_download': True,
1418             },
1419             'expected_warnings': [
1420                 'Skipping DASH manifest',
1421             ],
1422         },
1423         {
1424             # The following content has been identified by the YouTube community
1425             # as inappropriate or offensive to some audiences.
1426             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1427             'info_dict': {
1428                 'id': '6SJNVb0GnPI',
1429                 'ext': 'mp4',
1430                 'title': 'Race Differences in Intelligence',
1431                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1432                 'duration': 965,
1433                 'upload_date': '20140124',
1434                 'uploader': 'New Century Foundation',
1435                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1436                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1437             },
1438             'params': {
1439                 'skip_download': True,
1440             },
1441             'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1442         },
1443         {
1444             # itag 212
1445             'url': '1t24XAntNCY',
1446             'only_matching': True,
1447         },
1448         {
1449             # geo restricted to JP
1450             'url': 'sJL6WA-aGkQ',
1451             'only_matching': True,
1452         },
1453         {
1454             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1455             'only_matching': True,
1456         },
1457         {
1458             'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1459             'only_matching': True,
1460         },
1461         {
1462             # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1463             'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1464             'only_matching': True,
1465         },
1466         {
1467             # DRM protected
1468             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1469             'only_matching': True,
1470         },
1471         {
1472             # Video with unsupported adaptive stream type formats
1473             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1474             'info_dict': {
1475                 'id': 'Z4Vy8R84T1U',
1476                 'ext': 'mp4',
1477                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1478                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1479                 'duration': 433,
1480                 'upload_date': '20130923',
1481                 'uploader': 'Amelia Putri Harwita',
1482                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1483                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1484                 'formats': 'maxcount:10',
1485             },
1486             'params': {
1487                 'skip_download': True,
1488                 'youtube_include_dash_manifest': False,
1489             },
1490             'skip': 'not actual anymore',
1491         },
1492         {
1493             # Youtube Music Auto-generated description
1494             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1495             'info_dict': {
1496                 'id': 'MgNrAu2pzNs',
1497                 'ext': 'mp4',
1498                 'title': 'Voyeur Girl',
1499                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1500                 'upload_date': '20190312',
1501                 'uploader': 'Stephen - Topic',
1502                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1503                 'artist': 'Stephen',
1504                 'track': 'Voyeur Girl',
1505                 'album': 'it\'s too much love to know my dear',
1506                 'release_date': '20190313',
1507                 'release_year': 2019,
1508             },
1509             'params': {
1510                 'skip_download': True,
1511             },
1512         },
1513         {
1514             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1515             'only_matching': True,
1516         },
1517         {
1518             # invalid -> valid video id redirection
1519             'url': 'DJztXj2GPfl',
1520             'info_dict': {
1521                 'id': 'DJztXj2GPfk',
1522                 'ext': 'mp4',
1523                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1524                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1525                 'upload_date': '20090125',
1526                 'uploader': 'Prochorowka',
1527                 'uploader_id': 'Prochorowka',
1528                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1529                 'artist': 'Panjabi MC',
1530                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1531                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1532             },
1533             'params': {
1534                 'skip_download': True,
1535             },
1536             'skip': 'Video unavailable',
1537         },
1538         {
1539             # empty description results in an empty string
1540             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1541             'info_dict': {
1542                 'id': 'x41yOUIvK2k',
1543                 'ext': 'mp4',
1544                 'title': 'IMG 3456',
1545                 'description': '',
1546                 'upload_date': '20170613',
1547                 'uploader_id': 'ElevageOrVert',
1548                 'uploader': 'ElevageOrVert',
1549             },
1550             'params': {
1551                 'skip_download': True,
1552             },
1553         },
1554         {
1555             # with '};' inside yt initial data (see [1])
1556             # see [2] for an example with '};' inside ytInitialPlayerResponse
1557             # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1558             # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1559             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1560             'info_dict': {
1561                 'id': 'CHqg6qOn4no',
1562                 'ext': 'mp4',
1563                 'title': 'Part 77   Sort a list of simple types in c#',
1564                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1565                 'upload_date': '20130831',
1566                 'uploader_id': 'kudvenkat',
1567                 'uploader': 'kudvenkat',
1568             },
1569             'params': {
1570                 'skip_download': True,
1571             },
1572         },
1573         {
1574             # another example of '};' in ytInitialData
1575             'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1576             'only_matching': True,
1577         },
1578         {
1579             'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1580             'only_matching': True,
1581         },
1582         {
1583             # https://github.com/ytdl-org/youtube-dl/pull/28094
1584             'url': 'OtqTfy26tG0',
1585             'info_dict': {
1586                 'id': 'OtqTfy26tG0',
1587                 'ext': 'mp4',
1588                 'title': 'Burn Out',
1589                 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1590                 'upload_date': '20141120',
1591                 'uploader': 'The Cinematic Orchestra - Topic',
1592                 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1593                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1594                 'artist': 'The Cinematic Orchestra',
1595                 'track': 'Burn Out',
1596                 'album': 'Every Day',
1597                 'release_data': None,
1598                 'release_year': None,
1599             },
1600             'params': {
1601                 'skip_download': True,
1602             },
1603         },
1604         {
1605             # controversial video, only works with bpctr when authenticated with cookies
1606             'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1607             'only_matching': True,
1608         },
1609         {
1610             # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1611             'url': 'cBvYw8_A0vQ',
1612             'info_dict': {
1613                 'id': 'cBvYw8_A0vQ',
1614                 'ext': 'mp4',
1615                 'title': '4K Ueno Okachimachi  Street  Scenes  上野御徒町歩き',
1616                 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1617                 'upload_date': '20201120',
1618                 'uploader': 'Walk around Japan',
1619                 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1620                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1621             },
1622             'params': {
1623                 'skip_download': True,
1624             },
1625         }, {
1626             # Has multiple audio streams
1627             'url': 'WaOKSUlf4TM',
1628             'only_matching': True
1629         }, {
1630             # Requires Premium: has format 141 when requested using YTM url
1631             'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1632             'only_matching': True
1633         }, {
1634             # multiple subtitles with same lang_code
1635             'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1636             'only_matching': True,
1637         }, {
1638             # Force use android client fallback
1639             'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1640             'info_dict': {
1641                 'id': 'YOelRv7fMxY',
1642                 'title': 'Digging a Secret Tunnel from my Workshop',
1643                 'ext': '3gp',
1644                 'upload_date': '20210624',
1645                 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1646                 'uploader': 'colinfurze',
1647                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1648                 'description': 'md5:ecb672623246d98c6c562eed6ae798c3'
1649             },
1650             'params': {
1651                 'format': '17',  # 3gp format available on android
1652                 'extractor_args': {'youtube': {'player_client': ['android']}},
1653             },
1654         },
1655         {
1656             # Skip download of additional client configs (remix client config in this case)
1657             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1658             'only_matching': True,
1659             'params': {
1660                 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1661             },
1662         }
1663     ]
1664
1665     @classmethod
1666     def suitable(cls, url):
1667         # Hack for lazy extractors until more generic solution is implemented
1668         # (see #28780)
1669         from .youtube import parse_qs
1670         qs = parse_qs(url)
1671         if qs.get('list', [None])[0]:
1672             return False
1673         return super(YoutubeIE, cls).suitable(url)
1674
1675     def __init__(self, *args, **kwargs):
1676         super(YoutubeIE, self).__init__(*args, **kwargs)
1677         self._code_cache = {}
1678         self._player_cache = {}
1679
1680     def _extract_player_url(self, ytcfg=None, webpage=None):
1681         player_url = try_get(ytcfg, (lambda x: x['PLAYER_JS_URL']), str)
1682         if not player_url:
1683             player_url = self._search_regex(
1684                 r'"(?:PLAYER_JS_URL|jsUrl)"\s*:\s*"([^"]+)"',
1685                 webpage, 'player URL', fatal=False)
1686         if player_url.startswith('//'):
1687             player_url = 'https:' + player_url
1688         elif not re.match(r'https?://', player_url):
1689             player_url = compat_urlparse.urljoin(
1690                 'https://www.youtube.com', player_url)
1691         return player_url
1692
1693     def _signature_cache_id(self, example_sig):
1694         """ Return a string representation of a signature """
1695         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1696
1697     @classmethod
1698     def _extract_player_info(cls, player_url):
1699         for player_re in cls._PLAYER_INFO_RE:
1700             id_m = re.search(player_re, player_url)
1701             if id_m:
1702                 break
1703         else:
1704             raise ExtractorError('Cannot identify player %r' % player_url)
1705         return id_m.group('id')
1706
1707     def _load_player(self, video_id, player_url, fatal=True) -> bool:
1708         player_id = self._extract_player_info(player_url)
1709         if player_id not in self._code_cache:
1710             self._code_cache[player_id] = self._download_webpage(
1711                 player_url, video_id, fatal=fatal,
1712                 note='Downloading player ' + player_id,
1713                 errnote='Download of %s failed' % player_url)
1714         return player_id in self._code_cache
1715
1716     def _extract_signature_function(self, video_id, player_url, example_sig):
1717         player_id = self._extract_player_info(player_url)
1718
1719         # Read from filesystem cache
1720         func_id = 'js_%s_%s' % (
1721             player_id, self._signature_cache_id(example_sig))
1722         assert os.path.basename(func_id) == func_id
1723
1724         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1725         if cache_spec is not None:
1726             return lambda s: ''.join(s[i] for i in cache_spec)
1727
1728         if self._load_player(video_id, player_url):
1729             code = self._code_cache[player_id]
1730             res = self._parse_sig_js(code)
1731
1732             test_string = ''.join(map(compat_chr, range(len(example_sig))))
1733             cache_res = res(test_string)
1734             cache_spec = [ord(c) for c in cache_res]
1735
1736             self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1737             return res
1738
1739     def _print_sig_code(self, func, example_sig):
1740         def gen_sig_code(idxs):
1741             def _genslice(start, end, step):
1742                 starts = '' if start == 0 else str(start)
1743                 ends = (':%d' % (end + step)) if end + step >= 0 else ':'
1744                 steps = '' if step == 1 else (':%d' % step)
1745                 return 's[%s%s%s]' % (starts, ends, steps)
1746
1747             step = None
1748             # Quelch pyflakes warnings - start will be set when step is set
1749             start = '(Never used)'
1750             for i, prev in zip(idxs[1:], idxs[:-1]):
1751                 if step is not None:
1752                     if i - prev == step:
1753                         continue
1754                     yield _genslice(start, prev, step)
1755                     step = None
1756                     continue
1757                 if i - prev in [-1, 1]:
1758                     step = i - prev
1759                     start = prev
1760                     continue
1761                 else:
1762                     yield 's[%d]' % prev
1763             if step is None:
1764                 yield 's[%d]' % i
1765             else:
1766                 yield _genslice(start, i, step)
1767
1768         test_string = ''.join(map(compat_chr, range(len(example_sig))))
1769         cache_res = func(test_string)
1770         cache_spec = [ord(c) for c in cache_res]
1771         expr_code = ' + '.join(gen_sig_code(cache_spec))
1772         signature_id_tuple = '(%s)' % (
1773             ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
1774         code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
1775                 '    return %s\n') % (signature_id_tuple, expr_code)
1776         self.to_screen('Extracted signature function:\n' + code)
1777
1778     def _parse_sig_js(self, jscode):
1779         funcname = self._search_regex(
1780             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1781              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1782              r'\bm=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(h\.s\)\)',
1783              r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2})\(decodeURIComponent\(c\)\)',
1784              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1785              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1786              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1787              # Obsolete patterns
1788              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1789              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1790              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1791              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1792              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1793              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1794              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1795              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1796             jscode, 'Initial JS player signature function name', group='sig')
1797
1798         jsi = JSInterpreter(jscode)
1799         initial_function = jsi.extract_function(funcname)
1800         return lambda s: initial_function([s])
1801
1802     def _decrypt_signature(self, s, video_id, player_url):
1803         """Turn the encrypted s field into a working signature"""
1804
1805         if player_url is None:
1806             raise ExtractorError('Cannot decrypt signature without player_url')
1807
1808         try:
1809             player_id = (player_url, self._signature_cache_id(s))
1810             if player_id not in self._player_cache:
1811                 func = self._extract_signature_function(
1812                     video_id, player_url, s
1813                 )
1814                 self._player_cache[player_id] = func
1815             func = self._player_cache[player_id]
1816             if self.get_param('youtube_print_sig_code'):
1817                 self._print_sig_code(func, s)
1818             return func(s)
1819         except Exception as e:
1820             tb = traceback.format_exc()
1821             raise ExtractorError(
1822                 'Signature extraction failed: ' + tb, cause=e)
1823
1824     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1825         """
1826         Extract signatureTimestamp (sts)
1827         Required to tell API what sig/player version is in use.
1828         """
1829         sts = None
1830         if isinstance(ytcfg, dict):
1831             sts = int_or_none(ytcfg.get('STS'))
1832
1833         if not sts:
1834             # Attempt to extract from player
1835             if player_url is None:
1836                 error_msg = 'Cannot extract signature timestamp without player_url.'
1837                 if fatal:
1838                     raise ExtractorError(error_msg)
1839                 self.report_warning(error_msg)
1840                 return
1841             if self._load_player(video_id, player_url, fatal=fatal):
1842                 player_id = self._extract_player_info(player_url)
1843                 code = self._code_cache[player_id]
1844                 sts = int_or_none(self._search_regex(
1845                     r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1846                     'JS player signature timestamp', group='sts', fatal=fatal))
1847         return sts
1848
1849     def _mark_watched(self, video_id, player_response):
1850         playback_url = url_or_none(try_get(
1851             player_response,
1852             lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))
1853         if not playback_url:
1854             return
1855         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1856         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1857
1858         # cpn generation algorithm is reverse engineered from base.js.
1859         # In fact it works even with dummy cpn.
1860         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1861         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1862
1863         qs.update({
1864             'ver': ['2'],
1865             'cpn': [cpn],
1866         })
1867         playback_url = compat_urlparse.urlunparse(
1868             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1869
1870         self._download_webpage(
1871             playback_url, video_id, 'Marking watched',
1872             'Unable to mark watched', fatal=False)
1873
1874     @staticmethod
1875     def _extract_urls(webpage):
1876         # Embedded YouTube player
1877         entries = [
1878             unescapeHTML(mobj.group('url'))
1879             for mobj in re.finditer(r'''(?x)
1880             (?:
1881                 <iframe[^>]+?src=|
1882                 data-video-url=|
1883                 <embed[^>]+?src=|
1884                 embedSWF\(?:\s*|
1885                 <object[^>]+data=|
1886                 new\s+SWFObject\(
1887             )
1888             (["\'])
1889                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1890                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1891             \1''', webpage)]
1892
1893         # lazyYT YouTube embed
1894         entries.extend(list(map(
1895             unescapeHTML,
1896             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1897
1898         # Wordpress "YouTube Video Importer" plugin
1899         matches = re.findall(r'''(?x)<div[^>]+
1900             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1901             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1902         entries.extend(m[-1] for m in matches)
1903
1904         return entries
1905
1906     @staticmethod
1907     def _extract_url(webpage):
1908         urls = YoutubeIE._extract_urls(webpage)
1909         return urls[0] if urls else None
1910
1911     @classmethod
1912     def extract_id(cls, url):
1913         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
1914         if mobj is None:
1915             raise ExtractorError('Invalid URL: %s' % url)
1916         video_id = mobj.group(2)
1917         return video_id
1918
1919     def _extract_chapters_from_json(self, data, video_id, duration):
1920         chapters_list = try_get(
1921             data,
1922             lambda x: x['playerOverlays']
1923                        ['playerOverlayRenderer']
1924                        ['decoratedPlayerBarRenderer']
1925                        ['decoratedPlayerBarRenderer']
1926                        ['playerBar']
1927                        ['chapteredPlayerBarRenderer']
1928                        ['chapters'],
1929             list)
1930         if not chapters_list:
1931             return
1932
1933         def chapter_time(chapter):
1934             return float_or_none(
1935                 try_get(
1936                     chapter,
1937                     lambda x: x['chapterRenderer']['timeRangeStartMillis'],
1938                     int),
1939                 scale=1000)
1940         chapters = []
1941         for next_num, chapter in enumerate(chapters_list, start=1):
1942             start_time = chapter_time(chapter)
1943             if start_time is None:
1944                 continue
1945             end_time = (chapter_time(chapters_list[next_num])
1946                         if next_num < len(chapters_list) else duration)
1947             if end_time is None:
1948                 continue
1949             title = try_get(
1950                 chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
1951                 compat_str)
1952             chapters.append({
1953                 'start_time': start_time,
1954                 'end_time': end_time,
1955                 'title': title,
1956             })
1957         return chapters
1958
1959     def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
1960         return self._parse_json(self._search_regex(
1961             (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
1962              regex), webpage, name, default='{}'), video_id, fatal=False)
1963
1964     @staticmethod
1965     def parse_time_text(time_text):
1966         """
1967         Parse the comment time text
1968         time_text is in the format 'X units ago (edited)'
1969         """
1970         time_text_split = time_text.split(' ')
1971         if len(time_text_split) >= 3:
1972             return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
1973
1974     @staticmethod
1975     def _join_text_entries(runs):
1976         text = None
1977         for run in runs:
1978             if not isinstance(run, dict):
1979                 continue
1980             sub_text = try_get(run, lambda x: x['text'], compat_str)
1981             if sub_text:
1982                 if not text:
1983                     text = sub_text
1984                     continue
1985                 text += sub_text
1986         return text
1987
1988     def _extract_comment(self, comment_renderer, parent=None):
1989         comment_id = comment_renderer.get('commentId')
1990         if not comment_id:
1991             return
1992         comment_text_runs = try_get(comment_renderer, lambda x: x['contentText']['runs']) or []
1993         text = self._join_text_entries(comment_text_runs) or ''
1994         comment_time_text = try_get(comment_renderer, lambda x: x['publishedTimeText']['runs']) or []
1995         time_text = self._join_text_entries(comment_time_text)
1996         # note: timestamp is an estimate calculated from the current time and time_text
1997         timestamp = calendar.timegm(self.parse_time_text(time_text).timetuple())
1998         author = try_get(comment_renderer, lambda x: x['authorText']['simpleText'], compat_str)
1999         author_id = try_get(comment_renderer,
2000                             lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2001         votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2002                                                        lambda x: x['likeCount']), compat_str)) or 0
2003         author_thumbnail = try_get(comment_renderer,
2004                                    lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2005
2006         author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2007         is_favorited = 'creatorHeart' in (try_get(
2008             comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2009         return {
2010             'id': comment_id,
2011             'text': text,
2012             'timestamp': timestamp,
2013             'time_text': time_text,
2014             'like_count': votes,
2015             'is_favorited': is_favorited,
2016             'author': author,
2017             'author_id': author_id,
2018             'author_thumbnail': author_thumbnail,
2019             'author_is_uploader': author_is_uploader,
2020             'parent': parent or 'root'
2021         }
2022
2023     def _comment_entries(self, root_continuation_data, identity_token, account_syncid,
2024                          ytcfg, video_id, parent=None, comment_counts=None):
2025
2026         def extract_header(contents):
2027             _total_comments = 0
2028             _continuation = None
2029             for content in contents:
2030                 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
2031                 expected_comment_count = try_get(comments_header_renderer,
2032                                                  (lambda x: x['countText']['runs'][0]['text'],
2033                                                   lambda x: x['commentsCount']['runs'][0]['text']),
2034                                                  compat_str)
2035                 if expected_comment_count:
2036                     comment_counts[1] = str_to_int(expected_comment_count)
2037                     self.to_screen('Downloading ~%d comments' % str_to_int(expected_comment_count))
2038                     _total_comments = comment_counts[1]
2039                 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2040                 comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top
2041
2042                 sort_menu_item = try_get(
2043                     comments_header_renderer,
2044                     lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2045                 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2046
2047                 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2048                 if not _continuation:
2049                     continue
2050
2051                 sort_text = sort_menu_item.get('title')
2052                 if isinstance(sort_text, compat_str):
2053                     sort_text = sort_text.lower()
2054                 else:
2055                     sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2056                 self.to_screen('Sorting comments by %s' % sort_text)
2057                 break
2058             return _total_comments, _continuation
2059
2060         def extract_thread(contents):
2061             if not parent:
2062                 comment_counts[2] = 0
2063             for content in contents:
2064                 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2065                 comment_renderer = try_get(
2066                     comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
2067                     content, (lambda x: x['commentRenderer'], dict))
2068
2069                 if not comment_renderer:
2070                     continue
2071                 comment = self._extract_comment(comment_renderer, parent)
2072                 if not comment:
2073                     continue
2074                 comment_counts[0] += 1
2075                 yield comment
2076                 # Attempt to get the replies
2077                 comment_replies_renderer = try_get(
2078                     comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2079
2080                 if comment_replies_renderer:
2081                     comment_counts[2] += 1
2082                     comment_entries_iter = self._comment_entries(
2083                         comment_replies_renderer, identity_token, account_syncid, ytcfg,
2084                         video_id, parent=comment.get('id'), comment_counts=comment_counts)
2085
2086                     for reply_comment in comment_entries_iter:
2087                         yield reply_comment
2088
2089         # YouTube comments have a max depth of 2
2090         max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
2091         if max_depth == 1 and parent:
2092             return
2093         if not comment_counts:
2094             # comment so far, est. total comments, current comment thread #
2095             comment_counts = [0, 0, 0]
2096
2097         continuation = self._extract_continuation(root_continuation_data)
2098         if continuation and len(continuation['ctoken']) < 27:
2099             self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2100             continuation_token = self._generate_comment_continuation(video_id)
2101             continuation = self._build_continuation_query(continuation_token, None)
2102
2103         visitor_data = None
2104         is_first_continuation = parent is None
2105
2106         for page_num in itertools.count(0):
2107             if not continuation:
2108                 break
2109             headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
2110             comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2111             if page_num == 0:
2112                 if is_first_continuation:
2113                     note_prefix = 'Downloading comment section API JSON'
2114                 else:
2115                     note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
2116                         comment_counts[2], comment_prog_str)
2117             else:
2118                 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2119                     '       ' if parent else '', ' replies' if parent else '',
2120                     page_num, comment_prog_str)
2121
2122             response = self._extract_response(
2123                 item_id=None, query=self._continuation_query_ajax_to_api(continuation),
2124                 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2125                 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
2126             if not response:
2127                 break
2128             visitor_data = try_get(
2129                 response,
2130                 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2131                 compat_str) or visitor_data
2132
2133             continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
2134
2135             continuation = None
2136             if isinstance(continuation_contents, list):
2137                 for continuation_section in continuation_contents:
2138                     if not isinstance(continuation_section, dict):
2139                         continue
2140                     continuation_items = try_get(
2141                         continuation_section,
2142                         (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2143                          lambda x: x['appendContinuationItemsAction']['continuationItems']),
2144                         list) or []
2145                     if is_first_continuation:
2146                         total_comments, continuation = extract_header(continuation_items)
2147                         if total_comments:
2148                             yield total_comments
2149                         is_first_continuation = False
2150                         if continuation:
2151                             break
2152                         continue
2153                     count = 0
2154                     for count, entry in enumerate(extract_thread(continuation_items)):
2155                         yield entry
2156                     continuation = self._extract_continuation({'contents': continuation_items})
2157                     if continuation:
2158                         # Sometimes YouTube provides a continuation without any comments
2159                         # In most cases we end up just downloading these with very little comments to come.
2160                         if count == 0:
2161                             if not parent:
2162                                 self.report_warning('No comments received - assuming end of comments')
2163                             continuation = None
2164                         break
2165
2166             # Deprecated response structure
2167             elif isinstance(continuation_contents, dict):
2168                 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2169                 for key, continuation_renderer in continuation_contents.items():
2170                     if key not in known_continuation_renderers:
2171                         continue
2172                     if not isinstance(continuation_renderer, dict):
2173                         continue
2174                     if is_first_continuation:
2175                         header_continuation_items = [continuation_renderer.get('header') or {}]
2176                         total_comments, continuation = extract_header(header_continuation_items)
2177                         if total_comments:
2178                             yield total_comments
2179                         is_first_continuation = False
2180                         if continuation:
2181                             break
2182
2183                     # Sometimes YouTube provides a continuation without any comments
2184                     # In most cases we end up just downloading these with very little comments to come.
2185                     count = 0
2186                     for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2187                         yield entry
2188                     continuation = self._extract_continuation(continuation_renderer)
2189                     if count == 0:
2190                         if not parent:
2191                             self.report_warning('No comments received - assuming end of comments')
2192                         continuation = None
2193                     break
2194
2195     @staticmethod
2196     def _generate_comment_continuation(video_id):
2197         """
2198         Generates initial comment section continuation token from given video id
2199         """
2200         b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2201         parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2202         new_continuation_intlist = list(itertools.chain.from_iterable(
2203             [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2204         return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2205
2206     def _extract_comments(self, ytcfg, video_id, contents, webpage):
2207         """Entry for comment extraction"""
2208         def _real_comment_extract(contents):
2209             if isinstance(contents, list):
2210                 for entry in contents:
2211                     for key, renderer in entry.items():
2212                         if key not in known_entry_comment_renderers:
2213                             continue
2214                         yield from self._comment_entries(
2215                             renderer, video_id=video_id, ytcfg=ytcfg,
2216                             identity_token=self._extract_identity_token(webpage, item_id=video_id),
2217                             account_syncid=self._extract_account_syncid(ytcfg))
2218                         break
2219         comments = []
2220         known_entry_comment_renderers = ('itemSectionRenderer',)
2221         estimated_total = 0
2222         max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0]) or float('inf')
2223
2224         try:
2225             for comment in _real_comment_extract(contents):
2226                 if len(comments) >= max_comments:
2227                     break
2228                 if isinstance(comment, int):
2229                     estimated_total = comment
2230                     continue
2231                 comments.append(comment)
2232         except KeyboardInterrupt:
2233             self.to_screen('Interrupted by user')
2234         self.to_screen('Downloaded %d/%d comments' % (len(comments), estimated_total))
2235         return {
2236             'comments': comments,
2237             'comment_count': len(comments),
2238         }
2239
2240     @staticmethod
2241     def _generate_player_context(sts=None):
2242         context = {
2243             'html5Preference': 'HTML5_PREF_WANTS',
2244         }
2245         if sts is not None:
2246             context['signatureTimestamp'] = sts
2247         return {
2248             'playbackContext': {
2249                 'contentPlaybackContext': context
2250             }
2251         }
2252
2253     @staticmethod
2254     def _get_video_info_params(video_id, client='TVHTML5'):
2255         GVI_CLIENTS = {
2256             'ANDROID': {
2257                 'c': 'ANDROID',
2258                 'cver': '16.20',
2259             },
2260             'TVHTML5': {
2261                 'c': 'TVHTML5',
2262                 'cver': '6.20180913',
2263             }
2264         }
2265         query = {
2266             'video_id': video_id,
2267             'eurl': 'https://youtube.googleapis.com/v/' + video_id,
2268             'html5': '1'
2269         }
2270         query.update(GVI_CLIENTS.get(client))
2271         return query
2272
2273     def _real_extract(self, url):
2274         url, smuggled_data = unsmuggle_url(url, {})
2275         video_id = self._match_id(url)
2276
2277         is_music_url = smuggled_data.get('is_music_url') or self.is_music_url(url)
2278
2279         base_url = self.http_scheme() + '//www.youtube.com/'
2280         webpage_url = base_url + 'watch?v=' + video_id
2281         webpage = self._download_webpage(
2282             webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2283
2284         ytcfg = self._extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2285         identity_token = self._extract_identity_token(webpage, video_id)
2286         syncid = self._extract_account_syncid(ytcfg)
2287         headers = self._generate_api_headers(ytcfg, identity_token, syncid)
2288
2289         player_url = self._extract_player_url(ytcfg, webpage)
2290
2291         player_client = self._configuration_arg('player_client', [''])[0]
2292         if player_client not in ('web', 'android', ''):
2293             self.report_warning(f'Invalid player_client {player_client} given. Falling back to android client.')
2294         force_mobile_client = player_client != 'web'
2295         player_skip = self._configuration_arg('player_skip')
2296
2297         def get_text(x):
2298             if not x:
2299                 return
2300             text = x.get('simpleText')
2301             if text and isinstance(text, compat_str):
2302                 return text
2303             runs = x.get('runs')
2304             if not isinstance(runs, list):
2305                 return
2306             return ''.join([r['text'] for r in runs if isinstance(r.get('text'), compat_str)])
2307
2308         ytm_streaming_data = {}
2309         if is_music_url:
2310             ytm_webpage = None
2311             sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2312             if sts and not force_mobile_client and 'configs' not in player_skip:
2313                 ytm_webpage = self._download_webpage(
2314                     'https://music.youtube.com',
2315                     video_id, fatal=False, note='Downloading remix client config')
2316
2317             ytm_cfg = self._extract_ytcfg(video_id, ytm_webpage) or {}
2318             ytm_client = 'WEB_REMIX'
2319             if not sts or force_mobile_client:
2320                 # Android client already has signature descrambled
2321                 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2322                 if not sts:
2323                     self.report_warning('Falling back to android remix client for player API.')
2324                 ytm_client = 'ANDROID_MUSIC'
2325                 ytm_cfg = {}
2326
2327             ytm_headers = self._generate_api_headers(
2328                 ytm_cfg, identity_token, syncid,
2329                 client=ytm_client)
2330             ytm_query = {'videoId': video_id}
2331             ytm_query.update(self._generate_player_context(sts))
2332
2333             ytm_player_response = self._extract_response(
2334                 item_id=video_id, ep='player', query=ytm_query,
2335                 ytcfg=ytm_cfg, headers=ytm_headers, fatal=False,
2336                 default_client=ytm_client,
2337                 note='Downloading %sremix player API JSON' % ('android ' if force_mobile_client else ''))
2338             ytm_streaming_data = try_get(ytm_player_response, lambda x: x['streamingData'], dict) or {}
2339
2340         player_response = None
2341         if webpage:
2342             player_response = self._extract_yt_initial_variable(
2343                 webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
2344                 video_id, 'initial player response')
2345
2346         if not player_response or force_mobile_client:
2347             sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2348             yt_client = 'WEB'
2349             ytpcfg = ytcfg
2350             ytp_headers = headers
2351             if not sts or force_mobile_client:
2352                 # Android client already has signature descrambled
2353                 # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2354                 if not sts:
2355                     self.report_warning('Falling back to android client for player API.')
2356                 yt_client = 'ANDROID'
2357                 ytpcfg = {}
2358                 ytp_headers = self._generate_api_headers(ytpcfg, identity_token, syncid, yt_client)
2359
2360             yt_query = {'videoId': video_id}
2361             yt_query.update(self._generate_player_context(sts))
2362             player_response = self._extract_response(
2363                 item_id=video_id, ep='player', query=yt_query,
2364                 ytcfg=ytpcfg, headers=ytp_headers, fatal=False,
2365                 default_client=yt_client,
2366                 note='Downloading %splayer API JSON' % ('android ' if force_mobile_client else '')
2367             ) or player_response
2368
2369         # Age-gate workarounds
2370         playability_status = player_response.get('playabilityStatus') or {}
2371         if playability_status.get('reason') in self._AGE_GATE_REASONS:
2372             gvi_clients = ('ANDROID', 'TVHTML5') if force_mobile_client else ('TVHTML5', 'ANDROID')
2373             for gvi_client in gvi_clients:
2374                 pr = self._parse_json(try_get(compat_parse_qs(
2375                     self._download_webpage(
2376                         base_url + 'get_video_info', video_id,
2377                         'Refetching age-gated %s info webpage' % gvi_client.lower(),
2378                         'unable to download video info webpage', fatal=False,
2379                         query=self._get_video_info_params(video_id, client=gvi_client))),
2380                     lambda x: x['player_response'][0],
2381                     compat_str) or '{}', video_id)
2382                 if pr:
2383                     break
2384             if not pr:
2385                 self.report_warning('Falling back to embedded-only age-gate workaround.')
2386                 embed_webpage = None
2387                 sts = self._extract_signature_timestamp(video_id, player_url, ytcfg, fatal=False)
2388                 if sts and not force_mobile_client and 'configs' not in player_skip:
2389                     embed_webpage = self._download_webpage(
2390                         'https://www.youtube.com/embed/%s?html5=1' % video_id,
2391                         video_id=video_id, note='Downloading age-gated embed config')
2392
2393                 ytcfg_age = self._extract_ytcfg(video_id, embed_webpage) or {}
2394                 # If we extracted the embed webpage, it'll tell us if we can view the video
2395                 embedded_pr = self._parse_json(
2396                     try_get(ytcfg_age, lambda x: x['PLAYER_VARS']['embedded_player_response'], str) or '{}',
2397                     video_id=video_id)
2398                 embedded_ps_reason = try_get(embedded_pr, lambda x: x['playabilityStatus']['reason'], str) or ''
2399                 if embedded_ps_reason not in self._AGE_GATE_REASONS:
2400                     yt_client = 'WEB_EMBEDDED_PLAYER'
2401                     if not sts or force_mobile_client:
2402                         # Android client already has signature descrambled
2403                         # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
2404                         if not sts:
2405                             self.report_warning(
2406                                 'Falling back to android embedded client for player API (note: some formats may be missing).')
2407                         yt_client = 'ANDROID_EMBEDDED_PLAYER'
2408                         ytcfg_age = {}
2409
2410                     ytage_headers = self._generate_api_headers(
2411                         ytcfg_age, identity_token, syncid, client=yt_client)
2412                     yt_age_query = {'videoId': video_id}
2413                     yt_age_query.update(self._generate_player_context(sts))
2414                     pr = self._extract_response(
2415                         item_id=video_id, ep='player', query=yt_age_query,
2416                         ytcfg=ytcfg_age, headers=ytage_headers, fatal=False,
2417                         default_client=yt_client,
2418                         note='Downloading %sage-gated player API JSON' % ('android ' if force_mobile_client else '')
2419                     ) or {}
2420
2421             if pr:
2422                 player_response = pr
2423
2424         trailer_video_id = try_get(
2425             playability_status,
2426             lambda x: x['errorScreen']['playerLegacyDesktopYpcTrailerRenderer']['trailerVideoId'],
2427             compat_str)
2428         if trailer_video_id:
2429             return self.url_result(
2430                 trailer_video_id, self.ie_key(), trailer_video_id)
2431
2432         search_meta = (
2433             lambda x: self._html_search_meta(x, webpage, default=None)) \
2434             if webpage else lambda x: None
2435
2436         video_details = player_response.get('videoDetails') or {}
2437         microformat = try_get(
2438             player_response,
2439             lambda x: x['microformat']['playerMicroformatRenderer'],
2440             dict) or {}
2441         video_title = video_details.get('title') \
2442             or get_text(microformat.get('title')) \
2443             or search_meta(['og:title', 'twitter:title', 'title'])
2444         video_description = video_details.get('shortDescription')
2445
2446         if not smuggled_data.get('force_singlefeed', False):
2447             if not self.get_param('noplaylist'):
2448                 multifeed_metadata_list = try_get(
2449                     player_response,
2450                     lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
2451                     compat_str)
2452                 if multifeed_metadata_list:
2453                     entries = []
2454                     feed_ids = []
2455                     for feed in multifeed_metadata_list.split(','):
2456                         # Unquote should take place before split on comma (,) since textual
2457                         # fields may contain comma as well (see
2458                         # https://github.com/ytdl-org/youtube-dl/issues/8536)
2459                         feed_data = compat_parse_qs(
2460                             compat_urllib_parse_unquote_plus(feed))
2461
2462                         def feed_entry(name):
2463                             return try_get(
2464                                 feed_data, lambda x: x[name][0], compat_str)
2465
2466                         feed_id = feed_entry('id')
2467                         if not feed_id:
2468                             continue
2469                         feed_title = feed_entry('title')
2470                         title = video_title
2471                         if feed_title:
2472                             title += ' (%s)' % feed_title
2473                         entries.append({
2474                             '_type': 'url_transparent',
2475                             'ie_key': 'Youtube',
2476                             'url': smuggle_url(
2477                                 base_url + 'watch?v=' + feed_data['id'][0],
2478                                 {'force_singlefeed': True}),
2479                             'title': title,
2480                         })
2481                         feed_ids.append(feed_id)
2482                     self.to_screen(
2483                         'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2484                         % (', '.join(feed_ids), video_id))
2485                     return self.playlist_result(
2486                         entries, video_id, video_title, video_description)
2487             else:
2488                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2489
2490         formats, itags, stream_ids = [], [], []
2491         itag_qualities = {}
2492         q = qualities([
2493             # "tiny" is the smallest video-only format. But some audio-only formats
2494             # was also labeled "tiny". It is not clear if such formats still exist
2495             'tiny', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
2496             'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2497         ])
2498
2499         streaming_data = player_response.get('streamingData') or {}
2500         streaming_formats = streaming_data.get('formats') or []
2501         streaming_formats.extend(streaming_data.get('adaptiveFormats') or [])
2502         streaming_formats.extend(ytm_streaming_data.get('formats') or [])
2503         streaming_formats.extend(ytm_streaming_data.get('adaptiveFormats') or [])
2504
2505         for fmt in streaming_formats:
2506             if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2507                 continue
2508
2509             itag = str_or_none(fmt.get('itag'))
2510             audio_track = fmt.get('audioTrack') or {}
2511             stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2512             if stream_id in stream_ids:
2513                 continue
2514
2515             quality = fmt.get('quality')
2516             if quality == 'tiny' or not quality:
2517                 quality = fmt.get('audioQuality', '').lower() or quality
2518             if itag and quality:
2519                 itag_qualities[itag] = quality
            # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
            # (adding `&sq=0` to the URL) and parsing the emsg box to determine the
            # number of fragments that would subsequently be requested (with `&sq=N`)
2523             if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2524                 continue
2525
2526             fmt_url = fmt.get('url')
2527             if not fmt_url:
2528                 sc = compat_parse_qs(fmt.get('signatureCipher'))
2529                 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2530                 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2531                 if not (sc and fmt_url and encrypted_sig):
2532                     continue
2533                 if not player_url:
2534                     continue
2535                 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2536                 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2537                 fmt_url += '&' + sp + '=' + signature
2538
2539             if itag:
2540                 itags.append(itag)
2541                 stream_ids.append(stream_id)
2542
2543             tbr = float_or_none(
2544                 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2545             dct = {
2546                 'asr': int_or_none(fmt.get('audioSampleRate')),
2547                 'filesize': int_or_none(fmt.get('contentLength')),
2548                 'format_id': itag,
2549                 'format_note': audio_track.get('displayName') or fmt.get('qualityLabel') or quality,
2550                 'fps': int_or_none(fmt.get('fps')),
2551                 'height': int_or_none(fmt.get('height')),
2552                 'quality': q(quality),
2553                 'tbr': tbr,
2554                 'url': fmt_url,
2555                 'width': fmt.get('width'),
2556                 'language': audio_track.get('id', '').split('.')[0],
2557             }
2558             mime_mobj = re.match(
2559                 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2560             if mime_mobj:
2561                 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2562                 dct.update(parse_codecs(mime_mobj.group(2)))
2563                 # The 3gp format in android client has a quality of "small",
2564                 # but is actually worse than all other formats
2565                 if dct['ext'] == '3gp':
2566                     dct['quality'] = q('tiny')
2567             no_audio = dct.get('acodec') == 'none'
2568             no_video = dct.get('vcodec') == 'none'
2569             if no_audio:
2570                 dct['vbr'] = tbr
2571             if no_video:
2572                 dct['abr'] = tbr
2573             if no_audio or no_video:
2574                 dct['downloader_options'] = {
2575                     # Youtube throttles chunks >~10M
2576                     'http_chunk_size': 10485760,
2577                 }
2578                 if dct.get('ext'):
2579                     dct['container'] = dct['ext'] + '_dash'
2580             formats.append(dct)
2581
2582         skip_manifests = self._configuration_arg('skip')
2583         get_dash = 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)
2584         get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2585
2586         for sd in (streaming_data, ytm_streaming_data):
2587             hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2588             if hls_manifest_url:
2589                 for f in self._extract_m3u8_formats(
2590                         hls_manifest_url, video_id, 'mp4', fatal=False):
2591                     itag = self._search_regex(
2592                         r'/itag/(\d+)', f['url'], 'itag', default=None)
2593                     if itag:
2594                         f['format_id'] = itag
2595                     formats.append(f)
2596
2597             dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2598             if dash_manifest_url:
2599                 for f in self._extract_mpd_formats(
2600                         dash_manifest_url, video_id, fatal=False):
2601                     itag = f['format_id']
2602                     if itag in itags:
2603                         continue
2604                     if itag in itag_qualities:
2605                         f['quality'] = q(itag_qualities[itag])
2606                     filesize = int_or_none(self._search_regex(
2607                         r'/clen/(\d+)', f.get('fragment_base_url')
2608                         or f['url'], 'file size', default=None))
2609                     if filesize:
2610                         f['filesize'] = filesize
2611                     formats.append(f)
2612
2613         if not formats:
2614             if not self.get_param('allow_unplayable_formats') and streaming_data.get('licenseInfos'):
2615                 self.raise_no_formats(
2616                     'This video is DRM protected.', expected=True)
2617             pemr = try_get(
2618                 playability_status,
2619                 lambda x: x['errorScreen']['playerErrorMessageRenderer'],
2620                 dict) or {}
2621             reason = get_text(pemr.get('reason')) or playability_status.get('reason')
2622             subreason = pemr.get('subreason')
2623             if subreason:
2624                 subreason = clean_html(get_text(subreason))
2625                 if subreason == 'The uploader has not made this video available in your country.':
2626                     countries = microformat.get('availableCountries')
2627                     if not countries:
2628                         regions_allowed = search_meta('regionsAllowed')
2629                         countries = regions_allowed.split(',') if regions_allowed else None
2630                     self.raise_geo_restricted(subreason, countries, metadata_available=True)
2631                 reason += '\n' + subreason
2632             if reason:
2633                 self.raise_no_formats(reason, expected=True)
2634
2635         self._sort_formats(formats)
2636
2637         keywords = video_details.get('keywords') or []
2638         if not keywords and webpage:
2639             keywords = [
2640                 unescapeHTML(m.group('content'))
2641                 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2642         for keyword in keywords:
2643             if keyword.startswith('yt:stretch='):
2644                 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2645                 if mobj:
2646                     # NB: float is intentional for forcing float division
2647                     w, h = (float(v) for v in mobj.groups())
2648                     if w > 0 and h > 0:
2649                         ratio = w / h
2650                         for f in formats:
2651                             if f.get('vcodec') != 'none':
2652                                 f['stretched_ratio'] = ratio
2653                         break
2654
2655         thumbnails = []
2656         for container in (video_details, microformat):
2657             for thumbnail in (try_get(
2658                     container,
2659                     lambda x: x['thumbnail']['thumbnails'], list) or []):
2660                 thumbnail_url = thumbnail.get('url')
2661                 if not thumbnail_url:
2662                     continue
2663                 # Sometimes youtube gives a wrong thumbnail URL. See:
2664                 # https://github.com/yt-dlp/yt-dlp/issues/233
2665                 # https://github.com/ytdl-org/youtube-dl/issues/28023
2666                 if 'maxresdefault' in thumbnail_url:
2667                     thumbnail_url = thumbnail_url.split('?')[0]
2668                 thumbnails.append({
2669                     'url': thumbnail_url,
2670                     'height': int_or_none(thumbnail.get('height')),
2671                     'width': int_or_none(thumbnail.get('width')),
2672                     'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2673                 })
2674         thumbnail_url = search_meta(['og:image', 'twitter:image'])
2675         if thumbnail_url:
2676             thumbnails.append({
2677                 'url': thumbnail_url,
2678                 'preference': 1 if 'maxresdefault' in thumbnail_url else -1
2679             })
2680         # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
2681         # See: https://github.com/ytdl-org/youtube-dl/issues/29049
2682         thumbnails.append({
2683             'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
2684             'preference': 1,
2685         })
2686         self._remove_duplicate_formats(thumbnails)
2687
2688         category = microformat.get('category') or search_meta('genre')
2689         channel_id = video_details.get('channelId') \
2690             or microformat.get('externalChannelId') \
2691             or search_meta('channelId')
2692         duration = int_or_none(
2693             video_details.get('lengthSeconds')
2694             or microformat.get('lengthSeconds')) \
2695             or parse_duration(search_meta('duration'))
2696         is_live = video_details.get('isLive')
2697         is_upcoming = video_details.get('isUpcoming')
2698         owner_profile_url = microformat.get('ownerProfileUrl')
2699
2700         info = {
2701             'id': video_id,
2702             'title': self._live_title(video_title) if is_live else video_title,
2703             'formats': formats,
2704             'thumbnails': thumbnails,
2705             'description': video_description,
2706             'upload_date': unified_strdate(
2707                 microformat.get('uploadDate')
2708                 or search_meta('uploadDate')),
2709             'uploader': video_details['author'],
2710             'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2711             'uploader_url': owner_profile_url,
2712             'channel_id': channel_id,
2713             'channel_url': 'https://www.youtube.com/channel/' + channel_id if channel_id else None,
2714             'duration': duration,
2715             'view_count': int_or_none(
2716                 video_details.get('viewCount')
2717                 or microformat.get('viewCount')
2718                 or search_meta('interactionCount')),
2719             'average_rating': float_or_none(video_details.get('averageRating')),
2720             'age_limit': 18 if (
2721                 microformat.get('isFamilySafe') is False
2722                 or search_meta('isFamilyFriendly') == 'false'
2723                 or search_meta('og:restrictions:age') == '18+') else 0,
2724             'webpage_url': webpage_url,
2725             'categories': [category] if category else None,
2726             'tags': keywords,
2727             'is_live': is_live,
2728             'playable_in_embed': playability_status.get('playableInEmbed'),
2729             'was_live': video_details.get('isLiveContent'),
2730         }
2731
2732         pctr = try_get(
2733             player_response,
2734             lambda x: x['captions']['playerCaptionsTracklistRenderer'], dict)
2735         subtitles = {}
2736         if pctr:
2737             def process_language(container, base_url, lang_code, sub_name, query):
2738                 lang_subs = container.setdefault(lang_code, [])
2739                 for fmt in self._SUBTITLE_FORMATS:
2740                     query.update({
2741                         'fmt': fmt,
2742                     })
2743                     lang_subs.append({
2744                         'ext': fmt,
2745                         'url': update_url_query(base_url, query),
2746                         'name': sub_name,
2747                     })
2748
2749             for caption_track in (pctr.get('captionTracks') or []):
2750                 base_url = caption_track.get('baseUrl')
2751                 if not base_url:
2752                     continue
2753                 if caption_track.get('kind') != 'asr':
2754                     lang_code = (
2755                         remove_start(caption_track.get('vssId') or '', '.').replace('.', '-')
2756                         or caption_track.get('languageCode'))
2757                     if not lang_code:
2758                         continue
2759                     process_language(
2760                         subtitles, base_url, lang_code,
2761                         try_get(caption_track, lambda x: x['name']['simpleText']),
2762                         {})
2763                     continue
2764                 automatic_captions = {}
2765                 for translation_language in (pctr.get('translationLanguages') or []):
2766                     translation_language_code = translation_language.get('languageCode')
2767                     if not translation_language_code:
2768                         continue
2769                     process_language(
2770                         automatic_captions, base_url, translation_language_code,
2771                         try_get(translation_language, (
2772                             lambda x: x['languageName']['simpleText'],
2773                             lambda x: x['languageName']['runs'][0]['text'])),
2774                         {'tlang': translation_language_code})
2775                 info['automatic_captions'] = automatic_captions
2776         info['subtitles'] = subtitles
2777
2778         parsed_url = compat_urllib_parse_urlparse(url)
2779         for component in [parsed_url.fragment, parsed_url.query]:
2780             query = compat_parse_qs(component)
2781             for k, v in query.items():
2782                 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2783                     d_k += '_time'
2784                     if d_k not in info and k in s_ks:
2785                         info[d_k] = parse_duration(query[k][0])
2786
2787         # Youtube Music Auto-generated description
2788         if video_description:
2789             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2790             if mobj:
2791                 release_year = mobj.group('release_year')
2792                 release_date = mobj.group('release_date')
2793                 if release_date:
2794                     release_date = release_date.replace('-', '')
2795                     if not release_year:
2796                         release_year = release_date[:4]
2797                 info.update({
2798                     'album': mobj.group('album'.strip()),
2799                     'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2800                     'track': mobj.group('track').strip(),
2801                     'release_date': release_date,
2802                     'release_year': int_or_none(release_year),
2803                 })
2804
2805         initial_data = None
2806         if webpage:
2807             initial_data = self._extract_yt_initial_variable(
2808                 webpage, self._YT_INITIAL_DATA_RE, video_id,
2809                 'yt initial data')
2810         if not initial_data:
2811             initial_data = self._extract_response(
2812                 item_id=video_id, ep='next', fatal=False,
2813                 ytcfg=ytcfg, headers=headers, query={'videoId': video_id},
2814                 note='Downloading initial data API JSON')
2815
2816         try:
2817             # This will error if there is no livechat
2818             initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2819             info['subtitles']['live_chat'] = [{
2820                 'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
2821                 'video_id': video_id,
2822                 'ext': 'json',
2823                 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2824             }]
2825         except (KeyError, IndexError, TypeError):
2826             pass
2827
2828         if initial_data:
2829             chapters = self._extract_chapters_from_json(
2830                 initial_data, video_id, duration)
2831             if not chapters:
2832                 for engagment_pannel in (initial_data.get('engagementPanels') or []):
2833                     contents = try_get(
2834                         engagment_pannel, lambda x: x['engagementPanelSectionListRenderer']['content']['macroMarkersListRenderer']['contents'],
2835                         list)
2836                     if not contents:
2837                         continue
2838
2839                     def chapter_time(mmlir):
2840                         return parse_duration(
2841                             get_text(mmlir.get('timeDescription')))
2842
2843                     chapters = []
2844                     for next_num, content in enumerate(contents, start=1):
2845                         mmlir = content.get('macroMarkersListItemRenderer') or {}
2846                         start_time = chapter_time(mmlir)
2847                         end_time = chapter_time(try_get(
2848                             contents, lambda x: x[next_num]['macroMarkersListItemRenderer'])) \
2849                             if next_num < len(contents) else duration
2850                         if start_time is None or end_time is None:
2851                             continue
2852                         chapters.append({
2853                             'start_time': start_time,
2854                             'end_time': end_time,
2855                             'title': get_text(mmlir.get('title')),
2856                         })
2857                     if chapters:
2858                         break
2859             if chapters:
2860                 info['chapters'] = chapters
2861
2862             contents = try_get(
2863                 initial_data,
2864                 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
2865                 list) or []
2866             for content in contents:
2867                 vpir = content.get('videoPrimaryInfoRenderer')
2868                 if vpir:
2869                     stl = vpir.get('superTitleLink')
2870                     if stl:
2871                         stl = get_text(stl)
2872                         if try_get(
2873                                 vpir,
2874                                 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
2875                             info['location'] = stl
2876                         else:
2877                             mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
2878                             if mobj:
2879                                 info.update({
2880                                     'series': mobj.group(1),
2881                                     'season_number': int(mobj.group(2)),
2882                                     'episode_number': int(mobj.group(3)),
2883                                 })
2884                     for tlb in (try_get(
2885                             vpir,
2886                             lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
2887                             list) or []):
2888                         tbr = tlb.get('toggleButtonRenderer') or {}
2889                         for getter, regex in [(
2890                                 lambda x: x['defaultText']['accessibility']['accessibilityData'],
2891                                 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
2892                                     lambda x: x['accessibility'],
2893                                     lambda x: x['accessibilityData']['accessibilityData'],
2894                                 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
2895                             label = (try_get(tbr, getter, dict) or {}).get('label')
2896                             if label:
2897                                 mobj = re.match(regex, label)
2898                                 if mobj:
2899                                     info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
2900                                     break
2901                     sbr_tooltip = try_get(
2902                         vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
2903                     if sbr_tooltip:
2904                         like_count, dislike_count = sbr_tooltip.split(' / ')
2905                         info.update({
2906                             'like_count': str_to_int(like_count),
2907                             'dislike_count': str_to_int(dislike_count),
2908                         })
2909                 vsir = content.get('videoSecondaryInfoRenderer')
2910                 if vsir:
2911                     info['channel'] = get_text(try_get(
2912                         vsir,
2913                         lambda x: x['owner']['videoOwnerRenderer']['title'],
2914                         dict))
2915                     rows = try_get(
2916                         vsir,
2917                         lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
2918                         list) or []
2919                     multiple_songs = False
2920                     for row in rows:
2921                         if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
2922                             multiple_songs = True
2923                             break
2924                     for row in rows:
2925                         mrr = row.get('metadataRowRenderer') or {}
2926                         mrr_title = mrr.get('title')
2927                         if not mrr_title:
2928                             continue
2929                         mrr_title = get_text(mrr['title'])
2930                         mrr_contents_text = get_text(mrr['contents'][0])
2931                         if mrr_title == 'License':
2932                             info['license'] = mrr_contents_text
2933                         elif not multiple_songs:
2934                             if mrr_title == 'Album':
2935                                 info['album'] = mrr_contents_text
2936                             elif mrr_title == 'Artist':
2937                                 info['artist'] = mrr_contents_text
2938                             elif mrr_title == 'Song':
2939                                 info['track'] = mrr_contents_text
2940
2941         fallbacks = {
2942             'channel': 'uploader',
2943             'channel_id': 'uploader_id',
2944             'channel_url': 'uploader_url',
2945         }
2946         for to, frm in fallbacks.items():
2947             if not info.get(to):
2948                 info[to] = info.get(frm)
2949
2950         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
2951             v = info.get(s_k)
2952             if v:
2953                 info[d_k] = v
2954
2955         is_private = bool_or_none(video_details.get('isPrivate'))
2956         is_unlisted = bool_or_none(microformat.get('isUnlisted'))
2957         is_membersonly = None
2958         is_premium = None
2959         if initial_data and is_private is not None:
2960             is_membersonly = False
2961             is_premium = False
2962             contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list)
2963             for content in contents or []:
2964                 badges = try_get(content, lambda x: x['videoPrimaryInfoRenderer']['badges'], list)
2965                 for badge in badges or []:
2966                     label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label']) or ''
2967                     if label.lower() == 'members only':
2968                         is_membersonly = True
2969                         break
2970                     elif label.lower() == 'premium':
2971                         is_premium = True
2972                         break
2973                 if is_membersonly or is_premium:
2974                     break
2975
2976         # TODO: Add this for playlists
2977         info['availability'] = self._availability(
2978             is_private=is_private,
2979             needs_premium=is_premium,
2980             needs_subscription=is_membersonly,
2981             needs_auth=info['age_limit'] >= 18,
2982             is_unlisted=None if is_private is None else is_unlisted)
2983
2984         # get xsrf for annotations or comments
2985         get_annotations = self.get_param('writeannotations', False)
2986         get_comments = self.get_param('getcomments', False)
2987         if get_annotations or get_comments:
2988             xsrf_token = None
2989             ytcfg = self._extract_ytcfg(video_id, webpage)
2990             if ytcfg:
2991                 xsrf_token = try_get(ytcfg, lambda x: x['XSRF_TOKEN'], compat_str)
2992             if not xsrf_token:
2993                 xsrf_token = self._search_regex(
2994                     r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
2995                     webpage, 'xsrf token', group='xsrf_token', fatal=False)
2996
2997         # annotations
2998         if get_annotations:
2999             invideo_url = try_get(
3000                 player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
3001             if xsrf_token and invideo_url:
3002                 xsrf_field_name = None
3003                 if ytcfg:
3004                     xsrf_field_name = try_get(ytcfg, lambda x: x['XSRF_FIELD_NAME'], compat_str)
3005                 if not xsrf_field_name:
3006                     xsrf_field_name = self._search_regex(
3007                         r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
3008                         webpage, 'xsrf field name',
3009                         group='xsrf_field_name', default='session_token')
3010                 info['annotations'] = self._download_webpage(
3011                     self._proto_relative_url(invideo_url),
3012                     video_id, note='Downloading annotations',
3013                     errnote='Unable to download video annotations', fatal=False,
3014                     data=urlencode_postdata({xsrf_field_name: xsrf_token}))
3015
3016         if get_comments:
3017             info['__post_extractor'] = lambda: self._extract_comments(ytcfg, video_id, contents, webpage)
3018
3019         self.mark_watched(video_id, player_response)
3020
3021         return info
3022
3023
3024 class YoutubeTabIE(YoutubeBaseInfoExtractor):
3025     IE_DESC = 'YouTube.com tab'
3026     _VALID_URL = r'''(?x)
3027                     https?://
3028                         (?:\w+\.)?
3029                         (?:
3030                             youtube(?:kids)?\.com|
3031                             invidio\.us
3032                         )/
3033                         (?:
3034                             (?P<channel_type>channel|c|user|browse)/|
3035                             (?P<not_channel>
3036                                 feed/|hashtag/|
3037                                 (?:playlist|watch)\?.*?\blist=
3038                             )|
3039                             (?!(?:%s)\b)  # Direct URLs
3040                         )
3041                         (?P<id>[^/?\#&]+)
3042                     ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
3043     IE_NAME = 'youtube:tab'
3044
3045     _TESTS = [{
3046         'note': 'playlists, multipage',
3047         'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
3048         'playlist_mincount': 94,
3049         'info_dict': {
3050             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3051             'title': 'Игорь Клейнер - Playlists',
3052             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3053             'uploader': 'Игорь Клейнер',
3054             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3055         },
3056     }, {
3057         'note': 'playlists, multipage, different order',
3058         'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
3059         'playlist_mincount': 94,
3060         'info_dict': {
3061             'id': 'UCqj7Cz7revf5maW9g5pgNcg',
3062             'title': 'Игорь Клейнер - Playlists',
3063             'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
3064             'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
3065             'uploader': 'Игорь Клейнер',
3066         },
3067     }, {
3068         'note': 'playlists, series',
3069         'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
3070         'playlist_mincount': 5,
3071         'info_dict': {
3072             'id': 'UCYO_jab_esuFRV4b17AJtAw',
3073             'title': '3Blue1Brown - Playlists',
3074             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3075             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3076             'uploader': '3Blue1Brown',
3077         },
3078     }, {
3079         'note': 'playlists, singlepage',
3080         'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
3081         'playlist_mincount': 4,
3082         'info_dict': {
3083             'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3084             'title': 'ThirstForScience - Playlists',
3085             'description': 'md5:609399d937ea957b0f53cbffb747a14c',
3086             'uploader': 'ThirstForScience',
3087             'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
3088         }
3089     }, {
3090         'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
3091         'only_matching': True,
3092     }, {
3093         'note': 'basic, single video playlist',
3094         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3095         'info_dict': {
3096             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3097             'uploader': 'Sergey M.',
3098             'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3099             'title': 'youtube-dl public playlist',
3100         },
3101         'playlist_count': 1,
3102     }, {
3103         'note': 'empty playlist',
3104         'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3105         'info_dict': {
3106             'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
3107             'uploader': 'Sergey M.',
3108             'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
3109             'title': 'youtube-dl empty playlist',
3110         },
3111         'playlist_count': 0,
3112     }, {
3113         'note': 'Home tab',
3114         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
3115         'info_dict': {
3116             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3117             'title': 'lex will - Home',
3118             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3119             'uploader': 'lex will',
3120             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3121         },
3122         'playlist_mincount': 2,
3123     }, {
3124         'note': 'Videos tab',
3125         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
3126         'info_dict': {
3127             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3128             'title': 'lex will - Videos',
3129             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3130             'uploader': 'lex will',
3131             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3132         },
3133         'playlist_mincount': 975,
3134     }, {
3135         'note': 'Videos tab, sorted by popular',
3136         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
3137         'info_dict': {
3138             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3139             'title': 'lex will - Videos',
3140             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3141             'uploader': 'lex will',
3142             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3143         },
3144         'playlist_mincount': 199,
3145     }, {
3146         'note': 'Playlists tab',
3147         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
3148         'info_dict': {
3149             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3150             'title': 'lex will - Playlists',
3151             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3152             'uploader': 'lex will',
3153             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3154         },
3155         'playlist_mincount': 17,
3156     }, {
3157         'note': 'Community tab',
3158         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
3159         'info_dict': {
3160             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3161             'title': 'lex will - Community',
3162             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3163             'uploader': 'lex will',
3164             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3165         },
3166         'playlist_mincount': 18,
3167     }, {
3168         'note': 'Channels tab',
3169         'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
3170         'info_dict': {
3171             'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3172             'title': 'lex will - Channels',
3173             'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
3174             'uploader': 'lex will',
3175             'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
3176         },
3177         'playlist_mincount': 12,
3178     }, {
3179         'note': 'Search tab',
3180         'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
3181         'playlist_mincount': 40,
3182         'info_dict': {
3183             'id': 'UCYO_jab_esuFRV4b17AJtAw',
3184             'title': '3Blue1Brown - Search - linear algebra',
3185             'description': 'md5:e1384e8a133307dd10edee76e875d62f',
3186             'uploader': '3Blue1Brown',
3187             'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
3188         },
3189     }, {
3190         'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3191         'only_matching': True,
3192     }, {
3193         'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3194         'only_matching': True,
3195     }, {
3196         'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
3197         'only_matching': True,
3198     }, {
3199         'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
3200         'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3201         'info_dict': {
3202             'title': '29C3: Not my department',
3203             'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
3204             'uploader': 'Christiaan008',
3205             'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
3206             'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
3207         },
3208         'playlist_count': 96,
3209     }, {
3210         'note': 'Large playlist',
3211         'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
3212         'info_dict': {
3213             'title': 'Uploads from Cauchemar',
3214             'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
3215             'uploader': 'Cauchemar',
3216             'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
3217         },
3218         'playlist_mincount': 1123,
3219     }, {
3220         'note': 'even larger playlist, 8832 videos',
3221         'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
3222         'only_matching': True,
3223     }, {
3224         'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
3225         'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
3226         'info_dict': {
3227             'title': 'Uploads from Interstellar Movie',
3228             'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
3229             'uploader': 'Interstellar Movie',
3230             'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
3231         },
3232         'playlist_mincount': 21,
3233     }, {
3234         'note': 'Playlist with "show unavailable videos" button',
3235         'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
3236         'info_dict': {
3237             'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
3238             'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
3239             'uploader': 'Phim Siêu Nhân Nhật Bản',
3240             'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
3241         },
3242         'playlist_mincount': 200,
3243     }, {
3244         'note': 'Playlist with unavailable videos in page 7',
3245         'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
3246         'info_dict': {
3247             'title': 'Uploads from BlankTV',
3248             'id': 'UU8l9frL61Yl5KFOl87nIm2w',
3249             'uploader': 'BlankTV',
3250             'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
3251         },
3252         'playlist_mincount': 1000,
3253     }, {
3254         'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
3255         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3256         'info_dict': {
3257             'title': 'Data Analysis with Dr Mike Pound',
3258             'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
3259             'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
3260             'uploader': 'Computerphile',
3261             'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
3262         },
3263         'playlist_mincount': 11,
3264     }, {
3265         'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
3266         'only_matching': True,
3267     }, {
3268         'note': 'Playlist URL that does not actually serve a playlist',
3269         'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
3270         'info_dict': {
3271             'id': 'FqZTN594JQw',
3272             'ext': 'webm',
3273             'title': "Smiley's People 01 detective, Adventure Series, Action",
3274             'uploader': 'STREEM',
3275             'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
3276             'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
3277             'upload_date': '20150526',
3278             'license': 'Standard YouTube License',
3279             'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
3280             'categories': ['People & Blogs'],
3281             'tags': list,
3282             'view_count': int,
3283             'like_count': int,
3284             'dislike_count': int,
3285         },
3286         'params': {
3287             'skip_download': True,
3288         },
3289         'skip': 'This video is not available.',
3290         'add_ie': [YoutubeIE.ie_key()],
3291     }, {
3292         'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
3293         'only_matching': True,
3294     }, {
3295         'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
3296         'only_matching': True,
3297     }, {
3298         'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
3299         'info_dict': {
3300             'id': 'X1whbWASnNQ',  # This will keep changing
3301             'ext': 'mp4',
3302             'title': compat_str,
3303             'uploader': 'Sky News',
3304             'uploader_id': 'skynews',
3305             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
3306             'upload_date': r're:\d{8}',
3307             'description': compat_str,
3308             'categories': ['News & Politics'],
3309             'tags': list,
3310             'like_count': int,
3311             'dislike_count': int,
3312         },
3313         'params': {
3314             'skip_download': True,
3315         },
3316         'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
3317     }, {
3318         'url': 'https://www.youtube.com/user/TheYoungTurks/live',
3319         'info_dict': {
3320             'id': 'a48o2S1cPoo',
3321             'ext': 'mp4',
3322             'title': 'The Young Turks - Live Main Show',
3323             'uploader': 'The Young Turks',
3324             'uploader_id': 'TheYoungTurks',
3325             'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
3326             'upload_date': '20150715',
3327             'license': 'Standard YouTube License',
3328             'description': 'md5:438179573adcdff3c97ebb1ee632b891',
3329             'categories': ['News & Politics'],
3330             'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
3331             'like_count': int,
3332             'dislike_count': int,
3333         },
3334         'params': {
3335             'skip_download': True,
3336         },
3337         'only_matching': True,
3338     }, {
3339         'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
3340         'only_matching': True,
3341     }, {
3342         'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
3343         'only_matching': True,
3344     }, {
3345         'note': 'A channel that is not live. Should raise error',
3346         'url': 'https://www.youtube.com/user/numberphile/live',
3347         'only_matching': True,
3348     }, {
3349         'url': 'https://www.youtube.com/feed/trending',
3350         'only_matching': True,
3351     }, {
3352         'url': 'https://www.youtube.com/feed/library',
3353         'only_matching': True,
3354     }, {
3355         'url': 'https://www.youtube.com/feed/history',
3356         'only_matching': True,
3357     }, {
3358         'url': 'https://www.youtube.com/feed/subscriptions',
3359         'only_matching': True,
3360     }, {
3361         'url': 'https://www.youtube.com/feed/watch_later',
3362         'only_matching': True,
3363     }, {
3364         'note': 'Recommended - redirects to home page',
3365         'url': 'https://www.youtube.com/feed/recommended',
3366         'only_matching': True,
3367     }, {
3368         'note': 'inline playlist with not always working continuations',
3369         'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
3370         'only_matching': True,
3371     }, {
3372         'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
3373         'only_matching': True,
3374     }, {
3375         'url': 'https://www.youtube.com/course',
3376         'only_matching': True,
3377     }, {
3378         'url': 'https://www.youtube.com/zsecurity',
3379         'only_matching': True,
3380     }, {
3381         'url': 'http://www.youtube.com/NASAgovVideo/videos',
3382         'only_matching': True,
3383     }, {
3384         'url': 'https://www.youtube.com/TheYoungTurks/live',
3385         'only_matching': True,
3386     }, {
3387         'url': 'https://www.youtube.com/hashtag/cctv9',
3388         'info_dict': {
3389             'id': 'cctv9',
3390             'title': '#cctv9',
3391         },
3392         'playlist_mincount': 350,
3393     }, {
3394         'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
3395         'only_matching': True,
3396     }, {
3397         'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
3398         'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3399         'only_matching': True
3400     }, {
3401         'note': '/browse/ should redirect to /channel/',
3402         'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
3403         'only_matching': True
3404     }, {
3405         'note': 'VLPL, should redirect to playlist?list=PL...',
3406         'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3407         'info_dict': {
3408             'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
3409             'uploader': 'NoCopyrightSounds',
3410             'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
3411             'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
3412             'title': 'NCS Releases',
3413         },
3414         'playlist_mincount': 166,
3415     }, {
3416         'note': 'Topic, should redirect to playlist?list=UU...',
3417         'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
3418         'info_dict': {
3419             'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
3420             'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
3421             'title': 'Uploads from Royalty Free Music - Topic',
3422             'uploader': 'Royalty Free Music - Topic',
3423         },
3424         'expected_warnings': [
3425             'A channel/user page was given',
3426             'The URL does not have a videos tab',
3427         ],
3428         'playlist_mincount': 101,
3429     }, {
3430         'note': 'Topic without a UU playlist',
3431         'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
3432         'info_dict': {
3433             'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
3434             'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
3435         },
3436         'expected_warnings': [
3437             'A channel/user page was given',
3438             'The URL does not have a videos tab',
3439             'Falling back to channel URL',
3440         ],
3441         'playlist_mincount': 9,
3442     }, {
3443         'note': 'Youtube music Album',
3444         'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
3445         'info_dict': {
3446             'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
3447             'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
3448         },
3449         'playlist_count': 50,
3450     }]
3451
3452     @classmethod
3453     def suitable(cls, url):
3454         return False if YoutubeIE.suitable(url) else super(
3455             YoutubeTabIE, cls).suitable(url)
3456
3457     def _extract_channel_id(self, webpage):
3458         channel_id = self._html_search_meta(
3459             'channelId', webpage, 'channel id', default=None)
3460         if channel_id:
3461             return channel_id
3462         channel_url = self._html_search_meta(
3463             ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3464              'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3465              'twitter:app:url:googleplay'), webpage, 'channel url')
3466         return self._search_regex(
3467             r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3468             channel_url, 'channel id')
3469
3470     @staticmethod
3471     def _extract_basic_item_renderer(item):
3472         # Modified from _extract_grid_item_renderer
3473         known_basic_renderers = (
3474             'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3475         )
3476         for key, renderer in item.items():
3477             if not isinstance(renderer, dict):
3478                 continue
3479             elif key in known_basic_renderers:
3480                 return renderer
3481             elif key.startswith('grid') and key.endswith('Renderer'):
3482                 return renderer
3483
3484     def _grid_entries(self, grid_renderer):
3485         for item in grid_renderer['items']:
3486             if not isinstance(item, dict):
3487                 continue
3488             renderer = self._extract_basic_item_renderer(item)
3489             if not isinstance(renderer, dict):
3490                 continue
3491             title = try_get(
3492                 renderer, (lambda x: x['title']['runs'][0]['text'],
3493                            lambda x: x['title']['simpleText']), compat_str)
3494             # playlist
3495             playlist_id = renderer.get('playlistId')
3496             if playlist_id:
3497                 yield self.url_result(
3498                     'https://www.youtube.com/playlist?list=%s' % playlist_id,
3499                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3500                     video_title=title)
3501                 continue
3502             # video
3503             video_id = renderer.get('videoId')
3504             if video_id:
3505                 yield self._extract_video(renderer)
3506                 continue
3507             # channel
3508             channel_id = renderer.get('channelId')
3509             if channel_id:
3510                 title = try_get(
3511                     renderer, lambda x: x['title']['simpleText'], compat_str)
3512                 yield self.url_result(
3513                     'https://www.youtube.com/channel/%s' % channel_id,
3514                     ie=YoutubeTabIE.ie_key(), video_title=title)
3515                 continue
3516             # generic endpoint URL support
3517             ep_url = urljoin('https://www.youtube.com/', try_get(
3518                 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3519                 compat_str))
3520             if ep_url:
3521                 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3522                     if ie.suitable(ep_url):
3523                         yield self.url_result(
3524                             ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3525                         break
3526
3527     def _shelf_entries_from_content(self, shelf_renderer):
3528         content = shelf_renderer.get('content')
3529         if not isinstance(content, dict):
3530             return
3531         renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3532         if renderer:
3533             # TODO: add support for nested playlists so each shelf is processed
3534             # as separate playlist
3535             # TODO: this includes only first N items
3536             for entry in self._grid_entries(renderer):
3537                 yield entry
3538         renderer = content.get('horizontalListRenderer')
3539         if renderer:
3540             # TODO
3541             pass
3542
3543     def _shelf_entries(self, shelf_renderer, skip_channels=False):
3544         ep = try_get(
3545             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3546             compat_str)
3547         shelf_url = urljoin('https://www.youtube.com', ep)
3548         if shelf_url:
3549             # Skipping links to another channels, note that checking for
3550             # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3551             # will not work
3552             if skip_channels and '/channels?' in shelf_url:
3553                 return
3554             title = try_get(
3555                 shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str)
3556             yield self.url_result(shelf_url, video_title=title)
3557         # Shelf may not contain shelf URL, fallback to extraction from content
3558         for entry in self._shelf_entries_from_content(shelf_renderer):
3559             yield entry
3560
3561     def _playlist_entries(self, video_list_renderer):
3562         for content in video_list_renderer['contents']:
3563             if not isinstance(content, dict):
3564                 continue
3565             renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3566             if not isinstance(renderer, dict):
3567                 continue
3568             video_id = renderer.get('videoId')
3569             if not video_id:
3570                 continue
3571             yield self._extract_video(renderer)
3572
3573     def _rich_entries(self, rich_grid_renderer):
3574         renderer = try_get(
3575             rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3576         video_id = renderer.get('videoId')
3577         if not video_id:
3578             return
3579         yield self._extract_video(renderer)
3580
3581     def _video_entry(self, video_renderer):
3582         video_id = video_renderer.get('videoId')
3583         if video_id:
3584             return self._extract_video(video_renderer)
3585
3586     def _post_thread_entries(self, post_thread_renderer):
3587         post_renderer = try_get(
3588             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3589         if not post_renderer:
3590             return
3591         # video attachment
3592         video_renderer = try_get(
3593             post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3594         video_id = video_renderer.get('videoId')
3595         if video_id:
3596             entry = self._extract_video(video_renderer)
3597             if entry:
3598                 yield entry
3599         # playlist attachment
3600         playlist_id = try_get(
3601             post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3602         if playlist_id:
3603             yield self.url_result(
3604                 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3605                 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3606         # inline video links
3607         runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3608         for run in runs:
3609             if not isinstance(run, dict):
3610                 continue
3611             ep_url = try_get(
3612                 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3613             if not ep_url:
3614                 continue
3615             if not YoutubeIE.suitable(ep_url):
3616                 continue
3617             ep_video_id = YoutubeIE._match_id(ep_url)
3618             if video_id == ep_video_id:
3619                 continue
3620             yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3621
3622     def _post_thread_continuation_entries(self, post_thread_continuation):
3623         contents = post_thread_continuation.get('contents')
3624         if not isinstance(contents, list):
3625             return
3626         for content in contents:
3627             renderer = content.get('backstagePostThreadRenderer')
3628             if not isinstance(renderer, dict):
3629                 continue
3630             for entry in self._post_thread_entries(renderer):
3631                 yield entry
3632
3633     r''' # unused
3634     def _rich_grid_entries(self, contents):
3635         for content in contents:
3636             video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3637             if video_renderer:
3638                 entry = self._video_entry(video_renderer)
3639                 if entry:
3640                     yield entry
3641     '''
    def _entries(self, tab, item_id, identity_token, account_syncid, ytcfg):
        """Yield all entries of a tab, following continuations page by page.

        The entries embedded in tab['content'] (a 'sectionListRenderer' or a
        'richGridRenderer') are yielded first. Afterwards further pages are
        requested through ``_extract_response`` for as long as a continuation
        is known, a response comes back and the response contains a renderer
        this method knows how to handle.

        tab is the tab renderer dict ('content' and 'title' are read from it),
        item_id is only used to label the page requests, identity_token and
        account_syncid are passed on to ``_generate_api_headers`` and ytcfg is
        passed on to ``_extract_context``, ``_generate_api_headers`` and
        ``_extract_response``.
        """

        # Generator over the entries below parent_renderer['contents'].
        # As a side effect it stores the continuation it finds in
        # continuation_list[0] (a one-element list shared with the enclosing
        # function), preferring the innermost renderer that provides one.
        def extract_entries(parent_renderer):  # this needs to be called again for continuation to work with feeds
            contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
                if not is_renderer:
                    # Not an item section: only a richItemRenderer is handled
                    renderer = content.get('richItemRenderer')
                    if renderer:
                        for entry in self._rich_entries(renderer):
                            yield entry
                        continuation_list[0] = self._extract_continuation(parent_renderer)
                    continue
                isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
                for isr_content in isr_contents:
                    if not isinstance(isr_content, dict):
                        continue

                    # Renderer key -> callable returning an iterable of entries
                    known_renderers = {
                        'playlistVideoListRenderer': self._playlist_entries,
                        'gridRenderer': self._grid_entries,
                        # channel links in shelves are skipped unless the tab title is 'Channels'
                        'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'),
                        'backstagePostThreadRenderer': self._post_thread_entries,
                        'videoRenderer': lambda x: [self._video_entry(x)],
                    }
                    for key, renderer in isr_content.items():
                        if key not in known_renderers:
                            continue
                        for entry in known_renderers[key](renderer):
                            # falsy entries (e.g. None from _video_entry) are dropped
                            if entry:
                                yield entry
                        continuation_list[0] = self._extract_continuation(renderer)
                        # only the first known renderer of this item is processed
                        break

                # No continuation found so far: try the item section itself
                if not continuation_list[0]:
                    continuation_list[0] = self._extract_continuation(is_renderer)

            # Last resort: the continuation of the parent renderer
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(parent_renderer)

        # One-element list used as a writable cell for extract_entries
        continuation_list = [None]  # Python 2 does not support nonlocal
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        # First page: the entries embedded in the tab itself
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]
        context = self._extract_context(ytcfg)
        visitor_data = try_get(context, lambda x: x['client']['visitorData'], compat_str)

        # Following pages: one request per continuation
        for page_num in itertools.count(1):
            if not continuation:
                break
            query = {
                'continuation': continuation['continuation'],
                'clickTracking': {'clickTrackingParams': continuation['itct']}
            }
            headers = self._generate_api_headers(ytcfg, identity_token, account_syncid, visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=query, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Keep the previous visitor data when the response carries none
            visitor_data = try_get(
                response, lambda x: x['responseContext']['visitorData'], compat_str) or visitor_data

            # Response shape 1: entries below 'continuationContents'
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                # Fresh cell for this page; extract_entries looks the name up
                # through its closure and therefore writes into this new list
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # Response shape 2: a list of continuation items below
            # onResponseReceivedActions/Endpoints. The key of the first item
            # selects the handler and the name under which the handler expects
            # the item list.
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Wrap the whole item list so that it looks like a regular renderer
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            # Neither response shape was recognised: stop paging
            break
3761
3762     @staticmethod
3763     def _extract_selected_tab(tabs):
3764         for tab in tabs:
3765             renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3766             if renderer.get('selected') is True:
3767                 return renderer
3768         else:
3769             raise ExtractorError('Unable to find selected tab')
3770
3771     @staticmethod
3772     def _extract_uploader(data):
3773         uploader = {}
3774         sidebar_renderer = try_get(
3775             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3776         if sidebar_renderer:
3777             for item in sidebar_renderer:
3778                 if not isinstance(item, dict):
3779                     continue
3780                 renderer = item.get('playlistSidebarSecondaryInfoRenderer')
3781                 if not isinstance(renderer, dict):
3782                     continue
3783                 owner = try_get(
3784                     renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3785                 if owner:
3786                     uploader['uploader'] = owner.get('text')
3787                     uploader['uploader_id'] = try_get(
3788                         owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3789                     uploader['uploader_url'] = urljoin(
3790                         'https://www.youtube.com/',
3791                         try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3792         return {k: v for k, v in uploader.items() if v is not None}
3793
3794     def _extract_from_tabs(self, item_id, webpage, data, tabs):
3795         playlist_id = title = description = channel_url = channel_name = channel_id = None
3796         thumbnails_list = tags = []
3797
3798         selected_tab = self._extract_selected_tab(tabs)
3799         renderer = try_get(
3800             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3801         if renderer:
3802             channel_name = renderer.get('title')
3803             channel_url = renderer.get('channelUrl')
3804             channel_id = renderer.get('externalId')
3805         else:
3806             renderer = try_get(
3807                 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3808
3809         if renderer:
3810             title = renderer.get('title')
3811             description = renderer.get('description', '')
3812             playlist_id = channel_id
3813             tags = renderer.get('keywords', '').split()
3814             thumbnails_list = (
3815                 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
3816                 or try_get(
3817                     data,
3818                     lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
3819                     list)
3820                 or [])
3821
3822         thumbnails = []
3823         for t in thumbnails_list:
3824             if not isinstance(t, dict):
3825                 continue
3826             thumbnail_url = url_or_none(t.get('url'))
3827             if not thumbnail_url:
3828                 continue
3829             thumbnails.append({
3830                 'url': thumbnail_url,
3831                 'width': int_or_none(t.get('width')),
3832                 'height': int_or_none(t.get('height')),
3833             })
3834         if playlist_id is None:
3835             playlist_id = item_id
3836         if title is None:
3837             title = (
3838                 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3839                 or playlist_id)
3840         title += format_field(selected_tab, 'title', ' - %s')
3841         title += format_field(selected_tab, 'expandedText', ' - %s')
3842
3843         metadata = {
3844             'playlist_id': playlist_id,
3845             'playlist_title': title,
3846             'playlist_description': description,
3847             'uploader': channel_name,
3848             'uploader_id': channel_id,
3849             'uploader_url': channel_url,
3850             'thumbnails': thumbnails,
3851             'tags': tags,
3852         }
3853         if not channel_id:
3854             metadata.update(self._extract_uploader(data))
3855         metadata.update({
3856             'channel': metadata['uploader'],
3857             'channel_id': metadata['uploader_id'],
3858             'channel_url': metadata['uploader_url']})
3859         return self.playlist_result(
3860             self._entries(
3861                 selected_tab, playlist_id,
3862                 self._extract_identity_token(webpage, item_id),
3863                 self._extract_account_syncid(data),
3864                 self._extract_ytcfg(item_id, webpage)),
3865             **metadata)
3866
3867     def _extract_mix_playlist(self, playlist, playlist_id, data, webpage):
3868         first_id = last_id = None
3869         ytcfg = self._extract_ytcfg(playlist_id, webpage)
3870         headers = self._generate_api_headers(
3871             ytcfg, account_syncid=self._extract_account_syncid(data),
3872             identity_token=self._extract_identity_token(webpage, item_id=playlist_id),
3873             visitor_data=try_get(self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
3874         for page_num in itertools.count(1):
3875             videos = list(self._playlist_entries(playlist))
3876             if not videos:
3877                 return
3878             start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3879             if start >= len(videos):
3880                 return
3881             for video in videos[start:]:
3882                 if video['id'] == first_id:
3883                     self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3884                     return
3885                 yield video
3886             first_id = first_id or videos[0]['id']
3887             last_id = videos[-1]['id']
3888             watch_endpoint = try_get(
3889                 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
3890             query = {
3891                 'playlistId': playlist_id,
3892                 'videoId': watch_endpoint.get('videoId') or last_id,
3893                 'index': watch_endpoint.get('index') or len(videos),
3894                 'params': watch_endpoint.get('params') or 'OAE%3D'
3895             }
3896             response = self._extract_response(
3897                 item_id='%s page %d' % (playlist_id, page_num),
3898                 query=query,
3899                 ep='next',
3900                 headers=headers,
3901                 check_get_keys='contents'
3902             )
3903             playlist = try_get(
3904                 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3905
3906     def _extract_from_playlist(self, item_id, url, data, playlist, webpage):
3907         title = playlist.get('title') or try_get(
3908             data, lambda x: x['titleText']['simpleText'], compat_str)
3909         playlist_id = playlist.get('playlistId') or item_id
3910
3911         # Delegating everything except mix playlists to regular tab-based playlist URL
3912         playlist_url = urljoin(url, try_get(
3913             playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3914             compat_str))
3915         if playlist_url and playlist_url != url:
3916             return self.url_result(
3917                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3918                 video_title=title)
3919
3920         return self.playlist_result(
3921             self._extract_mix_playlist(playlist, playlist_id, data, webpage),
3922             playlist_id=playlist_id, playlist_title=title)
3923
3924     def _reload_with_unavailable_videos(self, item_id, data, webpage):
3925         """
3926         Get playlist with unavailable videos if the 'show unavailable videos' button exists.
3927         """
3928         sidebar_renderer = try_get(
3929             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list)
3930         if not sidebar_renderer:
3931             return
3932         browse_id = params = None
3933         for item in sidebar_renderer:
3934             if not isinstance(item, dict):
3935                 continue
3936             renderer = item.get('playlistSidebarPrimaryInfoRenderer')
3937             menu_renderer = try_get(
3938                 renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
3939             for menu_item in menu_renderer:
3940                 if not isinstance(menu_item, dict):
3941                     continue
3942                 nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
3943                 text = try_get(
3944                     nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
3945                 if not text or text.lower() != 'show unavailable videos':
3946                     continue
3947                 browse_endpoint = try_get(
3948                     nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
3949                 browse_id = browse_endpoint.get('browseId')
3950                 params = browse_endpoint.get('params')
3951                 break
3952
3953             ytcfg = self._extract_ytcfg(item_id, webpage)
3954             headers = self._generate_api_headers(
3955                 ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
3956                 identity_token=self._extract_identity_token(webpage, item_id=item_id),
3957                 visitor_data=try_get(
3958                     self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
3959             query = {
3960                 'params': params or 'wgYCCAA=',
3961                 'browseId': browse_id or 'VL%s' % item_id
3962             }
3963             return self._extract_response(
3964                 item_id=item_id, headers=headers, query=query,
3965                 check_get_keys='contents', fatal=False,
3966                 note='Downloading API JSON with unavailable videos')
3967
3968     def _extract_webpage(self, url, item_id):
3969         retries = self.get_param('extractor_retries', 3)
3970         count = -1
3971         last_error = 'Incomplete yt initial data recieved'
3972         while count < retries:
3973             count += 1
3974             # Sometimes youtube returns a webpage with incomplete ytInitialData
3975             # See: https://github.com/yt-dlp/yt-dlp/issues/116
3976             if count:
3977                 self.report_warning('%s. Retrying ...' % last_error)
3978             webpage = self._download_webpage(
3979                 url, item_id,
3980                 'Downloading webpage%s' % (' (retry #%d)' % count if count else ''))
3981             data = self._extract_yt_initial_data(item_id, webpage)
3982             if data.get('contents') or data.get('currentVideoEndpoint'):
3983                 break
3984             # Extract alerts here only when there is error
3985             self._extract_and_report_alerts(data)
3986             if count >= retries:
3987                 raise ExtractorError(last_error)
3988         return webpage, data
3989
3990     @staticmethod
3991     def _smuggle_data(entries, data):
3992         for entry in entries:
3993             if data:
3994                 entry['url'] = smuggle_url(entry['url'], data)
3995             yield entry
3996
3997     def _real_extract(self, url):
3998         url, smuggled_data = unsmuggle_url(url, {})
3999         if self.is_music_url(url):
4000             smuggled_data['is_music_url'] = True
4001         info_dict = self.__real_extract(url, smuggled_data)
4002         if info_dict.get('entries'):
4003             info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
4004         return info_dict
4005
    # Splits a URL into the part matched by _VALID_URL ('pre'), an optional
    # '/tab' path component (only tried when _VALID_URL's 'channel_type' group
    # took part in the match) and everything that follows ('post')
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
4007
    def __real_extract(self, url, smuggled_data):
        """Normalize *url*, download the page and dispatch on what the page contains.

        The URL is rewritten to www.youtube.com, music channel ids are mapped
        to playlist/channel URLs and tab-less channel URLs get '/videos'
        appended (unless the 'no-youtube-channel-redirect' compat option is
        set). Depending on the downloaded data the result is built from the
        tabs, from the watch-page playlist panel, or is a url_result for a
        single video. Raises ExtractorError when none of these applies.
        """
        item_id = self._match_id(url)
        # Force the canonical host regardless of the one given
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Match groups as a dict, with unmatched groups turned into ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']

        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Youtube music albums (/channel/MP...) have a OLAK playlist that can be extracted from the webpage
                    item_id = self._search_regex(
                        r'\\x22audioPlaylistId\\x22:\\x22([0-9A-Za-z_-]+)\\x22',
                        self._download_webpage('https://music.youtube.com/channel/%s' % item_id, item_id),
                        'playlist id')
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Rebuild the URL from the (possibly rewritten) parts and re-match it
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage, data = self._extract_webpage(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                # The page served a different tab than the requested /videos
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            pl_webpage, pl_data = self._extract_webpage(pl_url, pl_id)
                            for alert_type, alert_message in self._extract_alerts(pl_data):
                                if alert_type == 'error':
                                    raise ExtractorError('Youtube said: %s' % alert_message)
                            # Switch over to the playlist only after it loaded without error alerts
                            item_id, url, webpage, data = pl_id, pl_url, pl_webpage, pl_data
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
        self._extract_and_report_alerts(data)

        # data may have been replaced above, so look the tabs up again
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, webpage, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, webpage)

        # Neither tabs nor a playlist panel: fall back to the single video, if any
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(video_id, ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4123
4124
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist ids and any youtube/invidio.us URL carrying a
    # 'list' query parameter, and forwards them to YoutubeTabIE
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube.com playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                invidio\.us
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'note': 'issue #673',
        'url': 'PLBB231211A4F62143',
        'info_dict': {
            'title': '[OLD]Team Fortress 2 (Class-based LP)',
            'id': 'PLBB231211A4F62143',
            'uploader': 'Wickydoo',
            'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
        },
        'playlist_mincount': 29,
    }, {
        'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        'info_dict': {
            'title': 'YDL_safe_search',
            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
        },
        'playlist_count': 2,
        'skip': 'This playlist is private',
    }, {
        'note': 'embedded',
        'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
        'playlist_count': 4,
        'info_dict': {
            'title': 'JODA15',
            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'uploader': 'milan',
            'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
        }
    }, {
        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
        'playlist_mincount': 982,
        'info_dict': {
            'title': '2018 Chinese New Singles (11/6 updated)',
            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'uploader': 'LBK',
            'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
        }
    }, {
        'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
        'only_matching': True,
    }, {
        # music album playlist
        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Reject URLs that YoutubeTabIE handles or that carry a video id ('v')."""
        if YoutubeTabIE.suitable(url):
            return False
        # Hack for lazy extractors until more generic solution is implemented
        # (see #28780)
        from .youtube import parse_qs
        has_video_id = parse_qs(url).get('v', [None])[0]
        if has_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input into a /playlist URL and delegate to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        is_music_url = YoutubeBaseInfoExtractor.is_music_url(url)
        # Keep the original query if there is one, else just the playlist id
        query = parse_qs(url) or {'list': playlist_id}
        url = update_url_query('https://www.youtube.com/playlist', query)
        if is_music_url:
            url = smuggle_url(url, {'is_music_url': True})
        return self.url_result(url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4207
4208
class YoutubeYtBeIE(InfoExtractor):
    # youtu.be short links that also carry a playlist id
    IE_DESC = 'youtu.be'
    _VALID_URL = r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
    _TESTS = [{
        'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
        'info_dict': {
            'id': 'yeWKywCrFtk',
            'ext': 'mp4',
            'title': 'Small Scale Baler and Braiding Rugs',
            'uploader': 'Backus-Page House Museum',
            'uploader_id': 'backuspagemuseum',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
            'upload_date': '20161008',
            'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
            'categories': ['Nonprofits & Activism'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
    }, {
        'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Turn the short link into a full watch URL and delegate to YoutubeTabIE."""
        video_id, playlist_id = re.match(self._VALID_URL, url).group('id', 'playlist_id')
        watch_url = update_url_query('https://www.youtube.com/watch', {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        })
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4247
4248
class YoutubeYtUserIE(InfoExtractor):
    # 'ytuser:<name>' shorthand for a user's channel page
    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [{
        'url': 'ytuser:phihag',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Map the keyword form to the /user/ URL and delegate to YoutubeTabIE."""
        user_id = self._match_id(url)
        user_url = 'https://www.youtube.com/user/%s' % user_id
        return self.url_result(
            user_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4262
4263
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # ':ytfav' style keywords, served from the 'LL' playlist
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [{
        'url': ':ytfav',
        'only_matching': True,
    }, {
        'url': ':ytfavorites',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the 'LL' playlist URL."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4281
4282
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = None
    # there doesn't appear to be a real limit, for example if you search for
    # 'python' you get more than 8.000.000 results
    _MAX_RESULTS = float('inf')
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* video results for *query*, following continuation tokens page by page."""
        request = {'query': query}
        if self._SEARCH_PARAMS:
            request['params'] = self._SEARCH_PARAMS
        yielded = 0
        for page_num in itertools.count(1):
            search = self._extract_response(
                item_id='query "%s" page %s' % (query, page_num), ep='search', query=request,
                check_get_keys=('contents', 'onResponseReceivedCommands')
            )
            if not search:
                return
            # First page and continuation pages keep the sections in different places
            sections = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not sections:
                return

            # Youtube sometimes adds promoted content to searches,
            # changing the index location of videos and token.
            # So we search through all entries till we find them.
            next_token = None
            for section in sections:
                if next_token is None:
                    next_token = try_get(
                        section,
                        lambda x: x['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                        compat_str)

                items = try_get(
                    section,
                    lambda x: x['itemSectionRenderer']['contents'],
                    list)
                for item in items or ():
                    if not isinstance(item, dict):
                        continue
                    video = item.get('videoRenderer')
                    # Skip anything that is not a video renderer with an id
                    if not isinstance(video, dict) or not video.get('videoId'):
                        continue

                    yield self._extract_video(video)
                    yielded += 1
                    if yielded == n:
                        return

            if not next_token:
                return
            request['continuation'] = next_token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        entries = self._entries(query, n)
        return self.playlist_result(entries, query)
4352
4353
class YoutubeSearchDateIE(YoutubeSearchIE):
    # Same search as YoutubeSearchIE, with a fixed 'params' value for newest-first ordering
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
    _SEARCH_KEY = 'ytsearchdate'
    _SEARCH_PARAMS = 'CAI%3D'
4359
4360
class YoutubeSearchURLIE(YoutubeSearchIE):
    # Handles /results?search_query=... (or q=...) URLs
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube.com search URLs'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    # _MAX_RESULTS = 100
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'title': 'youtube-dl test video',
        }
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    @classmethod
    def _make_valid_url(cls):
        """Use the explicit _VALID_URL of this class as is."""
        return cls._VALID_URL

    def _real_extract(self, url):
        """Read the query and the optional 'sp' filter from the URL and run the search."""
        url_params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
        search_terms = url_params.get('search_query') or url_params.get('q')
        query = search_terms[0]
        # 'sp' is stored on the instance, where _entries() picks it up
        self._SEARCH_PARAMS = url_params.get('sp', ('',))[0]
        return self._get_n_results(query, self._MAX_RESULTS)
4386
4387
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common base of the feed extractors
    Every subclass has to provide the _FEED_NAME property.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        """Extractor name derived from the feed name."""
        return 'youtube:%s' % self._FEED_NAME

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the /feed/<_FEED_NAME> URL."""
        feed_url = 'https://www.youtube.com/feed/%s' % self._FEED_NAME
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4404
4405
class YoutubeWatchLaterIE(InfoExtractor):
    # ':ytwatchlater' keyword, served from the 'WL' playlist
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [{
        'url': ':ytwatchlater',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to YoutubeTabIE with the 'WL' playlist URL."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4418
4419
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Matches the bare youtube.com home page as well as the ':ytrec' keywords
    _FEED_NAME = 'recommended'
    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [{
        'url': ':ytrec',
        'only_matching': True,
    }, {
        'url': ':ytrecommended',
        'only_matching': True,
    }, {
        'url': 'https://youtube.com',
        'only_matching': True,
    }]
4435
4436
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # ':ytsubs' style keywords for the 'subscriptions' feed
    _FEED_NAME = 'subscriptions'
    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [{
        'url': ':ytsubs',
        'only_matching': True,
    }, {
        'url': ':ytsubscriptions',
        'only_matching': True,
    }]
4448
4449
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    """Feed extractor for the ``history`` feed."""
    _FEED_NAME = 'history'
    # Keyword forms covered by the pattern: ":ythis" and ":ythistory".
    _VALID_URL = r':ythis(?:tory)?'
    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
    _TESTS = [
        {'url': ':ythistory', 'only_matching': True},
    ]
4458
4459
class YoutubeTruncatedURLIE(InfoExtractor):
    """Catch YouTube URLs that were cut off before the video id.

    The pattern matches a ``watch?`` URL that ends after at most one of a few
    known non-id parameters (``feature``, ``annotation_id``, ``x-yt-cl``,
    ``hl``, ``t``), or an ``attribution_link?a=...`` URL with nothing after
    the first parameter.  Such a URL carries no video id, so extraction
    always fails with a hint about quoting the URL.
    """
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with a hint that the URL was probably left unquoted.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        # The example commands name this program's own executable (yt-dlp),
        # so the suggestion can be copied and run as-is.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  yt-dlp BaW_jenozKc  .',
            expected=True)
4507
4508
class YoutubeTruncatedIDIE(InfoExtractor):
    """Catch ``watch?v=`` URLs whose id part is only 1-10 characters long.

    Extraction never succeeds: the URL is reported as truncated.
    """
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {'url': 'https://www.youtube.com/watch?v=N_708QY7Ob', 'only_matching': True},
    ]

    def _real_extract(self, url):
        """Always fail, naming the incomplete id and the offending URL.

        Raises:
            ExtractorError: unconditionally, with ``expected=True``.
        """
        truncated_id = self._match_id(url)
        message = 'Incomplete YouTube ID %s. URL %s looks truncated.' % (truncated_id, url)
        raise ExtractorError(message, expected=True)