yt_dlp/extractor/youtube.py

   1 # coding: utf-8
   2
   3 from __future__ import unicode_literals
   4
   5 import base64
   6 import calendar
   7 import copy
   8 import datetime
   9 import hashlib
  10 import itertools
  11 import json
  12 import math
  13 import os.path
  14 import random
  15 import re
  16 import time
  17 import traceback
  18
  19 from .common import InfoExtractor, SearchInfoExtractor
  20 from ..compat import (
  21     compat_chr,
  22     compat_HTTPError,
  23     compat_parse_qs,
  24     compat_str,
  25     compat_urllib_parse_unquote_plus,
  26     compat_urllib_parse_urlencode,
  27     compat_urllib_parse_urlparse,
  28     compat_urlparse,
  29 )
  30 from ..jsinterp import JSInterpreter
  31 from ..utils import (
  32     bug_reports_message,
  33     bytes_to_intlist,
  34     clean_html,
  35     datetime_from_str,
  36     dict_get,
  37     error_to_compat_str,
  38     ExtractorError,
  39     float_or_none,
  40     format_field,
  41     int_or_none,
  42     intlist_to_bytes,
  43     is_html,
  44     join_nonempty,
  45     mimetype2ext,
  46     network_exceptions,
  47     NO_DEFAULT,
  48     orderedSet,
  49     parse_codecs,
  50     parse_count,
  51     parse_duration,
  52     parse_iso8601,
  53     parse_qs,
  54     qualities,
  55     remove_end,
  56     remove_start,
  57     smuggle_url,
  58     str_or_none,
  59     str_to_int,
  60     traverse_obj,
  61     try_get,
  62     unescapeHTML,
  63     unified_strdate,
  64     unsmuggle_url,
  65     update_url_query,
  66     url_or_none,
  67     urljoin,
  68     variadic,
  69 )
  70
  71
  72 def get_first(obj, keys, **kwargs):
  73     return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False)
  74
  75
# Any clients starting with _ cannot be explicitly requested by the user
  77 INNERTUBE_CLIENTS = {
  78     'web': {
  79         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
  80         'INNERTUBE_CONTEXT': {
  81             'client': {
  82                 'clientName': 'WEB',
  83                 'clientVersion': '2.20210622.10.00',
  84             }
  85         },
  86         'INNERTUBE_CONTEXT_CLIENT_NAME': 1
  87     },
  88     'web_embedded': {
  89         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
  90         'INNERTUBE_CONTEXT': {
  91             'client': {
  92                 'clientName': 'WEB_EMBEDDED_PLAYER',
  93                 'clientVersion': '1.20210620.0.1',
  94             },
  95         },
  96         'INNERTUBE_CONTEXT_CLIENT_NAME': 56
  97     },
  98     'web_music': {
  99         'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
 100         'INNERTUBE_HOST': 'music.youtube.com',
 101         'INNERTUBE_CONTEXT': {
 102             'client': {
 103                 'clientName': 'WEB_REMIX',
 104                 'clientVersion': '1.20210621.00.00',
 105             }
 106         },
 107         'INNERTUBE_CONTEXT_CLIENT_NAME': 67,
 108     },
 109     'web_creator': {
 110         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 111         'INNERTUBE_CONTEXT': {
 112             'client': {
 113                 'clientName': 'WEB_CREATOR',
 114                 'clientVersion': '1.20210621.00.00',
 115             }
 116         },
 117         'INNERTUBE_CONTEXT_CLIENT_NAME': 62,
 118     },
 119     'android': {
 120         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 121         'INNERTUBE_CONTEXT': {
 122             'client': {
 123                 'clientName': 'ANDROID',
 124                 'clientVersion': '16.20',
 125             }
 126         },
 127         'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
 128         'REQUIRE_JS_PLAYER': False
 129     },
 130     'android_embedded': {
 131         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 132         'INNERTUBE_CONTEXT': {
 133             'client': {
 134                 'clientName': 'ANDROID_EMBEDDED_PLAYER',
 135                 'clientVersion': '16.20',
 136             },
 137         },
 138         'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
 139         'REQUIRE_JS_PLAYER': False
 140     },
 141     'android_music': {
 142         'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30',
 143         'INNERTUBE_HOST': 'music.youtube.com',
 144         'INNERTUBE_CONTEXT': {
 145             'client': {
 146                 'clientName': 'ANDROID_MUSIC',
 147                 'clientVersion': '4.32',
 148             }
 149         },
 150         'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
 151         'REQUIRE_JS_PLAYER': False
 152     },
 153     'android_creator': {
 154         'INNERTUBE_CONTEXT': {
 155             'client': {
 156                 'clientName': 'ANDROID_CREATOR',
 157                 'clientVersion': '21.24.100',
 158             },
 159         },
 160         'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
 161         'REQUIRE_JS_PLAYER': False
 162     },
 163     # ios has HLS live streams
 164     # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680
 165     'ios': {
 166         'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
 167         'INNERTUBE_CONTEXT': {
 168             'client': {
 169                 'clientName': 'IOS',
 170                 'clientVersion': '16.20',
 171             }
 172         },
 173         'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
 174         'REQUIRE_JS_PLAYER': False
 175     },
 176     'ios_embedded': {
 177         'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
 178         'INNERTUBE_CONTEXT': {
 179             'client': {
 180                 'clientName': 'IOS_MESSAGES_EXTENSION',
 181                 'clientVersion': '16.20',
 182             },
 183         },
 184         'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
 185         'REQUIRE_JS_PLAYER': False
 186     },
 187     'ios_music': {
 188         'INNERTUBE_API_KEY': 'AIzaSyDK3iBpDP9nHVTk2qL73FLJICfOC3c51Og',
 189         'INNERTUBE_HOST': 'music.youtube.com',
 190         'INNERTUBE_CONTEXT': {
 191             'client': {
 192                 'clientName': 'IOS_MUSIC',
 193                 'clientVersion': '4.32',
 194             },
 195         },
 196         'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
 197         'REQUIRE_JS_PLAYER': False
 198     },
 199     'ios_creator': {
 200         'INNERTUBE_CONTEXT': {
 201             'client': {
 202                 'clientName': 'IOS_CREATOR',
 203                 'clientVersion': '21.24.100',
 204             },
 205         },
 206         'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
 207         'REQUIRE_JS_PLAYER': False
 208     },
 209     # mweb has 'ultralow' formats
 210     # See: https://github.com/yt-dlp/yt-dlp/pull/557
 211     'mweb': {
 212         'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8',
 213         'INNERTUBE_CONTEXT': {
 214             'client': {
 215                 'clientName': 'MWEB',
 216                 'clientVersion': '2.20210721.07.00',
 217             }
 218         },
 219         'INNERTUBE_CONTEXT_CLIENT_NAME': 2
 220     },
 221 }
 222
 223
 224 def build_innertube_clients():
 225     third_party = {
 226         'embedUrl': 'https://google.com',  # Can be any valid URL
 227     }
 228     base_clients = ('android', 'web', 'ios', 'mweb')
 229     priority = qualities(base_clients[::-1])
 230
 231     for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
 232         ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
 233         ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
 234         ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
 235         ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
 236         ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
 237
 238         if client in base_clients:
 239             INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
 240             agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
 241             agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
 242             agegate_ytcfg['priority'] -= 1
 243         elif client.endswith('_embedded'):
 244             ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
 245             ytcfg['priority'] -= 2
 246         else:
 247             ytcfg['priority'] -= 3
 248
 249
 250 build_innertube_clients()
 251
 252
 253 class YoutubeBaseInfoExtractor(InfoExtractor):
 254     """Provide base functions for Youtube extractors"""
 255
    # Regex alternation of reserved path names.
    # NOTE(review): presumably URL path segments that must not be mistaken
    # for channel/user names - confirm against the URL patterns that use it
    _RESERVED_NAMES = (
        r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
        r'shorts|movies|results|shared|hashtag|trending|feed|feeds|'
        r'browse|oembed|get_video_info|iframe_api|s/player|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')

    # Playlist IDs: one of the listed prefixes followed by at least 10 ID
    # characters, or one of the fixed IDs RDMM, WL, LL, LM
    _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'

    # NOTE(review): presumably the machine name used for .netrc lookups - confirm in InfoExtractor
    _NETRC_MACHINE = 'youtube'

    # If True it will raise an error if no login info is provided
    _LOGIN_REQUIRED = False
 268
 269     _INVIDIOUS_SITES = (
 270         # invidious-redirect websites
 271         r'(?:www\.)?redirect\.invidious\.io',
 272         r'(?:(?:www|dev)\.)?invidio\.us',
 273         # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
 274         r'(?:www\.)?invidious\.pussthecat\.org',
 275         r'(?:www\.)?invidious\.zee\.li',
 276         r'(?:www\.)?invidious\.ethibox\.fr',
 277         r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
 278         # youtube-dl invidious instances list
 279         r'(?:(?:www|no)\.)?invidiou\.sh',
 280         r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
 281         r'(?:www\.)?invidious\.kabi\.tk',
 282         r'(?:www\.)?invidious\.mastodon\.host',
 283         r'(?:www\.)?invidious\.zapashcanon\.fr',
 284         r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
 285         r'(?:www\.)?invidious\.tinfoil-hat\.net',
 286         r'(?:www\.)?invidious\.himiko\.cloud',
 287         r'(?:www\.)?invidious\.reallyancient\.tech',
 288         r'(?:www\.)?invidious\.tube',
 289         r'(?:www\.)?invidiou\.site',
 290         r'(?:www\.)?invidious\.site',
 291         r'(?:www\.)?invidious\.xyz',
 292         r'(?:www\.)?invidious\.nixnet\.xyz',
 293         r'(?:www\.)?invidious\.048596\.xyz',
 294         r'(?:www\.)?invidious\.drycat\.fr',
 295         r'(?:www\.)?inv\.skyn3t\.in',
 296         r'(?:www\.)?tube\.poal\.co',
 297         r'(?:www\.)?tube\.connect\.cafe',
 298         r'(?:www\.)?vid\.wxzm\.sx',
 299         r'(?:www\.)?vid\.mint\.lgbt',
 300         r'(?:www\.)?vid\.puffyan\.us',
 301         r'(?:www\.)?yewtu\.be',
 302         r'(?:www\.)?yt\.elukerio\.org',
 303         r'(?:www\.)?yt\.lelux\.fi',
 304         r'(?:www\.)?invidious\.ggc-project\.de',
 305         r'(?:www\.)?yt\.maisputain\.ovh',
 306         r'(?:www\.)?ytprivate\.com',
 307         r'(?:www\.)?invidious\.13ad\.de',
 308         r'(?:www\.)?invidious\.toot\.koeln',
 309         r'(?:www\.)?invidious\.fdn\.fr',
 310         r'(?:www\.)?watch\.nettohikari\.com',
 311         r'(?:www\.)?invidious\.namazso\.eu',
 312         r'(?:www\.)?invidious\.silkky\.cloud',
 313         r'(?:www\.)?invidious\.exonip\.de',
 314         r'(?:www\.)?invidious\.riverside\.rocks',
 315         r'(?:www\.)?invidious\.blamefran\.net',
 316         r'(?:www\.)?invidious\.moomoo\.de',
 317         r'(?:www\.)?ytb\.trom\.tf',
 318         r'(?:www\.)?yt\.cyberhost\.uk',
 319         r'(?:www\.)?kgg2m7yk5aybusll\.onion',
 320         r'(?:www\.)?qklhadlycap4cnod\.onion',
 321         r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
 322         r'(?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion',
 323         r'(?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion',
 324         r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
 325         r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
 326         r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
 327         r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
 328         r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
 329         r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
 330         r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
 331     )
 332
 333     def _login(self):
 334         """
 335         Attempt to log in to YouTube.
 336         If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
 337         """
 338
 339         if (self._LOGIN_REQUIRED
 340                 and self.get_param('cookiefile') is None
 341                 and self.get_param('cookiesfrombrowser') is None):
 342             self.raise_login_required(
 343                 'Login details are needed to download this content', method='cookies')
 344         username, password = self._get_login_info()
 345         if username:
 346             self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}')
 347
 348     def _initialize_consent(self):
 349         cookies = self._get_cookies('https://www.youtube.com/')
 350         if cookies.get('__Secure-3PSID'):
 351             return
 352         consent_id = None
 353         consent = cookies.get('CONSENT')
 354         if consent:
 355             if 'YES' in consent.value:
 356                 return
 357             consent_id = self._search_regex(
 358                 r'PENDING\+(\d+)', consent.value, 'consent', default=None)
 359         if not consent_id:
 360             consent_id = random.randint(100, 999)
 361         self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
 362
    def _real_initialize(self):
        """Set up the consent cookie, then run the login checks."""
        self._initialize_consent()
        self._login()
 366
    # Matches `ytInitialData = {...};` or `window["ytInitialData"] = {...};`
    # and captures the (non-greedy) object literal
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    # Same shape for `ytInitialPlayerResponse = {...};`
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    # Text expected right after the data assignment; extract_yt_initial_data
    # first tries the data regex followed by this boundary
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
 370
 371     def _get_default_ytcfg(self, client='web'):
 372         return copy.deepcopy(INNERTUBE_CLIENTS[client])
 373
 374     def _get_innertube_host(self, client='web'):
 375         return INNERTUBE_CLIENTS[client]['INNERTUBE_HOST']
 376
 377     def _ytcfg_get_safe(self, ytcfg, getter, expected_type=None, default_client='web'):
 378         # try_get but with fallback to default ytcfg client values when present
 379         _func = lambda y: try_get(y, getter, expected_type)
 380         return _func(ytcfg) or _func(self._get_default_ytcfg(default_client))
 381
 382     def _extract_client_name(self, ytcfg, default_client='web'):
 383         return self._ytcfg_get_safe(
 384             ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'],
 385                     lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client)
 386
 387     def _extract_client_version(self, ytcfg, default_client='web'):
 388         return self._ytcfg_get_safe(
 389             ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'],
 390                     lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client)
 391
 392     def _extract_api_key(self, ytcfg=None, default_client='web'):
 393         return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client)
 394
 395     def _extract_context(self, ytcfg=None, default_client='web'):
 396         _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict)
 397         context = _get_context(ytcfg)
 398         if context:
 399             return context
 400
 401         context = _get_context(self._get_default_ytcfg(default_client))
 402         if not ytcfg:
 403             return context
 404
 405         # Recreate the client context (required)
 406         context['client'].update({
 407             'clientVersion': self._extract_client_version(ytcfg, default_client),
 408             'clientName': self._extract_client_name(ytcfg, default_client),
 409         })
 410         visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str)
 411         if visitor_data:
 412             context['client']['visitorData'] = visitor_data
 413         return context
 414
    # Cached SAPISID cookie value used by _generate_sapisidhash_header:
    # None = not looked up yet, False = no usable cookie found, str = the value
    _SAPISID = None
 416
 417     def _generate_sapisidhash_header(self, origin='https://www.youtube.com'):
 418         time_now = round(time.time())
 419         if self._SAPISID is None:
 420             yt_cookies = self._get_cookies('https://www.youtube.com')
 421             # Sometimes SAPISID cookie isn't present but __Secure-3PAPISID is.
 422             # See: https://github.com/yt-dlp/yt-dlp/issues/393
 423             sapisid_cookie = dict_get(
 424                 yt_cookies, ('__Secure-3PAPISID', 'SAPISID'))
 425             if sapisid_cookie and sapisid_cookie.value:
 426                 self._SAPISID = sapisid_cookie.value
 427                 self.write_debug('Extracted SAPISID cookie')
 428                 # SAPISID cookie is required if not already present
 429                 if not yt_cookies.get('SAPISID'):
 430                     self.write_debug('Copying __Secure-3PAPISID cookie to SAPISID cookie')
 431                     self._set_cookie(
 432                         '.youtube.com', 'SAPISID', self._SAPISID, secure=True, expire_time=time_now + 3600)
 433             else:
 434                 self._SAPISID = False
 435         if not self._SAPISID:
 436             return None
 437         # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323
 438         sapisidhash = hashlib.sha1(
 439             f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest()
 440         return f'SAPISIDHASH {time_now}_{sapisidhash}'
 441
 442     def _call_api(self, ep, query, video_id, fatal=True, headers=None,
 443                   note='Downloading API JSON', errnote='Unable to download API page',
 444                   context=None, api_key=None, api_hostname=None, default_client='web'):
 445
 446         data = {'context': context} if context else {'context': self._extract_context(default_client=default_client)}
 447         data.update(query)
 448         real_headers = self.generate_api_headers(default_client=default_client)
 449         real_headers.update({'content-type': 'application/json'})
 450         if headers:
 451             real_headers.update(headers)
 452         return self._download_json(
 453             'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep),
 454             video_id=video_id, fatal=fatal, note=note, errnote=errnote,
 455             data=json.dumps(data).encode('utf8'), headers=real_headers,
 456             query={'key': api_key or self._extract_api_key()})
 457
 458     def extract_yt_initial_data(self, item_id, webpage, fatal=True):
 459         data = self._search_regex(
 460             (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
 461              self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal)
 462         if data:
 463             return self._parse_json(data, item_id, fatal=fatal)
 464
 465     @staticmethod
 466     def _extract_session_index(*data):
 467         """
 468         Index of current account in account list.
 469         See: https://github.com/yt-dlp/yt-dlp/pull/519
 470         """
 471         for ytcfg in data:
 472             session_index = int_or_none(try_get(ytcfg, lambda x: x['SESSION_INDEX']))
 473             if session_index is not None:
 474                 return session_index
 475
 476     # Deprecated?
 477     def _extract_identity_token(self, ytcfg=None, webpage=None):
 478         if ytcfg:
 479             token = try_get(ytcfg, lambda x: x['ID_TOKEN'], compat_str)
 480             if token:
 481                 return token
 482         if webpage:
 483             return self._search_regex(
 484                 r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
 485                 'identity token', default=None, fatal=False)
 486
 487     @staticmethod
 488     def _extract_account_syncid(*args):
 489         """
 490         Extract syncId required to download private playlists of secondary channels
 491         @params response and/or ytcfg
 492         """
 493         for data in args:
 494             # ytcfg includes channel_syncid if on secondary channel
 495             delegated_sid = try_get(data, lambda x: x['DELEGATED_SESSION_ID'], compat_str)
 496             if delegated_sid:
 497                 return delegated_sid
 498             sync_ids = (try_get(
 499                 data, (lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'],
 500                        lambda x: x['DATASYNC_ID']), compat_str) or '').split('||')
 501             if len(sync_ids) >= 2 and sync_ids[1]:
 502                 # datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
 503                 # and just "user_syncid||" for primary channel. We only want the channel_syncid
 504                 return sync_ids[0]
 505
 506     @staticmethod
 507     def _extract_visitor_data(*args):
 508         """
 509         Extracts visitorData from an API response or ytcfg
 510         Appears to be used to track session state
 511         """
 512         return get_first(
 513             args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))),
 514             expected_type=str)
 515
 516     @property
 517     def is_authenticated(self):
 518         return bool(self._generate_sapisidhash_header())
 519
 520     def extract_ytcfg(self, video_id, webpage):
 521         if not webpage:
 522             return {}
 523         return self._parse_json(
 524             self._search_regex(
 525                 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
 526                 default='{}'), video_id, fatal=False) or {}
 527
 528     def generate_api_headers(
 529             self, *, ytcfg=None, account_syncid=None, session_index=None,
 530             visitor_data=None, identity_token=None, api_hostname=None, default_client='web'):
 531
 532         origin = 'https://' + (api_hostname if api_hostname else self._get_innertube_host(default_client))
 533         headers = {
 534             'X-YouTube-Client-Name': compat_str(
 535                 self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT_CLIENT_NAME'], default_client=default_client)),
 536             'X-YouTube-Client-Version': self._extract_client_version(ytcfg, default_client),
 537             'Origin': origin,
 538             'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
 539             'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
 540             'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
 541         }
 542         if session_index is None:
 543             session_index = self._extract_session_index(ytcfg)
 544         if account_syncid or session_index is not None:
 545             headers['X-Goog-AuthUser'] = session_index if session_index is not None else 0
 546
 547         auth = self._generate_sapisidhash_header(origin)
 548         if auth is not None:
 549             headers['Authorization'] = auth
 550             headers['X-Origin'] = origin
 551         return {h: v for h, v in headers.items() if v is not None}
 552
 553     @staticmethod
 554     def _build_api_continuation_query(continuation, ctp=None):
 555         query = {
 556             'continuation': continuation
 557         }
 558         # TODO: Inconsistency with clickTrackingParams.
 559         # Currently we have a fixed ctp contained within context (from ytcfg)
 560         # and a ctp in root query for continuation.
 561         if ctp:
 562             query['clickTracking'] = {'clickTrackingParams': ctp}
 563         return query
 564
 565     @classmethod
 566     def _extract_next_continuation_data(cls, renderer):
 567         next_continuation = try_get(
 568             renderer, (lambda x: x['continuations'][0]['nextContinuationData'],
 569                        lambda x: x['continuation']['reloadContinuationData']), dict)
 570         if not next_continuation:
 571             return
 572         continuation = next_continuation.get('continuation')
 573         if not continuation:
 574             return
 575         ctp = next_continuation.get('clickTrackingParams')
 576         return cls._build_api_continuation_query(continuation, ctp)
 577
 578     @classmethod
 579     def _extract_continuation_ep_data(cls, continuation_ep: dict):
 580         if isinstance(continuation_ep, dict):
 581             continuation = try_get(
 582                 continuation_ep, lambda x: x['continuationCommand']['token'], compat_str)
 583             if not continuation:
 584                 return
 585             ctp = continuation_ep.get('clickTrackingParams')
 586             return cls._build_api_continuation_query(continuation, ctp)
 587
 588     @classmethod
 589     def _extract_continuation(cls, renderer):
 590         next_continuation = cls._extract_next_continuation_data(renderer)
 591         if next_continuation:
 592             return next_continuation
 593
 594         contents = []
 595         for key in ('contents', 'items'):
 596             contents.extend(try_get(renderer, lambda x: x[key], list) or [])
 597
 598         for content in contents:
 599             if not isinstance(content, dict):
 600                 continue
 601             continuation_ep = try_get(
 602                 content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
 603                           lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
 604                 dict)
 605             continuation = cls._extract_continuation_ep_data(continuation_ep)
 606             if continuation:
 607                 return continuation
 608
 609     @classmethod
 610     def _extract_alerts(cls, data):
 611         for alert_dict in try_get(data, lambda x: x['alerts'], list) or []:
 612             if not isinstance(alert_dict, dict):
 613                 continue
 614             for alert in alert_dict.values():
 615                 alert_type = alert.get('type')
 616                 if not alert_type:
 617                     continue
 618                 message = cls._get_text(alert, 'text')
 619                 if message:
 620                     yield alert_type, message
 621
 622     def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
 623         errors = []
 624         warnings = []
 625         for alert_type, alert_message in alerts:
 626             if alert_type.lower() == 'error' and fatal:
 627                 errors.append([alert_type, alert_message])
 628             else:
 629                 warnings.append([alert_type, alert_message])
 630
 631         for alert_type, alert_message in (warnings + errors[:-1]):
 632             self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once)
 633         if errors:
 634             raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
 635
 636     def _extract_and_report_alerts(self, data, *args, **kwargs):
 637         return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
 638
 639     def _extract_badges(self, renderer: dict):
 640         badges = set()
 641         for badge in try_get(renderer, lambda x: x['badges'], list) or []:
 642             label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], compat_str)
 643             if label:
 644                 badges.add(label.lower())
 645         return badges
 646
 647     @staticmethod
 648     def _get_text(data, *path_list, max_runs=None):
 649         for path in path_list or [None]:
 650             if path is None:
 651                 obj = [data]
 652             else:
 653                 obj = traverse_obj(data, path, default=[])
 654                 if not any(key is ... or isinstance(key, (list, tuple)) for key in variadic(path)):
 655                     obj = [obj]
 656             for item in obj:
 657                 text = try_get(item, lambda x: x['simpleText'], compat_str)
 658                 if text:
 659                     return text
 660                 runs = try_get(item, lambda x: x['runs'], list) or []
 661                 if not runs and isinstance(item, list):
 662                     runs = item
 663
 664                 runs = runs[:min(len(runs), max_runs or len(runs))]
 665                 text = ''.join(traverse_obj(runs, (..., 'text'), expected_type=str, default=[]))
 666                 if text:
 667                     return text
 668
    def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                          default_client='web'):
        """Call the innertube API with retries and return the response.

        The request is made through _call_api using the context and API key
        extracted from `ytcfg` (falling back to `default_client`). Up to
        'extractor_retries' (default 3) additional attempts are made when:
          * the request fails with a network error other than HTTP 403/429
          * the response carries an alert error containing 'unknown error'
          * `check_get_keys` is given and dict_get(response, check_get_keys)
            is falsy ('Incomplete data received')

        On any other failure, or once the retries for a network error or
        incomplete data are used up, the error is raised when `fatal`;
        otherwise it is reported as a warning and None is returned.

        NOTE(review): when the 'unknown error' alert is still present on the
        last attempt, the loop simply ends and that response is returned
        without raising - confirm this is intended.
        """
        response = None
        last_error = None
        count = -1
        retries = self.get_param('extractor_retries', 3)
        if check_get_keys is None:
            check_get_keys = []
        while count < retries:
            count += 1
            if last_error:
                self.report_warning('%s. Retrying ...' % remove_end(last_error, '.'))
            try:
                # Always fatal here; `fatal` of this method is applied in the handlers below
                response = self._call_api(
                    ep=ep, fatal=True, headers=headers,
                    video_id=item_id, query=query,
                    context=self._extract_context(ytcfg, default_client),
                    api_key=self._extract_api_key(ytcfg, default_client),
                    api_hostname=api_hostname, default_client=default_client,
                    note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):
                    if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)):
                        # Non-HTML error body: rewind and try to surface the API's error message
                        e.cause.seek(0)
                        yt_error = try_get(
                            self._parse_json(e.cause.read().decode(), item_id, fatal=False),
                            lambda x: x['error']['message'], compat_str)
                        if yt_error:
                            self._report_alerts([('ERROR', yt_error)], fatal=False)
                    # Downloading page may result in intermittent 5xx HTTP error
                    # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                    # We also want to catch all other network exceptions since errors in later pages can be troublesome
                    # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                if fatal:
                    raise
                else:
                    self.report_warning(error_to_compat_str(e))
                    return

            else:
                try:
                    self._extract_and_report_alerts(response, only_once=True)
                except ExtractorError as e:
                    # YouTube servers may return errors we want to retry on in a 200 OK response
                    # See: https://github.com/yt-dlp/yt-dlp/issues/839
                    if 'unknown error' in e.msg.lower():
                        last_error = e.msg
                        continue
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    return
                # Done when no keys are required or one of them has a value
                if not check_get_keys or dict_get(response, check_get_keys):
                    break
                # Youtube sometimes sends incomplete data
                # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                last_error = 'Incomplete data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    else:
                        self.report_warning(last_error)
                        return
        return response
 738
 739     @staticmethod
 740     def is_music_url(url):
 741         return re.match(r'https?://music\.youtube\.com/', url) is not None
 742
 743     def _extract_video(self, renderer):
 744         video_id = renderer.get('videoId')
 745         title = self._get_text(renderer, 'title')
 746         description = self._get_text(renderer, 'descriptionSnippet')
 747         duration = parse_duration(self._get_text(
 748             renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
 749         view_count_text = self._get_text(renderer, 'viewCountText') or ''
 750         view_count = str_to_int(self._search_regex(
 751             r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
 752             'view count', default=None))
 753
 754         uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
 755
 756         return {
 757             '_type': 'url',
 758             'ie_key': YoutubeIE.ie_key(),
 759             'id': video_id,
 760             'url': f'https://www.youtube.com/watch?v={video_id}',
 761             'title': title,
 762             'description': description,
 763             'duration': duration,
 764             'view_count': view_count,
 765             'uploader': uploader,
 766         }
 767
 768
 769 class YoutubeIE(YoutubeBaseInfoExtractor):
 770     IE_DESC = 'YouTube'
 771     _VALID_URL = r"""(?x)^
 772                      (
 773                          (?:https?://|//)                                    # http(s):// or protocol-independent URL
 774                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com|
 775                             (?:www\.)?deturl\.com/www\.youtube\.com|
 776                             (?:www\.)?pwnyoutube\.com|
 777                             (?:www\.)?hooktube\.com|
 778                             (?:www\.)?yourepeat\.com|
 779                             tube\.majestyc\.net|
 780                             %(invidious)s|
 781                             youtube\.googleapis\.com)/                        # the various hostnames, with wildcard subdomains
 782                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
 783                          (?:                                                  # the various things that can precede the ID:
 784                              (?:(?:v|embed|e|shorts)/(?!videoseries))         # v/ or embed/ or e/ or shorts/
 785                              |(?:                                             # or the v= param in all its forms
 786                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
 787                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #!
 788                                  (?:.*?[&;])??                                # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
 789                                  v=
 790                              )
 791                          ))
 792                          |(?:
 793                             youtu\.be|                                        # just youtu.be/xxxx
 794                             vid\.plus|                                        # or vid.plus/xxxx
 795                             zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
 796                             %(invidious)s
 797                          )/
 798                          |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
 799                          )
 800                      )?                                                       # all until now is optional -> you can pass the naked ID
 801                      (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID
 802                      (?(1).+)?                                                # if we found the ID, everything can follow
 803                      (?:\#|$)""" % {
 804         'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
 805     }
 806     _PLAYER_INFO_RE = (
 807         r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
 808         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
 809         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$',
 810     )
 811     _formats = {
 812         '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 813         '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 814         '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
 815         '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
 816         '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
 817         '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 818         '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 819         '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 820         # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
 821         '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
 822         '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 823         '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
 824         '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 825         '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
 826         '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 827         '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
 828         '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 829         '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
 830
 831
 832         # 3D videos
 833         '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 834         '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
 835         '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 836         '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
 837         '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
 838         '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 839         '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
 840
 841         # Apple HTTP Live Streaming
 842         '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 843         '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 844         '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 845         '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
 846         '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 847         '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
 848         '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
 849         '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
 850
 851         # DASH mp4 video
 852         '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
 853         '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
 854         '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 855         '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
 856         '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
 857         '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'},  # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
 858         '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
 859         '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
 860         '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
 861         '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 862         '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
 863         '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
 864
 865         # Dash mp4 audio
 866         '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
 867         '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
 868         '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
 869         '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 870         '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
 871         '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
 872         '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
 873
 874         # Dash webm
 875         '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 876         '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 877         '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 878         '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 879         '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 880         '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
 881         '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
 882         '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 883         '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 884         '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 885         '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 886         '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 887         '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 888         '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 889         '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 890         # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
 891         '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 892         '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 893         '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 894         '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 895         '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
 896         '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
 897
 898         # Dash webm audio
 899         '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
 900         '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
 901
 902         # Dash webm audio with opus inside
 903         '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
 904         '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
 905         '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
 906
 907         # RTMP (unnamed)
 908         '_rtmp': {'protocol': 'rtmp'},
 909
 910         # av01 video only formats sometimes served with "unknown" codecs
 911         '394': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
 912         '395': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'av01.0.00M.08'},
 913         '396': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'av01.0.01M.08'},
 914         '397': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'av01.0.04M.08'},
 915         '398': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'av01.0.05M.08'},
 916         '399': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'av01.0.08M.08'},
 917         '400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
 918         '401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
 919     }
 920     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
 921
 922     _GEO_BYPASS = False
 923
 924     IE_NAME = 'youtube'
 925     _TESTS = [
 926         {
 927             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
 928             'info_dict': {
 929                 'id': 'BaW_jenozKc',
 930                 'ext': 'mp4',
 931                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 932                 'uploader': 'Philipp Hagemeister',
 933                 'uploader_id': 'phihag',
 934                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 935                 'channel': 'Philipp Hagemeister',
 936                 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
 937                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
 938                 'upload_date': '20121002',
 939                 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22',
 940                 'categories': ['Science & Technology'],
 941                 'tags': ['youtube-dl'],
 942                 'duration': 10,
 943                 'view_count': int,
 944                 'like_count': int,
 945                 # 'dislike_count': int,
 946                 'availability': 'public',
 947                 'playable_in_embed': True,
 948                 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg',
 949                 'live_status': 'not_live',
 950                 'age_limit': 0,
 951                 'start_time': 1,
 952                 'end_time': 9,
 953             }
 954         },
 955         {
 956             'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
 957             'note': 'Embed-only video (#1746)',
 958             'info_dict': {
 959                 'id': 'yZIXLfi8CZQ',
 960                 'ext': 'mp4',
 961                 'upload_date': '20120608',
 962                 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
 963                 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
 964                 'uploader': 'SET India',
 965                 'uploader_id': 'setindia',
 966                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
 967                 'age_limit': 18,
 968             },
 969             'skip': 'Private video',
 970         },
 971         {
 972             'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=yZIXLfi8CZQ',
 973             'note': 'Use the first video ID in the URL',
 974             'info_dict': {
 975                 'id': 'BaW_jenozKc',
 976                 'ext': 'mp4',
 977                 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
 978                 'uploader': 'Philipp Hagemeister',
 979                 'uploader_id': 'phihag',
 980                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
 981                 'upload_date': '20121002',
 982                 'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 983                 'categories': ['Science & Technology'],
 984                 'tags': ['youtube-dl'],
 985                 'duration': 10,
 986                 'view_count': int,
 987                 'like_count': int,
 988                 'dislike_count': int,
 989             },
 990             'params': {
 991                 'skip_download': True,
 992             },
 993         },
 994         {
 995             'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
 996             'note': '256k DASH audio (format 141) via DASH manifest',
 997             'info_dict': {
 998                 'id': 'a9LDPn-MO4I',
 999                 'ext': 'm4a',
1000                 'upload_date': '20121002',
1001                 'uploader_id': '8KVIDEO',
1002                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
1003                 'description': '',
1004                 'uploader': '8KVIDEO',
1005                 'title': 'UHDTV TEST 8K VIDEO.mp4'
1006             },
1007             'params': {
1008                 'youtube_include_dash_manifest': True,
1009                 'format': '141',
1010             },
1011             'skip': 'format 141 not served anymore',
1012         },
1013         # DASH manifest with encrypted signature
1014         {
1015             'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
1016             'info_dict': {
1017                 'id': 'IB3lcPjvWLA',
1018                 'ext': 'm4a',
1019                 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
1020                 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
1021                 'duration': 244,
1022                 'uploader': 'AfrojackVEVO',
1023                 'uploader_id': 'AfrojackVEVO',
1024                 'upload_date': '20131011',
1025                 'abr': 129.495,
1026             },
1027             'params': {
1028                 'youtube_include_dash_manifest': True,
1029                 'format': '141/bestaudio[ext=m4a]',
1030             },
1031         },
1032         # Age-gate videos. See https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-888837000
1033         {
1034             'note': 'Embed allowed age-gate video',
1035             'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
1036             'info_dict': {
1037                 'id': 'HtVdAasjOgU',
1038                 'ext': 'mp4',
1039                 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
1040                 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
1041                 'duration': 142,
1042                 'uploader': 'The Witcher',
1043                 'uploader_id': 'WitcherGame',
1044                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
1045                 'upload_date': '20140605',
1046                 'age_limit': 18,
1047             },
1048         },
1049         {
1050             'note': 'Age-gate video with embed allowed in public site',
1051             'url': 'https://youtube.com/watch?v=HsUATh_Nc2U',
1052             'info_dict': {
1053                 'id': 'HsUATh_Nc2U',
1054                 'ext': 'mp4',
1055                 'title': 'Godzilla 2 (Official Video)',
1056                 'description': 'md5:bf77e03fcae5529475e500129b05668a',
1057                 'upload_date': '20200408',
1058                 'uploader_id': 'FlyingKitty900',
1059                 'uploader': 'FlyingKitty',
1060                 'age_limit': 18,
1061             },
1062         },
1063         {
1064             'note': 'Age-gate video embedable only with clientScreen=EMBED',
1065             'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',
1066             'info_dict': {
1067                 'id': 'Tq92D6wQ1mg',
1068                 'title': '[MMD] Adios - EVERGLOW [+Motion DL]',
1069                 'ext': 'mp4',
1070                 'upload_date': '20191227',
1071                 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ',
1072                 'uploader': 'Projekt Melody',
1073                 'description': 'md5:17eccca93a786d51bc67646756894066',
1074                 'age_limit': 18,
1075             },
1076         },
1077         {
1078             'note': 'Non-Agegated non-embeddable video',
1079             'url': 'https://youtube.com/watch?v=MeJVWBSsPAY',
1080             'info_dict': {
1081                 'id': 'MeJVWBSsPAY',
1082                 'ext': 'mp4',
1083                 'title': 'OOMPH! - Such Mich Find Mich (Lyrics)',
1084                 'uploader': 'Herr Lurik',
1085                 'uploader_id': 'st3in234',
1086                 'description': 'Fan Video. Music & Lyrics by OOMPH!.',
1087                 'upload_date': '20130730',
1088             },
1089         },
1090         {
1091             'note': 'Non-bypassable age-gated video',
1092             'url': 'https://youtube.com/watch?v=Cr381pDsSsA',
1093             'only_matching': True,
1094         },
1095         # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
1096         # YouTube Red ad is not captured for creator
1097         {
1098             'url': '__2ABJjxzNo',
1099             'info_dict': {
1100                 'id': '__2ABJjxzNo',
1101                 'ext': 'mp4',
1102                 'duration': 266,
1103                 'upload_date': '20100430',
1104                 'uploader_id': 'deadmau5',
1105                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
1106                 'creator': 'deadmau5',
1107                 'description': 'md5:6cbcd3a92ce1bc676fc4d6ab4ace2336',
1108                 'uploader': 'deadmau5',
1109                 'title': 'Deadmau5 - Some Chords (HD)',
1110                 'alt_title': 'Some Chords',
1111             },
1112             'expected_warnings': [
1113                 'DASH manifest missing',
1114             ]
1115         },
1116         # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
1117         {
1118             'url': 'lqQg6PlCWgI',
1119             'info_dict': {
1120                 'id': 'lqQg6PlCWgI',
1121                 'ext': 'mp4',
1122                 'duration': 6085,
1123                 'upload_date': '20150827',
1124                 'uploader_id': 'olympic',
1125                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
1126                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
1127                 'uploader': 'Olympics',
1128                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
1129             },
1130             'params': {
1131                 'skip_download': 'requires avconv',
1132             }
1133         },
1134         # Non-square pixels
1135         {
1136             'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
1137             'info_dict': {
1138                 'id': '_b-2C3KPAM0',
1139                 'ext': 'mp4',
1140                 'stretched_ratio': 16 / 9.,
1141                 'duration': 85,
1142                 'upload_date': '20110310',
1143                 'uploader_id': 'AllenMeow',
1144                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
1145                 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
1146                 'uploader': '孫ᄋᄅ',
1147                 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
1148             },
1149         },
1150         # url_encoded_fmt_stream_map is empty string
1151         {
1152             'url': 'qEJwOuvDf7I',
1153             'info_dict': {
1154                 'id': 'qEJwOuvDf7I',
1155                 'ext': 'webm',
1156                 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
1157                 'description': '',
1158                 'upload_date': '20150404',
1159                 'uploader_id': 'spbelect',
1160                 'uploader': 'Наблюдатели Петербурга',
1161             },
1162             'params': {
1163                 'skip_download': 'requires avconv',
1164             },
1165             'skip': 'This live event has ended.',
1166         },
1167         # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
1168         {
1169             'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
1170             'info_dict': {
1171                 'id': 'FIl7x6_3R5Y',
1172                 'ext': 'webm',
1173                 'title': 'md5:7b81415841e02ecd4313668cde88737a',
1174                 'description': 'md5:116377fd2963b81ec4ce64b542173306',
1175                 'duration': 220,
1176                 'upload_date': '20150625',
1177                 'uploader_id': 'dorappi2000',
1178                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
1179                 'uploader': 'dorappi2000',
1180                 'formats': 'mincount:31',
1181             },
1182             'skip': 'not actual anymore',
1183         },
1184         # DASH manifest with segment_list
1185         {
1186             'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
1187             'md5': '8ce563a1d667b599d21064e982ab9e31',
1188             'info_dict': {
1189                 'id': 'CsmdDsKjzN8',
1190                 'ext': 'mp4',
1191                 'upload_date': '20150501',  # According to '<meta itemprop="datePublished"', but in other places it's 20150510
1192                 'uploader': 'Airtek',
1193                 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
1194                 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
1195                 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
1196             },
1197             'params': {
1198                 'youtube_include_dash_manifest': True,
1199                 'format': '135',  # bestvideo
1200             },
1201             'skip': 'This live event has ended.',
1202         },
1203         {
1204             # Multifeed videos (multiple cameras), URL is for Main Camera
1205             'url': 'https://www.youtube.com/watch?v=jvGDaLqkpTg',
1206             'info_dict': {
1207                 'id': 'jvGDaLqkpTg',
1208                 'title': 'Tom Clancy Free Weekend Rainbow Whatever',
1209                 'description': 'md5:e03b909557865076822aa169218d6a5d',
1210             },
1211             'playlist': [{
1212                 'info_dict': {
1213                     'id': 'jvGDaLqkpTg',
1214                     'ext': 'mp4',
1215                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Main Camera)',
1216                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1217                     'duration': 10643,
1218                     'upload_date': '20161111',
1219                     'uploader': 'Team PGP',
1220                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1221                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1222                 },
1223             }, {
1224                 'info_dict': {
1225                     'id': '3AKt1R1aDnw',
1226                     'ext': 'mp4',
1227                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 2)',
1228                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1229                     'duration': 10991,
1230                     'upload_date': '20161111',
1231                     'uploader': 'Team PGP',
1232                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1233                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1234                 },
1235             }, {
1236                 'info_dict': {
1237                     'id': 'RtAMM00gpVc',
1238                     'ext': 'mp4',
1239                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 3)',
1240                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1241                     'duration': 10995,
1242                     'upload_date': '20161111',
1243                     'uploader': 'Team PGP',
1244                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1245                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1246                 },
1247             }, {
1248                 'info_dict': {
1249                     'id': '6N2fdlP3C5U',
1250                     'ext': 'mp4',
1251                     'title': 'Tom Clancy Free Weekend Rainbow Whatever (Camera 4)',
1252                     'description': 'md5:e03b909557865076822aa169218d6a5d',
1253                     'duration': 10990,
1254                     'upload_date': '20161111',
1255                     'uploader': 'Team PGP',
1256                     'uploader_id': 'UChORY56LMMETTuGjXaJXvLg',
1257                     'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UChORY56LMMETTuGjXaJXvLg',
1258                 },
1259             }],
1260             'params': {
1261                 'skip_download': True,
1262             },
1263             'skip': 'Not multifeed anymore',
1264         },
1265         {
1266             # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
1267             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
1268             'info_dict': {
1269                 'id': 'gVfLd0zydlo',
1270                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
1271             },
1272             'playlist_count': 2,
1273             'skip': 'Not multifeed anymore',
1274         },
1275         {
1276             'url': 'https://vid.plus/FlRa-iH7PGw',
1277             'only_matching': True,
1278         },
1279         {
1280             'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
1281             'only_matching': True,
1282         },
1283         {
1284             # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1285             # Also tests cut-off URL expansion in video description (see
1286             # https://github.com/ytdl-org/youtube-dl/issues/1892,
1287             # https://github.com/ytdl-org/youtube-dl/issues/8164)
1288             'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
1289             'info_dict': {
1290                 'id': 'lsguqyKfVQg',
1291                 'ext': 'mp4',
1292                 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
1293                 'alt_title': 'Dark Walk',
1294                 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
1295                 'duration': 133,
1296                 'upload_date': '20151119',
1297                 'uploader_id': 'IronSoulElf',
1298                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
1299                 'uploader': 'IronSoulElf',
1300                 'creator': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1301                 'track': 'Dark Walk',
1302                 'artist': 'Todd Haberman;\nDaniel Law Heath and Aaron Kaplan',
1303                 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
1304             },
1305             'params': {
1306                 'skip_download': True,
1307             },
1308         },
1309         {
1310             # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
1311             'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
1312             'only_matching': True,
1313         },
1314         {
1315             # Video with yt:stretch=17:0
1316             'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
1317             'info_dict': {
1318                 'id': 'Q39EVAstoRM',
1319                 'ext': 'mp4',
1320                 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
1321                 'description': 'md5:ee18a25c350637c8faff806845bddee9',
1322                 'upload_date': '20151107',
1323                 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
1324                 'uploader': 'CH GAMER DROID',
1325             },
1326             'params': {
1327                 'skip_download': True,
1328             },
1329             'skip': 'This video does not exist.',
1330         },
1331         {
1332             # Video with incomplete 'yt:stretch=16:'
1333             'url': 'https://www.youtube.com/watch?v=FRhJzUSJbGI',
1334             'only_matching': True,
1335         },
1336         {
1337             # Video licensed under Creative Commons
1338             'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
1339             'info_dict': {
1340                 'id': 'M4gD1WSo5mA',
1341                 'ext': 'mp4',
1342                 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
1343                 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
1344                 'duration': 721,
1345                 'upload_date': '20150127',
1346                 'uploader_id': 'BerkmanCenter',
1347                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
1348                 'uploader': 'The Berkman Klein Center for Internet & Society',
1349                 'license': 'Creative Commons Attribution license (reuse allowed)',
1350             },
1351             'params': {
1352                 'skip_download': True,
1353             },
1354         },
1355         {
1356             # Channel-like uploader_url
1357             'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
1358             'info_dict': {
1359                 'id': 'eQcmzGIKrzg',
1360                 'ext': 'mp4',
1361                 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
1362                 'description': 'md5:13a2503d7b5904ef4b223aa101628f39',
1363                 'duration': 4060,
1364                 'upload_date': '20151119',
1365                 'uploader': 'Bernie Sanders',
1366                 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
1367                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
1368                 'license': 'Creative Commons Attribution license (reuse allowed)',
1369             },
1370             'params': {
1371                 'skip_download': True,
1372             },
1373         },
1374         {
1375             'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
1376             'only_matching': True,
1377         },
1378         {
1379             # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
1380             'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
1381             'only_matching': True,
1382         },
1383         {
1384             # Rental video preview
1385             'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
1386             'info_dict': {
1387                 'id': 'uGpuVWrhIzE',
1388                 'ext': 'mp4',
1389                 'title': 'Piku - Trailer',
1390                 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
1391                 'upload_date': '20150811',
1392                 'uploader': 'FlixMatrix',
1393                 'uploader_id': 'FlixMatrixKaravan',
1394                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
1395                 'license': 'Standard YouTube License',
1396             },
1397             'params': {
1398                 'skip_download': True,
1399             },
1400             'skip': 'This video is not available.',
1401         },
1402         {
1403             # YouTube Red video with episode data
1404             'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
1405             'info_dict': {
1406                 'id': 'iqKdEhx-dD4',
1407                 'ext': 'mp4',
1408                 'title': 'Isolation - Mind Field (Ep 1)',
1409                 'description': 'md5:f540112edec5d09fc8cc752d3d4ba3cd',
1410                 'duration': 2085,
1411                 'upload_date': '20170118',
1412                 'uploader': 'Vsauce',
1413                 'uploader_id': 'Vsauce',
1414                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
1415                 'series': 'Mind Field',
1416                 'season_number': 1,
1417                 'episode_number': 1,
1418             },
1419             'params': {
1420                 'skip_download': True,
1421             },
1422             'expected_warnings': [
1423                 'Skipping DASH manifest',
1424             ],
1425         },
1426         {
1427             # The following content has been identified by the YouTube community
1428             # as inappropriate or offensive to some audiences.
1429             'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
1430             'info_dict': {
1431                 'id': '6SJNVb0GnPI',
1432                 'ext': 'mp4',
1433                 'title': 'Race Differences in Intelligence',
1434                 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
1435                 'duration': 965,
1436                 'upload_date': '20140124',
1437                 'uploader': 'New Century Foundation',
1438                 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
1439                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
1440             },
1441             'params': {
1442                 'skip_download': True,
1443             },
1444             'skip': 'This video has been removed for violating YouTube\'s policy on hate speech.',
1445         },
1446         {
1447             # itag 212
1448             'url': '1t24XAntNCY',
1449             'only_matching': True,
1450         },
1451         {
1452             # geo restricted to JP
1453             'url': 'sJL6WA-aGkQ',
1454             'only_matching': True,
1455         },
1456         {
1457             'url': 'https://invidio.us/watch?v=BaW_jenozKc',
1458             'only_matching': True,
1459         },
1460         {
1461             'url': 'https://redirect.invidious.io/watch?v=BaW_jenozKc',
1462             'only_matching': True,
1463         },
1464         {
1465             # from https://nitter.pussthecat.org/YouTube/status/1360363141947944964#m
1466             'url': 'https://redirect.invidious.io/Yh0AhrY9GjA',
1467             'only_matching': True,
1468         },
1469         {
1470             # DRM protected
1471             'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
1472             'only_matching': True,
1473         },
1474         {
1475             # Video with unsupported adaptive stream type formats
1476             'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
1477             'info_dict': {
1478                 'id': 'Z4Vy8R84T1U',
1479                 'ext': 'mp4',
1480                 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
1481                 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
1482                 'duration': 433,
1483                 'upload_date': '20130923',
1484                 'uploader': 'Amelia Putri Harwita',
1485                 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
1486                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
1487                 'formats': 'maxcount:10',
1488             },
1489             'params': {
1490                 'skip_download': True,
1491                 'youtube_include_dash_manifest': False,
1492             },
1493             'skip': 'not actual anymore',
1494         },
1495         {
1496             # Youtube Music Auto-generated description
1497             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1498             'info_dict': {
1499                 'id': 'MgNrAu2pzNs',
1500                 'ext': 'mp4',
1501                 'title': 'Voyeur Girl',
1502                 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
1503                 'upload_date': '20190312',
1504                 'uploader': 'Stephen - Topic',
1505                 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
1506                 'artist': 'Stephen',
1507                 'track': 'Voyeur Girl',
1508                 'album': 'it\'s too much love to know my dear',
1509                 'release_date': '20190313',
1510                 'release_year': 2019,
1511             },
1512             'params': {
1513                 'skip_download': True,
1514             },
1515         },
1516         {
1517             'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
1518             'only_matching': True,
1519         },
1520         {
1521             # invalid -> valid video id redirection
1522             'url': 'DJztXj2GPfl',
1523             'info_dict': {
1524                 'id': 'DJztXj2GPfk',
1525                 'ext': 'mp4',
1526                 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
1527                 'description': 'md5:bf577a41da97918e94fa9798d9228825',
1528                 'upload_date': '20090125',
1529                 'uploader': 'Prochorowka',
1530                 'uploader_id': 'Prochorowka',
1531                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
1532                 'artist': 'Panjabi MC',
1533                 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
1534                 'album': 'Beware of the Boys (Mundian To Bach Ke)',
1535             },
1536             'params': {
1537                 'skip_download': True,
1538             },
1539             'skip': 'Video unavailable',
1540         },
1541         {
1542             # empty description results in an empty string
1543             'url': 'https://www.youtube.com/watch?v=x41yOUIvK2k',
1544             'info_dict': {
1545                 'id': 'x41yOUIvK2k',
1546                 'ext': 'mp4',
1547                 'title': 'IMG 3456',
1548                 'description': '',
1549                 'upload_date': '20170613',
1550                 'uploader_id': 'ElevageOrVert',
1551                 'uploader': 'ElevageOrVert',
1552             },
1553             'params': {
1554                 'skip_download': True,
1555             },
1556         },
1557         {
1558             # with '};' inside yt initial data (see [1])
1559             # see [2] for an example with '};' inside ytInitialPlayerResponse
1560             # 1. https://github.com/ytdl-org/youtube-dl/issues/27093
1561             # 2. https://github.com/ytdl-org/youtube-dl/issues/27216
1562             'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
1563             'info_dict': {
1564                 'id': 'CHqg6qOn4no',
1565                 'ext': 'mp4',
1566                 'title': 'Part 77   Sort a list of simple types in c#',
1567                 'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
1568                 'upload_date': '20130831',
1569                 'uploader_id': 'kudvenkat',
1570                 'uploader': 'kudvenkat',
1571             },
1572             'params': {
1573                 'skip_download': True,
1574             },
1575         },
1576         {
1577             # another example of '};' in ytInitialData
1578             'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
1579             'only_matching': True,
1580         },
1581         {
1582             'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
1583             'only_matching': True,
1584         },
1585         {
1586             # https://github.com/ytdl-org/youtube-dl/pull/28094
1587             'url': 'OtqTfy26tG0',
1588             'info_dict': {
1589                 'id': 'OtqTfy26tG0',
1590                 'ext': 'mp4',
1591                 'title': 'Burn Out',
1592                 'description': 'md5:8d07b84dcbcbfb34bc12a56d968b6131',
1593                 'upload_date': '20141120',
1594                 'uploader': 'The Cinematic Orchestra - Topic',
1595                 'uploader_id': 'UCIzsJBIyo8hhpFm1NK0uLgw',
1596                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCIzsJBIyo8hhpFm1NK0uLgw',
1597                 'artist': 'The Cinematic Orchestra',
1598                 'track': 'Burn Out',
1599                 'album': 'Every Day',
1600                 'release_data': None,
1601                 'release_year': None,
1602             },
1603             'params': {
1604                 'skip_download': True,
1605             },
1606         },
1607         {
1608             # controversial video, only works with bpctr when authenticated with cookies
1609             'url': 'https://www.youtube.com/watch?v=nGC3D_FkCmg',
1610             'only_matching': True,
1611         },
1612         {
1613             # controversial video, requires bpctr/contentCheckOk
1614             'url': 'https://www.youtube.com/watch?v=SZJvDhaSDnc',
1615             'info_dict': {
1616                 'id': 'SZJvDhaSDnc',
1617                 'ext': 'mp4',
1618                 'title': 'San Diego teen commits suicide after bullying over embarrassing video',
1619                 'channel_id': 'UC-SJ6nODDmufqBzPBwCvYvQ',
1620                 'uploader': 'CBS This Morning',
1621                 'uploader_id': 'CBSThisMorning',
1622                 'upload_date': '20140716',
1623                 'description': 'md5:acde3a73d3f133fc97e837a9f76b53b7'
1624             }
1625         },
1626         {
1627             # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685
1628             'url': 'cBvYw8_A0vQ',
1629             'info_dict': {
1630                 'id': 'cBvYw8_A0vQ',
1631                 'ext': 'mp4',
1632                 'title': '4K Ueno Okachimachi  Street  Scenes  上野御徒町歩き',
1633                 'description': 'md5:ea770e474b7cd6722b4c95b833c03630',
1634                 'upload_date': '20201120',
1635                 'uploader': 'Walk around Japan',
1636                 'uploader_id': 'UC3o_t8PzBmXf5S9b7GLx1Mw',
1637                 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw',
1638             },
1639             'params': {
1640                 'skip_download': True,
1641             },
1642         }, {
1643             # Has multiple audio streams
1644             'url': 'WaOKSUlf4TM',
1645             'only_matching': True
1646         }, {
1647             # Requires Premium: has format 141 when requested using YTM url
1648             'url': 'https://music.youtube.com/watch?v=XclachpHxis',
1649             'only_matching': True
1650         }, {
1651             # multiple subtitles with same lang_code
1652             'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug',
1653             'only_matching': True,
1654         }, {
1655             # Force use android client fallback
1656             'url': 'https://www.youtube.com/watch?v=YOelRv7fMxY',
1657             'info_dict': {
1658                 'id': 'YOelRv7fMxY',
1659                 'title': 'DIGGING A SECRET TUNNEL Part 1',
1660                 'ext': '3gp',
1661                 'upload_date': '20210624',
1662                 'channel_id': 'UCp68_FLety0O-n9QU6phsgw',
1663                 'uploader': 'colinfurze',
1664                 'uploader_id': 'colinfurze',
1665                 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCp68_FLety0O-n9QU6phsgw',
1666                 'description': 'md5:b5096f56af7ccd7a555c84db81738b22'
1667             },
1668             'params': {
1669                 'format': '17',  # 3gp format available on android
1670                 'extractor_args': {'youtube': {'player_client': ['android']}},
1671             },
1672         },
1673         {
1674             # Skip download of additional client configs (remix client config in this case)
1675             'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
1676             'only_matching': True,
1677             'params': {
1678                 'extractor_args': {'youtube': {'player_skip': ['configs']}},
1679             },
1680         }, {
1681             # shorts
1682             'url': 'https://www.youtube.com/shorts/BGQWPY4IigY',
1683             'only_matching': True,
1684         }, {
1685             'note': 'Storyboards',
1686             'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8',
1687             'info_dict': {
1688                 'id': '5KLPxDtMqe8',
1689                 'ext': 'mhtml',
1690                 'format_id': 'sb0',
1691                 'title': 'Your Brain is Plastic',
1692                 'uploader_id': 'scishow',
1693                 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc',
1694                 'upload_date': '20140324',
1695                 'uploader': 'SciShow',
1696             }, 'params': {'format': 'mhtml', 'skip_download': True}
1697         }
1698     ]
1699
1700     @classmethod
1701     def suitable(cls, url):
1702         from ..utils import parse_qs
1703
1704         qs = parse_qs(url)
1705         if qs.get('list', [None])[0]:
1706             return False
1707         return super(YoutubeIE, cls).suitable(url)
1708
1709     def __init__(self, *args, **kwargs):
1710         super(YoutubeIE, self).__init__(*args, **kwargs)
1711         self._code_cache = {}
1712         self._player_cache = {}
1713
1714     def _extract_player_url(self, *ytcfgs, webpage=None):
1715         player_url = traverse_obj(
1716             ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
1717             get_all=False, expected_type=compat_str)
1718         if not player_url:
1719             return
1720         if player_url.startswith('//'):
1721             player_url = 'https:' + player_url
1722         elif not re.match(r'https?://', player_url):
1723             player_url = compat_urlparse.urljoin(
1724                 'https://www.youtube.com', player_url)
1725         return player_url
1726
1727     def _download_player_url(self, video_id, fatal=False):
1728         res = self._download_webpage(
1729             'https://www.youtube.com/iframe_api',
1730             note='Downloading iframe API JS', video_id=video_id, fatal=fatal)
1731         if res:
1732             player_version = self._search_regex(
1733                 r'player\\?/([0-9a-fA-F]{8})\\?/', res, 'player version', fatal=fatal)
1734             if player_version:
1735                 return f'https://www.youtube.com/s/player/{player_version}/player_ias.vflset/en_US/base.js'
1736
1737     def _signature_cache_id(self, example_sig):
1738         """ Return a string representation of a signature """
1739         return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
1740
1741     @classmethod
1742     def _extract_player_info(cls, player_url):
1743         for player_re in cls._PLAYER_INFO_RE:
1744             id_m = re.search(player_re, player_url)
1745             if id_m:
1746                 break
1747         else:
1748             raise ExtractorError('Cannot identify player %r' % player_url)
1749         return id_m.group('id')
1750
1751     def _load_player(self, video_id, player_url, fatal=True):
1752         player_id = self._extract_player_info(player_url)
1753         if player_id not in self._code_cache:
1754             code = self._download_webpage(
1755                 player_url, video_id, fatal=fatal,
1756                 note='Downloading player ' + player_id,
1757                 errnote='Download of %s failed' % player_url)
1758             if code:
1759                 self._code_cache[player_id] = code
1760         return self._code_cache.get(player_id)
1761
1762     def _extract_signature_function(self, video_id, player_url, example_sig):
1763         player_id = self._extract_player_info(player_url)
1764
1765         # Read from filesystem cache
1766         func_id = 'js_%s_%s' % (
1767             player_id, self._signature_cache_id(example_sig))
1768         assert os.path.basename(func_id) == func_id
1769
1770         cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
1771         if cache_spec is not None:
1772             return lambda s: ''.join(s[i] for i in cache_spec)
1773
1774         code = self._load_player(video_id, player_url)
1775         if code:
1776             res = self._parse_sig_js(code)
1777
1778             test_string = ''.join(map(compat_chr, range(len(example_sig))))
1779             cache_res = res(test_string)
1780             cache_spec = [ord(c) for c in cache_res]
1781
1782             self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
1783             return res
1784
    def _print_sig_code(self, func, example_sig):
        """Print Python code equivalent to the signature function func.

        Does nothing unless the 'youtube_print_sig_code' param is set.
        func is run on a probe string as long as example_sig; the resulting
        character permutation is rendered as a sum of index and slice
        expressions on a string s and written out with to_screen.
        """
        if not self.get_param('youtube_print_sig_code'):
            return

        def gen_sig_code(idxs):
            """Yield 's[...]' expressions that together reproduce idxs.

            Runs of indices that change by +1 or -1 are folded into a slice.
            """
            # NOTE(review): with fewer than two entries in idxs the loop body
            # never runs and `i` is unbound when the final yield is reached

            def _genslice(start, end, step):
                """Format the slice covering start..end (inclusive) with step."""
                starts = '' if start == 0 else str(start)
                # The slice end is exclusive; a negative end cannot be written
                # literally (it would count from the back), so it is left out
                ends = (':%d' % (end + step)) if end + step >= 0 else ':'
                steps = '' if step == 1 else (':%d' % step)
                return 's[%s%s%s]' % (starts, ends, steps)

            # step is None while no run is in progress
            step = None
            # Squelch pyflakes warnings - start will be set when step is set
            start = '(Never used)'
            # Walk over consecutive pairs (prev, i) of indices
            for i, prev in zip(idxs[1:], idxs[:-1]):
                if step is not None:
                    if i - prev == step:
                        # The current run continues
                        continue
                    # The run ended at prev; i is handled on the next
                    # iteration (as prev) or after the loop
                    yield _genslice(start, prev, step)
                    step = None
                    continue
                if i - prev in [-1, 1]:
                    # A new run starts at prev
                    step = i - prev
                    start = prev
                    continue
                else:
                    yield 's[%d]' % prev
            # Emit whatever is still pending for the last index
            if step is None:
                yield 's[%d]' % i
            else:
                yield _genslice(start, i, step)

        # Probe string whose n-th character has code point n, so the code
        # points of the output are the source indices
        test_string = ''.join(map(compat_chr, range(len(example_sig))))
        cache_res = func(test_string)
        cache_spec = [ord(c) for c in cache_res]
        expr_code = ' + '.join(gen_sig_code(cache_spec))
        # Tuple of the lengths of the dot-separated parts of the signature
        signature_id_tuple = '(%s)' % (
            ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
        code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
                '    return %s\n') % (signature_id_tuple, expr_code)
        self.to_screen('Extracted signature function:\n' + code)
1826
1827     def _parse_sig_js(self, jscode):
1828         funcname = self._search_regex(
1829             (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1830              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1831              r'\bm=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(h\.s\)\)',
1832              r'\bc&&\(c=(?P<sig>[a-zA-Z0-9$]{2,})\(decodeURIComponent\(c\)\)',
1833              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\);[a-zA-Z0-9$]{2}\.[a-zA-Z0-9$]{2}\(a,\d+\)',
1834              r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2,})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1835              r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
1836              # Obsolete patterns
1837              r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1838              r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
1839              r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1840              r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1841              r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1842              r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1843              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
1844              r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
1845             jscode, 'Initial JS player signature function name', group='sig')
1846
1847         jsi = JSInterpreter(jscode)
1848         initial_function = jsi.extract_function(funcname)
1849         return lambda s: initial_function([s])
1850
1851     def _decrypt_signature(self, s, video_id, player_url):
1852         """Turn the encrypted s field into a working signature"""
1853
1854         if player_url is None:
1855             raise ExtractorError('Cannot decrypt signature without player_url')
1856
1857         try:
1858             player_id = (player_url, self._signature_cache_id(s))
1859             if player_id not in self._player_cache:
1860                 func = self._extract_signature_function(
1861                     video_id, player_url, s
1862                 )
1863                 self._player_cache[player_id] = func
1864             func = self._player_cache[player_id]
1865             self._print_sig_code(func, s)
1866             return func(s)
1867         except Exception as e:
1868             raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
1869
1870     def _decrypt_nsig(self, s, video_id, player_url):
1871         """Turn the encrypted n field into a working signature"""
1872         if player_url is None:
1873             raise ExtractorError('Cannot decrypt nsig without player_url')
1874         if player_url.startswith('//'):
1875             player_url = 'https:' + player_url
1876         elif not re.match(r'https?://', player_url):
1877             player_url = compat_urlparse.urljoin(
1878                 'https://www.youtube.com', player_url)
1879
1880         sig_id = ('nsig_value', s)
1881         if sig_id in self._player_cache:
1882             return self._player_cache[sig_id]
1883
1884         try:
1885             player_id = ('nsig', player_url)
1886             if player_id not in self._player_cache:
1887                 self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
1888             func = self._player_cache[player_id]
1889             self._player_cache[sig_id] = func(s)
1890             self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
1891             return self._player_cache[sig_id]
1892         except Exception as e:
1893             raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
1894
1895     def _extract_n_function_name(self, jscode):
1896         return self._search_regex(
1897             (r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',),
1898             jscode, 'Initial JS player n function name', group='nfunc')
1899
1900     def _extract_n_function(self, video_id, player_url):
1901         player_id = self._extract_player_info(player_url)
1902         func_code = self._downloader.cache.load('youtube-nsig', player_id)
1903
1904         if func_code:
1905             jsi = JSInterpreter(func_code)
1906         else:
1907             jscode = self._load_player(video_id, player_url)
1908             funcname = self._extract_n_function_name(jscode)
1909             jsi = JSInterpreter(jscode)
1910             func_code = jsi.extract_function_code(funcname)
1911             self._downloader.cache.store('youtube-nsig', player_id, func_code)
1912
1913         if self.get_param('youtube_print_sig_code'):
1914             self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
1915
1916         return lambda s: jsi.extract_function_from_code(*func_code)([s])
1917
1918     def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
1919         """
1920         Extract signatureTimestamp (sts)
1921         Required to tell API what sig/player version is in use.
1922         """
1923         sts = None
1924         if isinstance(ytcfg, dict):
1925             sts = int_or_none(ytcfg.get('STS'))
1926
1927         if not sts:
1928             # Attempt to extract from player
1929             if player_url is None:
1930                 error_msg = 'Cannot extract signature timestamp without player_url.'
1931                 if fatal:
1932                     raise ExtractorError(error_msg)
1933                 self.report_warning(error_msg)
1934                 return
1935             code = self._load_player(video_id, player_url, fatal=fatal)
1936             if code:
1937                 sts = int_or_none(self._search_regex(
1938                     r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
1939                     'JS player signature timestamp', group='sts', fatal=fatal))
1940         return sts
1941
1942     def _mark_watched(self, video_id, player_responses):
1943         playback_url = get_first(
1944             player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'),
1945             expected_type=url_or_none)
1946         if not playback_url:
1947             self.report_warning('Unable to mark watched')
1948             return
1949         parsed_playback_url = compat_urlparse.urlparse(playback_url)
1950         qs = compat_urlparse.parse_qs(parsed_playback_url.query)
1951
1952         # cpn generation algorithm is reverse engineered from base.js.
1953         # In fact it works even with dummy cpn.
1954         CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
1955         cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
1956
1957         qs.update({
1958             'ver': ['2'],
1959             'cpn': [cpn],
1960         })
1961         playback_url = compat_urlparse.urlunparse(
1962             parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
1963
1964         self._download_webpage(
1965             playback_url, video_id, 'Marking watched',
1966             'Unable to mark watched', fatal=False)
1967
1968     @staticmethod
1969     def _extract_urls(webpage):
1970         # Embedded YouTube player
1971         entries = [
1972             unescapeHTML(mobj.group('url'))
1973             for mobj in re.finditer(r'''(?x)
1974             (?:
1975                 <iframe[^>]+?src=|
1976                 data-video-url=|
1977                 <embed[^>]+?src=|
1978                 embedSWF\(?:\s*|
1979                 <object[^>]+data=|
1980                 new\s+SWFObject\(
1981             )
1982             (["\'])
1983                 (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
1984                 (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
1985             \1''', webpage)]
1986
1987         # lazyYT YouTube embed
1988         entries.extend(list(map(
1989             unescapeHTML,
1990             re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
1991
1992         # Wordpress "YouTube Video Importer" plugin
1993         matches = re.findall(r'''(?x)<div[^>]+
1994             class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
1995             data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
1996         entries.extend(m[-1] for m in matches)
1997
1998         return entries
1999
2000     @staticmethod
2001     def _extract_url(webpage):
2002         urls = YoutubeIE._extract_urls(webpage)
2003         return urls[0] if urls else None
2004
2005     @classmethod
2006     def extract_id(cls, url):
2007         mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
2008         if mobj is None:
2009             raise ExtractorError('Invalid URL: %s' % url)
2010         return mobj.group('id')
2011
2012     def _extract_chapters_from_json(self, data, duration):
2013         chapter_list = traverse_obj(
2014             data, (
2015                 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer',
2016                 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters'
2017             ), expected_type=list)
2018
2019         return self._extract_chapters(
2020             chapter_list,
2021             chapter_time=lambda chapter: float_or_none(
2022                 traverse_obj(chapter, ('chapterRenderer', 'timeRangeStartMillis')), scale=1000),
2023             chapter_title=lambda chapter: traverse_obj(
2024                 chapter, ('chapterRenderer', 'title', 'simpleText'), expected_type=str),
2025             duration=duration)
2026
2027     def _extract_chapters_from_engagement_panel(self, data, duration):
2028         content_list = traverse_obj(
2029             data,
2030             ('engagementPanels', ..., 'engagementPanelSectionListRenderer', 'content', 'macroMarkersListRenderer', 'contents'),
2031             expected_type=list, default=[])
2032         chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription'))
2033         chapter_title = lambda chapter: self._get_text(chapter, 'title')
2034
2035         return next((
2036             filter(None, (
2037                 self._extract_chapters(
2038                     traverse_obj(contents, (..., 'macroMarkersListItemRenderer')),
2039                     chapter_time, chapter_title, duration)
2040                 for contents in content_list
2041             ))), [])
2042
2043     def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration):
2044         chapters = []
2045         last_chapter = {'start_time': 0}
2046         for idx, chapter in enumerate(chapter_list or []):
2047             title = chapter_title(chapter)
2048             start_time = chapter_time(chapter)
2049             if start_time is None:
2050                 continue
2051             last_chapter['end_time'] = start_time
2052             if start_time < last_chapter['start_time']:
2053                 if idx == 1:
2054                     chapters.pop()
2055                     self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title'])
2056                 else:
2057                     self.report_warning(f'Invalid start time for chapter "{title}"')
2058                     continue
2059             last_chapter = {'start_time': start_time, 'title': title}
2060             chapters.append(last_chapter)
2061         last_chapter['end_time'] = duration
2062         return chapters
2063
2064     def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
2065         return self._parse_json(self._search_regex(
2066             (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE),
2067              regex), webpage, name, default='{}'), video_id, fatal=False)
2068
2069     @staticmethod
2070     def parse_time_text(time_text):
2071         """
2072         Parse the comment time text
2073         time_text is in the format 'X units ago (edited)'
2074         """
2075         time_text_split = time_text.split(' ')
2076         if len(time_text_split) >= 3:
2077             try:
2078                 return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
2079             except ValueError:
2080                 return None
2081
2082     def _extract_comment(self, comment_renderer, parent=None):
2083         comment_id = comment_renderer.get('commentId')
2084         if not comment_id:
2085             return
2086
2087         text = self._get_text(comment_renderer, 'contentText')
2088
2089         # note: timestamp is an estimate calculated from the current time and time_text
2090         time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
2091         time_text_dt = self.parse_time_text(time_text)
2092         if isinstance(time_text_dt, datetime.datetime):
2093             timestamp = calendar.timegm(time_text_dt.timetuple())
2094         author = self._get_text(comment_renderer, 'authorText')
2095         author_id = try_get(comment_renderer,
2096                             lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
2097
2098         votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
2099                                                        lambda x: x['likeCount']), compat_str)) or 0
2100         author_thumbnail = try_get(comment_renderer,
2101                                    lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], compat_str)
2102
2103         author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
2104         is_favorited = 'creatorHeart' in (try_get(
2105             comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
2106         return {
2107             'id': comment_id,
2108             'text': text,
2109             'timestamp': timestamp,
2110             'time_text': time_text,
2111             'like_count': votes,
2112             'is_favorited': is_favorited,
2113             'author': author,
2114             'author_id': author_id,
2115             'author_thumbnail': author_thumbnail,
2116             'author_is_uploader': author_is_uploader,
2117             'parent': parent or 'root'
2118         }
2119
2120     def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None):
2121
2122         def extract_header(contents):
2123             _continuation = None
2124             for content in contents:
2125                 comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer'])
2126                 expected_comment_count = parse_count(self._get_text(
2127                     comments_header_renderer, 'countText', 'commentsCount', max_runs=1))
2128
2129                 if expected_comment_count:
2130                     comment_counts[1] = expected_comment_count
2131                     self.to_screen('Downloading ~%d comments' % expected_comment_count)
2132                 sort_mode_str = self._configuration_arg('comment_sort', [''])[0]
2133                 comment_sort_index = int(sort_mode_str != 'top')  # 1 = new, 0 = top
2134
2135                 sort_menu_item = try_get(
2136                     comments_header_renderer,
2137                     lambda x: x['sortMenu']['sortFilterSubMenuRenderer']['subMenuItems'][comment_sort_index], dict) or {}
2138                 sort_continuation_ep = sort_menu_item.get('serviceEndpoint') or {}
2139
2140                 _continuation = self._extract_continuation_ep_data(sort_continuation_ep) or self._extract_continuation(sort_menu_item)
2141                 if not _continuation:
2142                     continue
2143
2144                 sort_text = sort_menu_item.get('title')
2145                 if isinstance(sort_text, compat_str):
2146                     sort_text = sort_text.lower()
2147                 else:
2148                     sort_text = 'top comments' if comment_sort_index == 0 else 'newest first'
2149                 self.to_screen('Sorting comments by %s' % sort_text)
2150                 break
2151             return _continuation
2152
2153         def extract_thread(contents):
2154             if not parent:
2155                 comment_counts[2] = 0
2156             for content in contents:
2157                 comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer'])
2158                 comment_renderer = try_get(
2159                     comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get(
2160                     content, (lambda x: x['commentRenderer'], dict))
2161
2162                 if not comment_renderer:
2163                     continue
2164                 comment = self._extract_comment(comment_renderer, parent)
2165                 if not comment:
2166                     continue
2167                 comment_counts[0] += 1
2168                 yield comment
2169                 # Attempt to get the replies
2170                 comment_replies_renderer = try_get(
2171                     comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict)
2172
2173                 if comment_replies_renderer:
2174                     comment_counts[2] += 1
2175                     comment_entries_iter = self._comment_entries(
2176                         comment_replies_renderer, ytcfg, video_id,
2177                         parent=comment.get('id'), comment_counts=comment_counts)
2178
2179                     for reply_comment in comment_entries_iter:
2180                         yield reply_comment
2181
2182         # YouTube comments have a max depth of 2
2183         max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf')
2184         if max_depth == 1 and parent:
2185             return
2186         if not comment_counts:
2187             # comment so far, est. total comments, current comment thread #
2188             comment_counts = [0, 0, 0]
2189
2190         continuation = self._extract_continuation(root_continuation_data)
2191         if continuation and len(continuation['continuation']) < 27:
2192             self.write_debug('Detected old API continuation token. Generating new API compatible token.')
2193             continuation_token = self._generate_comment_continuation(video_id)
2194             continuation = self._build_api_continuation_query(continuation_token, None)
2195
2196         message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1)
2197         if message and not parent:
2198             self.report_warning(message, video_id=video_id)
2199
2200         visitor_data = None
2201         is_first_continuation = parent is None
2202
2203         for page_num in itertools.count(0):
2204             if not continuation:
2205                 break
2206             headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
2207             comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1])
2208             if page_num == 0:
2209                 if is_first_continuation:
2210                     note_prefix = 'Downloading comment section API JSON'
2211                 else:
2212                     note_prefix = '    Downloading comment API JSON reply thread %d %s' % (
2213                         comment_counts[2], comment_prog_str)
2214             else:
2215                 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
2216                     '       ' if parent else '', ' replies' if parent else '',
2217                     page_num, comment_prog_str)
2218
2219             response = self._extract_response(
2220                 item_id=None, query=continuation,
2221                 ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
2222                 check_get_keys=('onResponseReceivedEndpoints', 'continuationContents'))
2223             if not response:
2224                 break
2225             visitor_data = try_get(
2226                 response,
2227                 lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'],
2228                 compat_str) or visitor_data
2229
2230             continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents'))
2231
2232             continuation = None
2233             if isinstance(continuation_contents, list):
2234                 for continuation_section in continuation_contents:
2235                     if not isinstance(continuation_section, dict):
2236                         continue
2237                     continuation_items = try_get(
2238                         continuation_section,
2239                         (lambda x: x['reloadContinuationItemsCommand']['continuationItems'],
2240                          lambda x: x['appendContinuationItemsAction']['continuationItems']),
2241                         list) or []
2242                     if is_first_continuation:
2243                         continuation = extract_header(continuation_items)
2244                         is_first_continuation = False
2245                         if continuation:
2246                             break
2247                         continue
2248                     count = 0
2249                     for count, entry in enumerate(extract_thread(continuation_items)):
2250                         yield entry
2251                     continuation = self._extract_continuation({'contents': continuation_items})
2252                     if continuation:
2253                         # Sometimes YouTube provides a continuation without any comments
2254                         # In most cases we end up just downloading these with very little comments to come.
2255                         if count == 0:
2256                             if not parent:
2257                                 self.report_warning('No comments received - assuming end of comments')
2258                             continuation = None
2259                         break
2260
2261             # Deprecated response structure
2262             elif isinstance(continuation_contents, dict):
2263                 known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation')
2264                 for key, continuation_renderer in continuation_contents.items():
2265                     if key not in known_continuation_renderers:
2266                         continue
2267                     if not isinstance(continuation_renderer, dict):
2268                         continue
2269                     if is_first_continuation:
2270                         header_continuation_items = [continuation_renderer.get('header') or {}]
2271                         continuation = extract_header(header_continuation_items)
2272                         is_first_continuation = False
2273                         if continuation:
2274                             break
2275
2276                     # Sometimes YouTube provides a continuation without any comments
2277                     # In most cases we end up just downloading these with very little comments to come.
2278                     count = 0
2279                     for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})):
2280                         yield entry
2281                     continuation = self._extract_continuation(continuation_renderer)
2282                     if count == 0:
2283                         if not parent:
2284                             self.report_warning('No comments received - assuming end of comments')
2285                         continuation = None
2286                     break
2287
2288     @staticmethod
2289     def _generate_comment_continuation(video_id):
2290         """
2291         Generates initial comment section continuation token from given video id
2292         """
2293         b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8')))
2294         parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u')
2295         new_continuation_intlist = list(itertools.chain.from_iterable(
2296             [bytes_to_intlist(base64.b64decode(part)) for part in parts]))
2297         return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8')
2298
2299     def _get_comments(self, ytcfg, video_id, contents, webpage):
2300         """Entry for comment extraction"""
2301         def _real_comment_extract(contents):
2302             renderer = next((
2303                 item for item in traverse_obj(contents, (..., 'itemSectionRenderer'), default={})
2304                 if item.get('sectionIdentifier') == 'comment-item-section'), None)
2305             yield from self._comment_entries(renderer, ytcfg, video_id)
2306
2307         max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
2308         # Force English regardless of account setting to prevent parsing issues
2309         # See: https://github.com/yt-dlp/yt-dlp/issues/532
2310         ytcfg = copy.deepcopy(ytcfg)
2311         traverse_obj(
2312             ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
2313         return itertools.islice(_real_comment_extract(contents), 0, max_comments)
2314
2315     @staticmethod
2316     def _get_checkok_params():
2317         return {'contentCheckOk': True, 'racyCheckOk': True}
2318
2319     @classmethod
2320     def _generate_player_context(cls, sts=None):
2321         context = {
2322             'html5Preference': 'HTML5_PREF_WANTS',
2323         }
2324         if sts is not None:
2325             context['signatureTimestamp'] = sts
2326         return {
2327             'playbackContext': {
2328                 'contentPlaybackContext': context
2329             },
2330             **cls._get_checkok_params()
2331         }
2332
2333     @staticmethod
2334     def _is_agegated(player_response):
2335         if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')):
2336             return True
2337
2338         reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[])
2339         AGE_GATE_REASONS = (
2340             'confirm your age', 'age-restricted', 'inappropriate',  # reason
2341             'age_verification_required', 'age_check_required',  # status
2342         )
2343         return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons)
2344
2345     @staticmethod
2346     def _is_unplayable(player_response):
2347         return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
2348
2349     def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
2350
2351         session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
2352         syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
2353         sts = self._extract_signature_timestamp(video_id, player_url, master_ytcfg, fatal=False) if player_url else None
2354         headers = self.generate_api_headers(
2355             ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client)
2356
2357         yt_query = {'videoId': video_id}
2358         yt_query.update(self._generate_player_context(sts))
2359         return self._extract_response(
2360             item_id=video_id, ep='player', query=yt_query,
2361             ytcfg=player_ytcfg, headers=headers, fatal=True,
2362             default_client=client,
2363             note='Downloading %s player API JSON' % client.replace('_', ' ').strip()
2364         ) or None
2365
2366     def _get_requested_clients(self, url, smuggled_data):
2367         requested_clients = []
2368         default = ['android', 'web']
2369         allowed_clients = sorted(
2370             [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'],
2371             key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
2372         for client in self._configuration_arg('player_client'):
2373             if client in allowed_clients:
2374                 requested_clients.append(client)
2375             elif client == 'default':
2376                 requested_clients.extend(default)
2377             elif client == 'all':
2378                 requested_clients.extend(allowed_clients)
2379             else:
2380                 self.report_warning(f'Skipping unsupported client {client}')
2381         if not requested_clients:
2382             requested_clients = default
2383
2384         if smuggled_data.get('is_music_url') or self.is_music_url(url):
2385             requested_clients.extend(
2386                 f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS)
2387
2388         return orderedSet(requested_clients)
2389
2390     def _extract_player_ytcfg(self, client, video_id):
2391         url = {
2392             'web_music': 'https://music.youtube.com',
2393             'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
2394         }.get(client)
2395         if not url:
2396             return {}
2397         webpage = self._download_webpage(url, video_id, fatal=False, note=f'Downloading {client} config')
2398         return self.extract_ytcfg(video_id, webpage) or {}
2399
    def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
        """
        Collect the player responses of the given clients.

        Returns a tuple (player responses, player_url). When the webpage has an
        initial player response, a copy of it without streaming data is always
        the first response. Extraction errors of individual clients are
        reported as warnings; the last one is raised if no response at all
        could be collected.
        """
        initial_pr = None
        if webpage:
            initial_pr = self._extract_yt_initial_variable(
                webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
                video_id, 'initial player response')

        original_clients = clients
        # Reversed copy used as a stack: pop() yields the clients in the requested order
        clients = clients[::-1]
        prs = []

        def append_client(client_name):
            # Queue a known client that was not requested; it is tried next
            if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
                clients.append(client_name)

        # Android player_response does not have microFormats which are needed for
        # extraction of some data. So we return the initial_pr with formats
        # stripped out even if not requested by the user
        # See: https://github.com/yt-dlp/yt-dlp/issues/501
        if initial_pr:
            pr = dict(initial_pr)
            pr['streamingData'] = None
            prs.append(pr)

        last_error = None
        tried_iframe_fallback = False
        player_url = None
        while clients:
            client = clients.pop()
            player_ytcfg = master_ytcfg if client == 'web' else {}
            if 'configs' not in self._configuration_arg('player_skip'):
                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg

            # A player URL found for an earlier client is reused
            player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
            require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
            if 'js' in self._configuration_arg('player_skip'):
                require_js_player = False
                player_url = None

            # The fallback download of the player URL is attempted at most once
            if not player_url and not tried_iframe_fallback and require_js_player:
                player_url = self._download_player_url(video_id)
                tried_iframe_fallback = True

            try:
                # The web client reuses the initial player response when there is one
                pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
                    client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
            except ExtractorError as e:
                # Only the most recent error is kept; older ones become warnings
                if last_error:
                    self.report_warning(last_error)
                last_error = e
                continue

            if pr:
                prs.append(pr)

            # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
            if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
                append_client(client.replace('_agegate', '_creator'))
            elif self._is_agegated(pr):
                append_client(f'{client}_agegate')

        if last_error:
            if not len(prs):
                raise last_error
            self.report_warning(last_error)
        return prs, player_url
2466
2467     def _extract_formats(self, streaming_data, video_id, player_url, is_live):
2468         itags, stream_ids = {}, []
2469         itag_qualities, res_qualities = {}, {}
2470         q = qualities([
2471             # Normally tiny is the smallest video-only formats. But
2472             # audio-only formats with unknown quality may get tagged as tiny
2473             'tiny',
2474             'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high',  # Audio only formats
2475             'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
2476         ])
2477         streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
2478
2479         for fmt in streaming_formats:
2480             if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
2481                 continue
2482
2483             itag = str_or_none(fmt.get('itag'))
2484             audio_track = fmt.get('audioTrack') or {}
2485             stream_id = '%s.%s' % (itag or '', audio_track.get('id', ''))
2486             if stream_id in stream_ids:
2487                 continue
2488
2489             quality = fmt.get('quality')
2490             height = int_or_none(fmt.get('height'))
2491             if quality == 'tiny' or not quality:
2492                 quality = fmt.get('audioQuality', '').lower() or quality
2493             # The 3gp format (17) in android client has a quality of "small",
2494             # but is actually worse than other formats
2495             if itag == '17':
2496                 quality = 'tiny'
2497             if quality:
2498                 if itag:
2499                     itag_qualities[itag] = quality
2500                 if height:
2501                     res_qualities[height] = quality
2502             # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment
2503             # (adding `&sq=0` to the URL) and parsing emsg box to determine the
2504             # number of fragment that would subsequently requested with (`&sq=N`)
2505             if fmt.get('type') == 'FORMAT_STREAM_TYPE_OTF':
2506                 continue
2507
2508             fmt_url = fmt.get('url')
2509             if not fmt_url:
2510                 sc = compat_parse_qs(fmt.get('signatureCipher'))
2511                 fmt_url = url_or_none(try_get(sc, lambda x: x['url'][0]))
2512                 encrypted_sig = try_get(sc, lambda x: x['s'][0])
2513                 if not (sc and fmt_url and encrypted_sig):
2514                     continue
2515                 if not player_url:
2516                     continue
2517                 signature = self._decrypt_signature(sc['s'][0], video_id, player_url)
2518                 sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
2519                 fmt_url += '&' + sp + '=' + signature
2520
2521             query = parse_qs(fmt_url)
2522             throttled = False
2523             if query.get('n'):
2524                 try:
2525                     fmt_url = update_url_query(fmt_url, {
2526                         'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
2527                 except ExtractorError as e:
2528                     self.report_warning(
2529                         f'nsig extraction failed: You may experience throttling for some formats\n'
2530                         f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True)
2531                     throttled = True
2532
2533             if itag:
2534                 itags[itag] = 'https'
2535                 stream_ids.append(stream_id)
2536
2537             tbr = float_or_none(
2538                 fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
2539             dct = {
2540                 'asr': int_or_none(fmt.get('audioSampleRate')),
2541                 'filesize': int_or_none(fmt.get('contentLength')),
2542                 'format_id': itag,
2543                 'format_note': join_nonempty(
2544                     '%s%s' % (audio_track.get('displayName') or '',
2545                               ' (default)' if audio_track.get('audioIsDefault') else ''),
2546                     fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
2547                     throttled and 'THROTTLED', delim=', '),
2548                 'source_preference': -10 if throttled else -1,
2549                 'fps': int_or_none(fmt.get('fps')) or None,
2550                 'height': height,
2551                 'quality': q(quality),
2552                 'tbr': tbr,
2553                 'url': fmt_url,
2554                 'width': int_or_none(fmt.get('width')),
2555                 'language': audio_track.get('id', '').split('.')[0],
2556                 'language_preference': 1 if audio_track.get('audioIsDefault') else -1,
2557             }
2558             mime_mobj = re.match(
2559                 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
2560             if mime_mobj:
2561                 dct['ext'] = mimetype2ext(mime_mobj.group(1))
2562                 dct.update(parse_codecs(mime_mobj.group(2)))
2563             no_audio = dct.get('acodec') == 'none'
2564             no_video = dct.get('vcodec') == 'none'
2565             if no_audio:
2566                 dct['vbr'] = tbr
2567             if no_video:
2568                 dct['abr'] = tbr
2569             if no_audio or no_video:
2570                 dct['downloader_options'] = {
2571                     # Youtube throttles chunks >~10M
2572                     'http_chunk_size': 10485760,
2573                 }
2574                 if dct.get('ext'):
2575                     dct['container'] = dct['ext'] + '_dash'
2576             yield dct
2577
2578         skip_manifests = self._configuration_arg('skip')
2579         get_dash = (
2580             (not is_live or self._configuration_arg('include_live_dash'))
2581             and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
2582         get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
2583
2584         def process_manifest_format(f, proto, itag):
2585             if itag in itags:
2586                 if itags[itag] == proto or f'{itag}-{proto}' in itags:
2587                     return False
2588                 itag = f'{itag}-{proto}'
2589             if itag:
2590                 f['format_id'] = itag
2591                 itags[itag] = proto
2592
2593             f['quality'] = next((
2594                 q(qdict[val])
2595                 for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
2596                 if val in qdict), -1)
2597             return True
2598
2599         for sd in streaming_data:
2600             hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
2601             if hls_manifest_url:
2602                 for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
2603                     if process_manifest_format(f, 'hls', self._search_regex(
2604                             r'/itag/(\d+)', f['url'], 'itag', default=None)):
2605                         yield f
2606
2607             dash_manifest_url = get_dash and sd.get('dashManifestUrl')
2608             if dash_manifest_url:
2609                 for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
2610                     if process_manifest_format(f, 'dash', f['format_id']):
2611                         f['filesize'] = int_or_none(self._search_regex(
2612                             r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
2613                         yield f
2614
2615     def _extract_storyboard(self, player_responses, duration):
2616         spec = get_first(
2617             player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1]
2618         if not spec:
2619             return
2620         base_url = spec.pop()
2621         L = len(spec) - 1
2622         for i, args in enumerate(spec):
2623             args = args.split('#')
2624             counts = list(map(int_or_none, args[:5]))
2625             if len(args) != 8 or not all(counts):
2626                 self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}')
2627                 continue
2628             width, height, frame_count, cols, rows = counts
2629             N, sigh = args[6:]
2630
2631             url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}'
2632             fragment_count = frame_count / (cols * rows)
2633             fragment_duration = duration / fragment_count
2634             yield {
2635                 'format_id': f'sb{i}',
2636                 'format_note': 'storyboard',
2637                 'ext': 'mhtml',
2638                 'protocol': 'mhtml',
2639                 'acodec': 'none',
2640                 'vcodec': 'none',
2641                 'url': url,
2642                 'width': width,
2643                 'height': height,
2644                 'fragments': [{
2645                     'path': url.replace('$M', str(j)),
2646                     'duration': min(fragment_duration, duration - (j * fragment_duration)),
2647                 } for j in range(math.ceil(fragment_count))],
2648             }
2649
2650     def _real_extract(self, url):
2651         url, smuggled_data = unsmuggle_url(url, {})
2652         video_id = self._match_id(url)
2653
2654         base_url = self.http_scheme() + '//www.youtube.com/'
2655         webpage_url = base_url + 'watch?v=' + video_id
2656         webpage = None
2657         if 'webpage' not in self._configuration_arg('player_skip'):
2658             webpage = self._download_webpage(
2659                 webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False)
2660
2661         master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
2662
2663         player_responses, player_url = self._extract_player_responses(
2664             self._get_requested_clients(url, smuggled_data),
2665             video_id, webpage, master_ytcfg)
2666
2667         playability_statuses = traverse_obj(
2668             player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
2669
2670         trailer_video_id = get_first(
2671             playability_statuses,
2672             ('errorScreen', 'playerLegacyDesktopYpcTrailerRenderer', 'trailerVideoId'),
2673             expected_type=str)
2674         if trailer_video_id:
2675             return self.url_result(
2676                 trailer_video_id, self.ie_key(), trailer_video_id)
2677
2678         search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None))
2679                        if webpage else (lambda x: None))
2680
2681         video_details = traverse_obj(
2682             player_responses, (..., 'videoDetails'), expected_type=dict, default=[])
2683         microformats = traverse_obj(
2684             player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
2685             expected_type=dict, default=[])
2686         video_title = (
2687             get_first(video_details, 'title')
2688             or self._get_text(microformats, (..., 'title'))
2689             or search_meta(['og:title', 'twitter:title', 'title']))
2690         video_description = get_first(video_details, 'shortDescription')
2691
2692         multifeed_metadata_list = get_first(
2693             player_responses,
2694             ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'),
2695             expected_type=str)
2696         if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'):
2697             if self.get_param('noplaylist'):
2698                 self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
2699             else:
2700                 entries = []
2701                 feed_ids = []
2702                 for feed in multifeed_metadata_list.split(','):
2703                     # Unquote should take place before split on comma (,) since textual
2704                     # fields may contain comma as well (see
2705                     # https://github.com/ytdl-org/youtube-dl/issues/8536)
2706                     feed_data = compat_parse_qs(
2707                         compat_urllib_parse_unquote_plus(feed))
2708
2709                     def feed_entry(name):
2710                         return try_get(
2711                             feed_data, lambda x: x[name][0], compat_str)
2712
2713                     feed_id = feed_entry('id')
2714                     if not feed_id:
2715                         continue
2716                     feed_title = feed_entry('title')
2717                     title = video_title
2718                     if feed_title:
2719                         title += ' (%s)' % feed_title
2720                     entries.append({
2721                         '_type': 'url_transparent',
2722                         'ie_key': 'Youtube',
2723                         'url': smuggle_url(
2724                             '%swatch?v=%s' % (base_url, feed_data['id'][0]),
2725                             {'force_singlefeed': True}),
2726                         'title': title,
2727                     })
2728                     feed_ids.append(feed_id)
2729                 self.to_screen(
2730                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
2731                     % (', '.join(feed_ids), video_id))
2732                 return self.playlist_result(
2733                     entries, video_id, video_title, video_description)
2734
2735         live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
2736         is_live = get_first(video_details, 'isLive')
2737         if is_live is None:
2738             is_live = get_first(live_broadcast_details, 'isLiveNow')
2739
2740         streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
2741         formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
2742
2743         if not formats:
2744             if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
2745                 self.report_drm(video_id)
2746             pemr = get_first(
2747                 playability_statuses,
2748                 ('errorScreen', 'playerErrorMessageRenderer'), expected_type=dict) or {}
2749             reason = self._get_text(pemr, 'reason') or get_first(playability_statuses, 'reason')
2750             subreason = clean_html(self._get_text(pemr, 'subreason') or '')
2751             if subreason:
2752                 if subreason == 'The uploader has not made this video available in your country.':
2753                     countries = get_first(microformats, 'availableCountries')
2754                     if not countries:
2755                         regions_allowed = search_meta('regionsAllowed')
2756                         countries = regions_allowed.split(',') if regions_allowed else None
2757                     self.raise_geo_restricted(subreason, countries, metadata_available=True)
2758                 reason += f'. {subreason}'
2759             if reason:
2760                 self.raise_no_formats(reason, expected=True)
2761
2762         keywords = get_first(video_details, 'keywords', expected_type=list) or []
2763         if not keywords and webpage:
2764             keywords = [
2765                 unescapeHTML(m.group('content'))
2766                 for m in re.finditer(self._meta_regex('og:video:tag'), webpage)]
2767         for keyword in keywords:
2768             if keyword.startswith('yt:stretch='):
2769                 mobj = re.search(r'(\d+)\s*:\s*(\d+)', keyword)
2770                 if mobj:
2771                     # NB: float is intentional for forcing float division
2772                     w, h = (float(v) for v in mobj.groups())
2773                     if w > 0 and h > 0:
2774                         ratio = w / h
2775                         for f in formats:
2776                             if f.get('vcodec') != 'none':
2777                                 f['stretched_ratio'] = ratio
2778                         break
2779
2780         thumbnails = []
2781         thumbnail_dicts = traverse_obj(
2782             (video_details, microformats), (..., ..., 'thumbnail', 'thumbnails', ...),
2783             expected_type=dict, default=[])
2784         for thumbnail in thumbnail_dicts:
2785             thumbnail_url = thumbnail.get('url')
2786             if not thumbnail_url:
2787                 continue
2788             # Sometimes youtube gives a wrong thumbnail URL. See:
2789             # https://github.com/yt-dlp/yt-dlp/issues/233
2790             # https://github.com/ytdl-org/youtube-dl/issues/28023
2791             if 'maxresdefault' in thumbnail_url:
2792                 thumbnail_url = thumbnail_url.split('?')[0]
2793             thumbnails.append({
2794                 'url': thumbnail_url,
2795                 'height': int_or_none(thumbnail.get('height')),
2796                 'width': int_or_none(thumbnail.get('width')),
2797             })
2798         thumbnail_url = search_meta(['og:image', 'twitter:image'])
2799         if thumbnail_url:
2800             thumbnails.append({
2801                 'url': thumbnail_url,
2802             })
2803         original_thumbnails = thumbnails.copy()
2804
2805         # The best resolution thumbnails sometimes does not appear in the webpage
2806         # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
2807         # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
2808         thumbnail_names = [
2809             'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
2810             'hqdefault', 'hq1', 'hq2', 'hq3', '0',
2811             'mqdefault', 'mq1', 'mq2', 'mq3',
2812             'default', '1', '2', '3'
2813         ]
2814         n_thumbnail_names = len(thumbnail_names)
2815         thumbnails.extend({
2816             'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
2817                 video_id=video_id, name=name, ext=ext,
2818                 webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
2819         } for name in thumbnail_names for ext in ('webp', 'jpg'))
2820         for thumb in thumbnails:
2821             i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
2822             thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
2823         self._remove_duplicate_formats(thumbnails)
2824         self._downloader._sort_thumbnails(original_thumbnails)
2825
2826         category = get_first(microformats, 'category') or search_meta('genre')
2827         channel_id = str_or_none(
2828             get_first(video_details, 'channelId')
2829             or get_first(microformats, 'externalChannelId')
2830             or search_meta('channelId'))
2831         duration = int_or_none(
2832             get_first(video_details, 'lengthSeconds')
2833             or get_first(microformats, 'lengthSeconds')
2834             or parse_duration(search_meta('duration'))) or None
2835         owner_profile_url = get_first(microformats, 'ownerProfileUrl')
2836
2837         live_content = get_first(video_details, 'isLiveContent')
2838         is_upcoming = get_first(video_details, 'isUpcoming')
2839         if is_live is None:
2840             if is_upcoming or live_content is False:
2841                 is_live = False
2842         if is_upcoming is None and (live_content or is_live):
2843             is_upcoming = False
2844         live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
2845         live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
2846         if not duration and live_endtime and live_starttime:
2847             duration = live_endtime - live_starttime
2848
2849         formats.extend(self._extract_storyboard(player_responses, duration))
2850
2851         # Source is given priority since formats that throttle are given lower source_preference
2852         # When throttling issue is fully fixed, remove this
2853         self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
2854
2855         info = {
2856             'id': video_id,
2857             'title': self._live_title(video_title) if is_live else video_title,
2858             'formats': formats,
2859             'thumbnails': thumbnails,
2860             # The best thumbnail that we are sure exists. Prevents unnecessary
2861             # URL checking if user don't care about getting the best possible thumbnail
2862             'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
2863             'description': video_description,
2864             'upload_date': unified_strdate(
2865                 get_first(microformats, 'uploadDate')
2866                 or search_meta('uploadDate')),
2867             'uploader': get_first(video_details, 'author'),
2868             'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None,
2869             'uploader_url': owner_profile_url,
2870             'channel_id': channel_id,
2871             'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
2872             'duration': duration,
2873             'view_count': int_or_none(
2874                 get_first((video_details, microformats), (..., 'viewCount'))
2875                 or search_meta('interactionCount')),
2876             'average_rating': float_or_none(get_first(video_details, 'averageRating')),
2877             'age_limit': 18 if (
2878                 get_first(microformats, 'isFamilySafe') is False
2879                 or search_meta('isFamilyFriendly') == 'false'
2880                 or search_meta('og:restrictions:age') == '18+') else 0,
2881             'webpage_url': webpage_url,
2882             'categories': [category] if category else None,
2883             'tags': keywords,
2884             'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
2885             'is_live': is_live,
2886             'was_live': (False if is_live or is_upcoming or live_content is False
2887                          else None if is_live is None or is_upcoming is None
2888                          else live_content),
2889             'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
2890             'release_timestamp': live_starttime,
2891         }
2892
2893         pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
2894         if pctr:
2895             def get_lang_code(track):
2896                 return (remove_start(track.get('vssId') or '', '.').replace('.', '-')
2897                         or track.get('languageCode'))
2898
2899             # Converted into dicts to remove duplicates
2900             captions = {
2901                 get_lang_code(sub): sub
2902                 for sub in traverse_obj(pctr, (..., 'captionTracks', ...), default=[])}
2903             translation_languages = {
2904                 lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
2905                 for lang in traverse_obj(pctr, (..., 'translationLanguages', ...), default=[])}
2906
2907             def process_language(container, base_url, lang_code, sub_name, query):
2908                 lang_subs = container.setdefault(lang_code, [])
2909                 for fmt in self._SUBTITLE_FORMATS:
2910                     query.update({
2911                         'fmt': fmt,
2912                     })
2913                     lang_subs.append({
2914                         'ext': fmt,
2915                         'url': update_url_query(base_url, query),
2916                         'name': sub_name,
2917                     })
2918
2919             subtitles, automatic_captions = {}, {}
2920             for lang_code, caption_track in captions.items():
2921                 base_url = caption_track.get('baseUrl')
2922                 if not base_url:
2923                     continue
2924                 lang_name = self._get_text(caption_track, 'name', max_runs=1)
2925                 if caption_track.get('kind') != 'asr':
2926                     if not lang_code:
2927                         continue
2928                     process_language(
2929                         subtitles, base_url, lang_code, lang_name, {})
2930                     if not caption_track.get('isTranslatable'):
2931                         continue
2932                 for trans_code, trans_name in translation_languages.items():
2933                     if not trans_code:
2934                         continue
2935                     if caption_track.get('kind') != 'asr':
2936                         trans_code += f'-{lang_code}'
2937                         trans_name += format_field(lang_name, template=' from %s')
2938                     process_language(
2939                         automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code})
2940             info['automatic_captions'] = automatic_captions
2941             info['subtitles'] = subtitles
2942
2943         parsed_url = compat_urllib_parse_urlparse(url)
2944         for component in [parsed_url.fragment, parsed_url.query]:
2945             query = compat_parse_qs(component)
2946             for k, v in query.items():
2947                 for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]:
2948                     d_k += '_time'
2949                     if d_k not in info and k in s_ks:
2950                         info[d_k] = parse_duration(query[k][0])
2951
2952         # Youtube Music Auto-generated description
2953         if video_description:
2954             mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
2955             if mobj:
2956                 release_year = mobj.group('release_year')
2957                 release_date = mobj.group('release_date')
2958                 if release_date:
2959                     release_date = release_date.replace('-', '')
2960                     if not release_year:
2961                         release_year = release_date[:4]
2962                 info.update({
2963                     'album': mobj.group('album'.strip()),
2964                     'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')),
2965                     'track': mobj.group('track').strip(),
2966                     'release_date': release_date,
2967                     'release_year': int_or_none(release_year),
2968                 })
2969
2970         initial_data = None
2971         if webpage:
2972             initial_data = self._extract_yt_initial_variable(
2973                 webpage, self._YT_INITIAL_DATA_RE, video_id,
2974                 'yt initial data')
2975         if not initial_data:
2976             query = {'videoId': video_id}
2977             query.update(self._get_checkok_params())
2978             initial_data = self._extract_response(
2979                 item_id=video_id, ep='next', fatal=False,
2980                 ytcfg=master_ytcfg, query=query,
2981                 headers=self.generate_api_headers(ytcfg=master_ytcfg),
2982                 note='Downloading initial data API JSON')
2983
2984         try:
2985             # This will error if there is no livechat
2986             initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
2987             info.setdefault('subtitles', {})['live_chat'] = [{
2988                 'url': 'https://www.youtube.com/watch?v=%s' % video_id,  # url is needed to set cookies
2989                 'video_id': video_id,
2990                 'ext': 'json',
2991                 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
2992             }]
2993         except (KeyError, IndexError, TypeError):
2994             pass
2995
2996         if initial_data:
2997             info['chapters'] = (
2998                 self._extract_chapters_from_json(initial_data, duration)
2999                 or self._extract_chapters_from_engagement_panel(initial_data, duration)
3000                 or None)
3001
3002             contents = try_get(
3003                 initial_data,
3004                 lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'],
3005                 list) or []
3006             for content in contents:
3007                 vpir = content.get('videoPrimaryInfoRenderer')
3008                 if vpir:
3009                     stl = vpir.get('superTitleLink')
3010                     if stl:
3011                         stl = self._get_text(stl)
3012                         if try_get(
3013                                 vpir,
3014                                 lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN':
3015                             info['location'] = stl
3016                         else:
3017                             mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl)
3018                             if mobj:
3019                                 info.update({
3020                                     'series': mobj.group(1),
3021                                     'season_number': int(mobj.group(2)),
3022                                     'episode_number': int(mobj.group(3)),
3023                                 })
3024                     for tlb in (try_get(
3025                             vpir,
3026                             lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
3027                             list) or []):
3028                         tbr = tlb.get('toggleButtonRenderer') or {}
3029                         for getter, regex in [(
3030                                 lambda x: x['defaultText']['accessibility']['accessibilityData'],
3031                                 r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
3032                                     lambda x: x['accessibility'],
3033                                     lambda x: x['accessibilityData']['accessibilityData'],
3034                                 ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
3035                             label = (try_get(tbr, getter, dict) or {}).get('label')
3036                             if label:
3037                                 mobj = re.match(regex, label)
3038                                 if mobj:
3039                                     info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
3040                                     break
3041                     sbr_tooltip = try_get(
3042                         vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
3043                     if sbr_tooltip:
3044                         like_count, dislike_count = sbr_tooltip.split(' / ')
3045                         info.update({
3046                             'like_count': str_to_int(like_count),
3047                             'dislike_count': str_to_int(dislike_count),
3048                         })
3049                 vsir = content.get('videoSecondaryInfoRenderer')
3050                 if vsir:
3051                     info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title'))
3052                     rows = try_get(
3053                         vsir,
3054                         lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'],
3055                         list) or []
3056                     multiple_songs = False
3057                     for row in rows:
3058                         if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
3059                             multiple_songs = True
3060                             break
3061                     for row in rows:
3062                         mrr = row.get('metadataRowRenderer') or {}
3063                         mrr_title = mrr.get('title')
3064                         if not mrr_title:
3065                             continue
3066                         mrr_title = self._get_text(mrr, 'title')
3067                         mrr_contents_text = self._get_text(mrr, ('contents', 0))
3068                         if mrr_title == 'License':
3069                             info['license'] = mrr_contents_text
3070                         elif not multiple_songs:
3071                             if mrr_title == 'Album':
3072                                 info['album'] = mrr_contents_text
3073                             elif mrr_title == 'Artist':
3074                                 info['artist'] = mrr_contents_text
3075                             elif mrr_title == 'Song':
3076                                 info['track'] = mrr_contents_text
3077
3078         fallbacks = {
3079             'channel': 'uploader',
3080             'channel_id': 'uploader_id',
3081             'channel_url': 'uploader_url',
3082         }
3083         for to, frm in fallbacks.items():
3084             if not info.get(to):
3085                 info[to] = info.get(frm)
3086
3087         for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
3088             v = info.get(s_k)
3089             if v:
3090                 info[d_k] = v
3091
3092         is_private = get_first(video_details, 'isPrivate', expected_type=bool)
3093         is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
3094         is_membersonly = None
3095         is_premium = None
3096         if initial_data and is_private is not None:
3097             is_membersonly = False
3098             is_premium = False
3099             contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
3100             badge_labels = set()
3101             for content in contents:
3102                 if not isinstance(content, dict):
3103                     continue
3104                 badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
3105             for badge_label in badge_labels:
3106                 if badge_label.lower() == 'members only':
3107                     is_membersonly = True
3108                 elif badge_label.lower() == 'premium':
3109                     is_premium = True
3110                 elif badge_label.lower() == 'unlisted':
3111                     is_unlisted = True
3112
3113         info['availability'] = self._availability(
3114             is_private=is_private,
3115             needs_premium=is_premium,
3116             needs_subscription=is_membersonly,
3117             needs_auth=info['age_limit'] >= 18,
3118             is_unlisted=None if is_private is None else is_unlisted)
3119
3120         info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
3121
3122         self.mark_watched(video_id, player_responses)
3123
3124         return info
3125
3126
3127 class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
3128
3129     def _extract_channel_id(self, webpage):
3130         channel_id = self._html_search_meta(
3131             'channelId', webpage, 'channel id', default=None)
3132         if channel_id:
3133             return channel_id
3134         channel_url = self._html_search_meta(
3135             ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url',
3136              'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad',
3137              'twitter:app:url:googleplay'), webpage, 'channel url')
3138         return self._search_regex(
3139             r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+',
3140             channel_url, 'channel id')
3141
3142     @staticmethod
3143     def _extract_basic_item_renderer(item):
3144         # Modified from _extract_grid_item_renderer
3145         known_basic_renderers = (
3146             'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer'
3147         )
3148         for key, renderer in item.items():
3149             if not isinstance(renderer, dict):
3150                 continue
3151             elif key in known_basic_renderers:
3152                 return renderer
3153             elif key.startswith('grid') and key.endswith('Renderer'):
3154                 return renderer
3155
3156     def _grid_entries(self, grid_renderer):
3157         for item in grid_renderer['items']:
3158             if not isinstance(item, dict):
3159                 continue
3160             renderer = self._extract_basic_item_renderer(item)
3161             if not isinstance(renderer, dict):
3162                 continue
3163             title = self._get_text(renderer, 'title')
3164
3165             # playlist
3166             playlist_id = renderer.get('playlistId')
3167             if playlist_id:
3168                 yield self.url_result(
3169                     'https://www.youtube.com/playlist?list=%s' % playlist_id,
3170                     ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3171                     video_title=title)
3172                 continue
3173             # video
3174             video_id = renderer.get('videoId')
3175             if video_id:
3176                 yield self._extract_video(renderer)
3177                 continue
3178             # channel
3179             channel_id = renderer.get('channelId')
3180             if channel_id:
3181                 yield self.url_result(
3182                     'https://www.youtube.com/channel/%s' % channel_id,
3183                     ie=YoutubeTabIE.ie_key(), video_title=title)
3184                 continue
3185             # generic endpoint URL support
3186             ep_url = urljoin('https://www.youtube.com/', try_get(
3187                 renderer, lambda x: x['navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'],
3188                 compat_str))
3189             if ep_url:
3190                 for ie in (YoutubeTabIE, YoutubePlaylistIE, YoutubeIE):
3191                     if ie.suitable(ep_url):
3192                         yield self.url_result(
3193                             ep_url, ie=ie.ie_key(), video_id=ie._match_id(ep_url), video_title=title)
3194                         break
3195
3196     def _shelf_entries_from_content(self, shelf_renderer):
3197         content = shelf_renderer.get('content')
3198         if not isinstance(content, dict):
3199             return
3200         renderer = content.get('gridRenderer') or content.get('expandedShelfContentsRenderer')
3201         if renderer:
3202             # TODO: add support for nested playlists so each shelf is processed
3203             # as separate playlist
3204             # TODO: this includes only first N items
3205             for entry in self._grid_entries(renderer):
3206                 yield entry
3207         renderer = content.get('horizontalListRenderer')
3208         if renderer:
3209             # TODO
3210             pass
3211
3212     def _shelf_entries(self, shelf_renderer, skip_channels=False):
3213         ep = try_get(
3214             shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3215             compat_str)
3216         shelf_url = urljoin('https://www.youtube.com', ep)
3217         if shelf_url:
3218             # Skipping links to another channels, note that checking for
3219             # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL
3220             # will not work
3221             if skip_channels and '/channels?' in shelf_url:
3222                 return
3223             title = self._get_text(shelf_renderer, 'title')
3224             yield self.url_result(shelf_url, video_title=title)
3225         # Shelf may not contain shelf URL, fallback to extraction from content
3226         for entry in self._shelf_entries_from_content(shelf_renderer):
3227             yield entry
3228
3229     def _playlist_entries(self, video_list_renderer):
3230         for content in video_list_renderer['contents']:
3231             if not isinstance(content, dict):
3232                 continue
3233             renderer = content.get('playlistVideoRenderer') or content.get('playlistPanelVideoRenderer')
3234             if not isinstance(renderer, dict):
3235                 continue
3236             video_id = renderer.get('videoId')
3237             if not video_id:
3238                 continue
3239             yield self._extract_video(renderer)
3240
3241     def _rich_entries(self, rich_grid_renderer):
3242         renderer = try_get(
3243             rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
3244         video_id = renderer.get('videoId')
3245         if not video_id:
3246             return
3247         yield self._extract_video(renderer)
3248
3249     def _video_entry(self, video_renderer):
3250         video_id = video_renderer.get('videoId')
3251         if video_id:
3252             return self._extract_video(video_renderer)
3253
3254     def _post_thread_entries(self, post_thread_renderer):
3255         post_renderer = try_get(
3256             post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict)
3257         if not post_renderer:
3258             return
3259         # video attachment
3260         video_renderer = try_get(
3261             post_renderer, lambda x: x['backstageAttachment']['videoRenderer'], dict) or {}
3262         video_id = video_renderer.get('videoId')
3263         if video_id:
3264             entry = self._extract_video(video_renderer)
3265             if entry:
3266                 yield entry
3267         # playlist attachment
3268         playlist_id = try_get(
3269             post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], compat_str)
3270         if playlist_id:
3271             yield self.url_result(
3272                 'https://www.youtube.com/playlist?list=%s' % playlist_id,
3273                 ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
3274         # inline video links
3275         runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or []
3276         for run in runs:
3277             if not isinstance(run, dict):
3278                 continue
3279             ep_url = try_get(
3280                 run, lambda x: x['navigationEndpoint']['urlEndpoint']['url'], compat_str)
3281             if not ep_url:
3282                 continue
3283             if not YoutubeIE.suitable(ep_url):
3284                 continue
3285             ep_video_id = YoutubeIE._match_id(ep_url)
3286             if video_id == ep_video_id:
3287                 continue
3288             yield self.url_result(ep_url, ie=YoutubeIE.ie_key(), video_id=ep_video_id)
3289
3290     def _post_thread_continuation_entries(self, post_thread_continuation):
3291         contents = post_thread_continuation.get('contents')
3292         if not isinstance(contents, list):
3293             return
3294         for content in contents:
3295             renderer = content.get('backstagePostThreadRenderer')
3296             if not isinstance(renderer, dict):
3297                 continue
3298             for entry in self._post_thread_entries(renderer):
3299                 yield entry
3300
3301     r''' # unused
3302     def _rich_grid_entries(self, contents):
3303         for content in contents:
3304             video_renderer = try_get(content, lambda x: x['richItemRenderer']['content']['videoRenderer'], dict)
3305             if video_renderer:
3306                 entry = self._video_entry(video_renderer)
3307                 if entry:
3308                     yield entry
3309     '''
    def _extract_entries(self, parent_renderer, continuation_list):
        """
        Yield the entries found in parent_renderer['contents'].

        Each content item is handled either as an 'itemSectionRenderer', whose own
        contents are dispatched on the first recognised renderer key, or as a
        'richItemRenderer', which is passed to _rich_entries.

        @param parent_renderer:   dict whose 'contents' list is walked
        @param continuation_list: single-element list used as an out-parameter. It is reset
                                  to [None] and its first element is then overwritten in-place
                                  with the result of _extract_continuation, so the caller can
                                  read it after consuming the generator
        """
        # continuation_list is modified in-place with continuation_list = [continuation_token]
        continuation_list[:] = [None]
        contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
        for content in contents:
            if not isinstance(content, dict):
                continue
            is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict)
            if not is_renderer:
                # Not an item section: the only other shape handled here is a rich item
                renderer = content.get('richItemRenderer')
                if renderer:
                    for entry in self._rich_entries(renderer):
                        yield entry
                    # For rich items the continuation is looked up on the parent renderer
                    continuation_list[0] = self._extract_continuation(parent_renderer)
                continue
            isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
            for isr_content in isr_contents:
                if not isinstance(isr_content, dict):
                    continue

                # Maps a renderer key to a callable returning an iterable of entries
                known_renderers = {
                    'playlistVideoListRenderer': self._playlist_entries,
                    'gridRenderer': self._grid_entries,
                    'shelfRenderer': lambda x: self._shelf_entries(x),
                    'backstagePostThreadRenderer': self._post_thread_entries,
                    'videoRenderer': lambda x: [self._video_entry(x)],
                    'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                    'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
                }
                for key, renderer in isr_content.items():
                    if key not in known_renderers:
                        continue
                    for entry in known_renderers[key](renderer):
                        # _video_entry may return None, so falsy entries are dropped
                        if entry:
                            yield entry
                    continuation_list[0] = self._extract_continuation(renderer)
                    # Only the first recognised renderer of each section item is processed
                    break

            # Fall back to a continuation attached to the item section itself
            if not continuation_list[0]:
                continuation_list[0] = self._extract_continuation(is_renderer)

        # Last resort: a continuation attached to the parent renderer
        if not continuation_list[0]:
            continuation_list[0] = self._extract_continuation(parent_renderer)
3353
    def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
        """
        Yield all entries of a tab, following continuations page by page.

        The first page comes from the tab's own content ('sectionListRenderer' or
        'richGridRenderer'). Further pages are requested through _extract_response
        for as long as a continuation is available and the response has one of the
        two shapes handled below.

        @param tab:            tab renderer dict; its 'content' is the first page
        @param item_id:        ID used for the per-page item_id passed to _extract_response
        @param ytcfg:          passed on to generate_api_headers and _extract_response
        @param account_syncid: passed on to generate_api_headers
        @param visitor_data:   initial visitor data; replaced by the value found in
                               each response when there is one
        """
        continuation_list = [None]
        # The lambda looks `continuation_list` up at call time, so it always uses the
        # list currently bound to that name, including the ones rebound in the loop below
        extract_entries = lambda x: self._extract_entries(x, continuation_list)
        tab_content = try_get(tab, lambda x: x['content'], dict)
        if not tab_content:
            return
        parent_renderer = (
            try_get(tab_content, lambda x: x['sectionListRenderer'], dict)
            or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
        for entry in extract_entries(parent_renderer):
            yield entry
        continuation = continuation_list[0]

        for page_num in itertools.count(1):
            if not continuation:
                break
            headers = self.generate_api_headers(
                ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
            response = self._extract_response(
                item_id='%s page %s' % (item_id, page_num),
                query=continuation, headers=headers, ytcfg=ytcfg,
                check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints'))

            if not response:
                break
            # Extracting updated visitor data is required to prevent an infinite extraction loop in some cases
            # See: https://github.com/ytdl-org/youtube-dl/issues/28702
            visitor_data = self._extract_visitor_data(response) or visitor_data

            # Response shape 1: the page is under response['continuationContents'][<key>]
            known_continuation_renderers = {
                'playlistVideoListContinuation': self._playlist_entries,
                'gridContinuation': self._grid_entries,
                'itemSectionContinuation': self._post_thread_continuation_entries,
                'sectionListContinuation': extract_entries,  # for feeds
            }
            continuation_contents = try_get(
                response, lambda x: x['continuationContents'], dict) or {}
            continuation_renderer = None
            for key, value in continuation_contents.items():
                if key not in known_continuation_renderers:
                    continue
                continuation_renderer = value
                # Fresh out-parameter for this page; only extract_entries fills it in
                continuation_list = [None]
                for entry in known_continuation_renderers[key](continuation_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
                break
            if continuation_renderer:
                continue

            # Response shape 2: a list of items under appendContinuationItemsAction.
            # Each value is (handler, key under which the handler expects the item list)
            known_renderers = {
                'gridPlaylistRenderer': (self._grid_entries, 'items'),
                'gridVideoRenderer': (self._grid_entries, 'items'),
                'gridChannelRenderer': (self._grid_entries, 'items'),
                'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
            }
            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
            continuation_items = try_get(
                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
            # The handler is chosen from the keys of the first item only
            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
            video_items_renderer = None
            for key, value in continuation_item.items():
                if key not in known_renderers:
                    continue
                # Wrap the whole item list so it looks like the renderer the handler expects
                video_items_renderer = {known_renderers[key][1]: continuation_items}
                continuation_list = [None]
                for entry in known_renderers[key][0](video_items_renderer):
                    yield entry
                continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
                break
            if video_items_renderer:
                continue
            # Neither shape was recognised: stop paginating
            break
3430
3431     @staticmethod
3432     def _extract_selected_tab(tabs):
3433         for tab in tabs:
3434             renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {}
3435             if renderer.get('selected') is True:
3436                 return renderer
3437         else:
3438             raise ExtractorError('Unable to find selected tab')
3439
3440     @classmethod
3441     def _extract_uploader(cls, data):
3442         uploader = {}
3443         renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {}
3444         owner = try_get(
3445             renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict)
3446         if owner:
3447             uploader['uploader'] = owner.get('text')
3448             uploader['uploader_id'] = try_get(
3449                 owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str)
3450             uploader['uploader_url'] = urljoin(
3451                 'https://www.youtube.com/',
3452                 try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
3453         return {k: v for k, v in uploader.items() if v is not None}
3454
3455     def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
3456         playlist_id = title = description = channel_url = channel_name = channel_id = None
3457         thumbnails_list = []
3458         tags = []
3459
3460         selected_tab = self._extract_selected_tab(tabs)
3461         renderer = try_get(
3462             data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
3463         if renderer:
3464             channel_name = renderer.get('title')
3465             channel_url = renderer.get('channelUrl')
3466             channel_id = renderer.get('externalId')
3467         else:
3468             renderer = try_get(
3469                 data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
3470
3471         if renderer:
3472             title = renderer.get('title')
3473             description = renderer.get('description', '')
3474             playlist_id = channel_id
3475             tags = renderer.get('keywords', '').split()
3476             thumbnails_list = (
3477                 try_get(renderer, lambda x: x['avatar']['thumbnails'], list)
3478                 or try_get(
3479                     self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer'),
3480                     lambda x: x['thumbnailRenderer']['playlistVideoThumbnailRenderer']['thumbnail']['thumbnails'],
3481                     list)
3482                 or [])
3483
3484         thumbnails = []
3485         for t in thumbnails_list:
3486             if not isinstance(t, dict):
3487                 continue
3488             thumbnail_url = url_or_none(t.get('url'))
3489             if not thumbnail_url:
3490                 continue
3491             thumbnails.append({
3492                 'url': thumbnail_url,
3493                 'width': int_or_none(t.get('width')),
3494                 'height': int_or_none(t.get('height')),
3495             })
3496         if playlist_id is None:
3497             playlist_id = item_id
3498         if title is None:
3499             title = (
3500                 try_get(data, lambda x: x['header']['hashtagHeaderRenderer']['hashtag']['simpleText'])
3501                 or playlist_id)
3502         title += format_field(selected_tab, 'title', ' - %s')
3503         title += format_field(selected_tab, 'expandedText', ' - %s')
3504         metadata = {
3505             'playlist_id': playlist_id,
3506             'playlist_title': title,
3507             'playlist_description': description,
3508             'uploader': channel_name,
3509             'uploader_id': channel_id,
3510             'uploader_url': channel_url,
3511             'thumbnails': thumbnails,
3512             'tags': tags,
3513         }
3514         availability = self._extract_availability(data)
3515         if availability:
3516             metadata['availability'] = availability
3517         if not channel_id:
3518             metadata.update(self._extract_uploader(data))
3519         metadata.update({
3520             'channel': metadata['uploader'],
3521             'channel_id': metadata['uploader_id'],
3522             'channel_url': metadata['uploader_url']})
3523         return self.playlist_result(
3524             self._entries(
3525                 selected_tab, playlist_id, ytcfg,
3526                 self._extract_account_syncid(ytcfg, data),
3527                 self._extract_visitor_data(data, ytcfg)),
3528             **metadata)
3529
3530     def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg):
3531         first_id = last_id = response = None
3532         for page_num in itertools.count(1):
3533             videos = list(self._playlist_entries(playlist))
3534             if not videos:
3535                 return
3536             start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1
3537             if start >= len(videos):
3538                 return
3539             for video in videos[start:]:
3540                 if video['id'] == first_id:
3541                     self.to_screen('First video %s found again; Assuming end of Mix' % first_id)
3542                     return
3543                 yield video
3544             first_id = first_id or videos[0]['id']
3545             last_id = videos[-1]['id']
3546             watch_endpoint = try_get(
3547                 playlist, lambda x: x['contents'][-1]['playlistPanelVideoRenderer']['navigationEndpoint']['watchEndpoint'])
3548             headers = self.generate_api_headers(
3549                 ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3550                 visitor_data=self._extract_visitor_data(response, data, ytcfg))
3551             query = {
3552                 'playlistId': playlist_id,
3553                 'videoId': watch_endpoint.get('videoId') or last_id,
3554                 'index': watch_endpoint.get('index') or len(videos),
3555                 'params': watch_endpoint.get('params') or 'OAE%3D'
3556             }
3557             response = self._extract_response(
3558                 item_id='%s page %d' % (playlist_id, page_num),
3559                 query=query, ep='next', headers=headers, ytcfg=ytcfg,
3560                 check_get_keys='contents'
3561             )
3562             playlist = try_get(
3563                 response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
3564
3565     def _extract_from_playlist(self, item_id, url, data, playlist, ytcfg):
3566         title = playlist.get('title') or try_get(
3567             data, lambda x: x['titleText']['simpleText'], compat_str)
3568         playlist_id = playlist.get('playlistId') or item_id
3569
3570         # Delegating everything except mix playlists to regular tab-based playlist URL
3571         playlist_url = urljoin(url, try_get(
3572             playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
3573             compat_str))
3574         if playlist_url and playlist_url != url:
3575             return self.url_result(
3576                 playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
3577                 video_title=title)
3578
3579         return self.playlist_result(
3580             self._extract_mix_playlist(playlist, playlist_id, data, ytcfg),
3581             playlist_id=playlist_id, playlist_title=title)
3582
3583     def _extract_availability(self, data):
3584         """
3585         Gets the availability of a given playlist/tab.
3586         Note: Unless YouTube tells us explicitly, we do not assume it is public
3587         @param data: response
3588         """
3589         is_private = is_unlisted = None
3590         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
3591         badge_labels = self._extract_badges(renderer)
3592
3593         # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
3594         privacy_dropdown_entries = try_get(
3595             renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
3596         for renderer_dict in privacy_dropdown_entries:
3597             is_selected = try_get(
3598                 renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
3599             if not is_selected:
3600                 continue
3601             label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
3602             if label:
3603                 badge_labels.add(label.lower())
3604                 break
3605
3606         for badge_label in badge_labels:
3607             if badge_label == 'unlisted':
3608                 is_unlisted = True
3609             elif badge_label == 'private':
3610                 is_private = True
3611             elif badge_label == 'public':
3612                 is_unlisted = is_private = False
3613         return self._availability(is_private, False, False, False, is_unlisted)
3614
3615     @staticmethod
3616     def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
3617         sidebar_renderer = try_get(
3618             data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []
3619         for item in sidebar_renderer:
3620             renderer = try_get(item, lambda x: x[info_renderer], expected_type)
3621             if renderer:
3622                 return renderer
3623
3624     def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
3625         """
3626         Get playlist with unavailable videos if the 'show unavailable videos' button exists.
3627         """
3628         browse_id = params = None
3629         renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer')
3630         if not renderer:
3631             return
3632         menu_renderer = try_get(
3633             renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []
3634         for menu_item in menu_renderer:
3635             if not isinstance(menu_item, dict):
3636                 continue
3637             nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
3638             text = try_get(
3639                 nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
3640             if not text or text.lower() != 'show unavailable videos':
3641                 continue
3642             browse_endpoint = try_get(
3643                 nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
3644             browse_id = browse_endpoint.get('browseId')
3645             params = browse_endpoint.get('params')
3646             break
3647
3648         headers = self.generate_api_headers(
3649             ytcfg=ytcfg, account_syncid=self._extract_account_syncid(ytcfg, data),
3650             visitor_data=self._extract_visitor_data(data, ytcfg))
3651         query = {
3652             'params': params or 'wgYCCAA=',
3653             'browseId': browse_id or 'VL%s' % item_id
3654         }
3655         return self._extract_response(
3656             item_id=item_id, headers=headers, query=query,
3657             check_get_keys='contents', fatal=False, ytcfg=ytcfg,
3658             note='Downloading API JSON with unavailable videos')
3659
    def _extract_webpage(self, url, item_id, fatal=True):
        """
        Download the webpage and its initial data, retrying on incomplete data.

        Up to 'extractor_retries' (default 3) retries are made when the download
        fails with a network error other than HTTP 403/429, or when the initial
        data has neither 'contents' nor 'currentVideoEndpoint'.

        @param url:     URL of the page
        @param item_id: ID used for the download and extraction calls
        @param fatal:   when true, errors are raised; otherwise they are reported
                        as warnings and whatever was obtained is returned
        @returns        (webpage, data)
        """
        retries = self.get_param('extractor_retries', 3)
        count = -1
        webpage = data = last_error = None
        while count < retries:
            count += 1
            # Sometimes youtube returns a webpage with incomplete ytInitialData
            # See: https://github.com/yt-dlp/yt-dlp/issues/116
            if last_error:
                self.report_warning('%s. Retrying ...' % last_error)
            try:
                webpage = self._download_webpage(
                    url, item_id,
                    note='Downloading webpage%s' % (' (retry #%d)' % count if count else '',))
                data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
            except ExtractorError as e:
                # Only network errors other than HTTP 403/429 are retried
                if isinstance(e.cause, network_exceptions):
                    if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (403, 429):
                        last_error = error_to_compat_str(e.cause or e.msg)
                        if count < retries:
                            continue
                # Not retryable, or out of retries
                if fatal:
                    raise
                self.report_warning(error_to_compat_str(e))
                break
            else:
                try:
                    self._extract_and_report_alerts(data)
                except ExtractorError as e:
                    if fatal:
                        raise
                    self.report_warning(error_to_compat_str(e))
                    break

                # The data is considered complete once one of these keys is present
                if dict_get(data, ('contents', 'currentVideoEndpoint')):
                    break

                last_error = 'Incomplete yt initial data received'
                if count >= retries:
                    if fatal:
                        raise ExtractorError(last_error)
                    self.report_warning(last_error)
                    break

        return webpage, data
3705
3706     def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
3707         data = None
3708         if 'webpage' not in self._configuration_arg('skip'):
3709             webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
3710             ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
3711         if not data:
3712             if not ytcfg and self.is_authenticated:
3713                 msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
3714                 if 'authcheck' not in self._configuration_arg('skip') and fatal:
3715                     raise ExtractorError(
3716                         msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
3717                               ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
3718                         expected=True)
3719                 self.report_warning(msg, only_once=True)
3720             data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
3721         return data, ytcfg
3722
3723     def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_client='web'):
3724         headers = self.generate_api_headers(ytcfg=ytcfg, default_client=default_client)
3725         resolve_response = self._extract_response(
3726             item_id=item_id, query={'url': url}, check_get_keys='endpoint', headers=headers, ytcfg=ytcfg, fatal=fatal,
3727             ep='navigation/resolve_url', note='Downloading API parameters API JSON', default_client=default_client)
3728         endpoints = {'browseEndpoint': 'browse', 'watchEndpoint': 'next'}
3729         for ep_key, ep in endpoints.items():
3730             params = try_get(resolve_response, lambda x: x['endpoint'][ep_key], dict)
3731             if params:
3732                 return self._extract_response(
3733                     item_id=item_id, query=params, ep=ep, headers=headers,
3734                     ytcfg=ytcfg, fatal=fatal, default_client=default_client,
3735                     check_get_keys=('contents', 'currentVideoEndpoint'))
3736         err_note = 'Failed to resolve url (does the playlist exist?)'
3737         if fatal:
3738             raise ExtractorError(err_note, expected=True)
3739         self.report_warning(err_note, item_id)
3740
3741     @staticmethod
3742     def _smuggle_data(entries, data):
3743         for entry in entries:
3744             if data:
3745                 entry['url'] = smuggle_url(entry['url'], data)
3746             yield entry
3747
3748     _SEARCH_PARAMS = None
3749
3750     def _search_results(self, query, params=NO_DEFAULT):
3751         data = {'query': query}
3752         if params is NO_DEFAULT:
3753             params = self._SEARCH_PARAMS
3754         if params:
3755             data['params'] = params
3756         continuation_list = [None]
3757         for page_num in itertools.count(1):
3758             data.update(continuation_list[0] or {})
3759             search = self._extract_response(
3760                 item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
3761                 check_get_keys=('contents', 'onResponseReceivedCommands'))
3762             slr_contents = try_get(
3763                 search,
3764                 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
3765                  lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
3766                 list)
3767             yield from self._extract_entries({'contents': slr_contents}, continuation_list)
3768             if not continuation_list[0]:
3769                 break
3770
3771
# Extractor for YouTube "tab"-like pages. As _VALID_URL below shows, this covers
# channel/c/user/browse pages, feed/ and hashtag/ pages, playlist and watch URLs
# that carry a `list=` parameter, and direct (vanity) URLs that do not start
# with a reserved name.
class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
    IE_DESC = 'YouTube Tabs'
    # Named groups:
    #   channel_type - set for channel/, c/, user/ and browse/ URLs
    #   not_channel  - set for feed/, hashtag/ and playlist/watch URLs with list=
    #   id           - the path segment or list id following the above
    # Hosts accepted are youtube.com, youtubekids.com (with an optional
    # subdomain) and the sites listed in _INVIDIOUS_SITES.
    _VALID_URL = r'''(?x:
        https?://
            (?:\w+\.)?
            (?:
                youtube(?:kids)?\.com|
                %(invidious)s
            )/
            (?:
                (?P<channel_type>channel|c|user|browse)/|
                (?P<not_channel>
                    feed/|hashtag/|
                    (?:playlist|watch)\?.*?\blist=
                )|
                (?!(?:%(reserved_names)s)\b)  # Direct URLs
            )
            (?P<id>[^/?\#&]+)
    )''' % {
        'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    IE_NAME = 'youtube:tab'

    # Test cases; entries marked 'only_matching' only declare that the URL is handled by this extractor
    _TESTS = [{
        'note': 'playlists, multipage',
        'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid',
        'playlist_mincount': 94,
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Игорь Клейнер - Playlists',
            'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
            'uploader': 'Игорь Клейнер',
            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
        },
    }, {
        'note': 'playlists, multipage, different order',
        'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
        'playlist_mincount': 94,
        'info_dict': {
            'id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'title': 'Игорь Клейнер - Playlists',
            'description': 'md5:be97ee0f14ee314f1f002cf187166ee2',
            'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg',
            'uploader': 'Игорь Клейнер',
        },
    }, {
        'note': 'playlists, series',
        'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'UCYO_jab_esuFRV4b17AJtAw',
            'title': '3Blue1Brown - Playlists',
            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
            'uploader': '3Blue1Brown',
        },
    }, {
        'note': 'playlists, singlepage',
        'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
            'title': 'ThirstForScience - Playlists',
            'description': 'md5:609399d937ea957b0f53cbffb747a14c',
            'uploader': 'ThirstForScience',
            'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ',
        }
    }, {
        'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
        'only_matching': True,
    }, {
        'note': 'basic, single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
            'title': 'youtube-dl public playlist',
        },
        'playlist_count': 1,
    }, {
        'note': 'empty playlist',
        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
        'info_dict': {
            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
            'uploader': 'Sergey M.',
            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
            'title': 'youtube-dl empty playlist',
        },
        'playlist_count': 0,
    }, {
        'note': 'Home tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Home',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 2,
    }, {
        'note': 'Videos tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Videos',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 975,
    }, {
        'note': 'Videos tab, sorted by popular',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Videos',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 199,
    }, {
        'note': 'Playlists tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Playlists',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 17,
    }, {
        'note': 'Community tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Community',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 18,
    }, {
        'note': 'Channels tab',
        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels',
        'info_dict': {
            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
            'title': 'lex will - Channels',
            'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488',
            'uploader': 'lex will',
            'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
        },
        'playlist_mincount': 12,
    }, {
        'note': 'Search tab',
        'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra',
        'playlist_mincount': 40,
        'info_dict': {
            'id': 'UCYO_jab_esuFRV4b17AJtAw',
            'title': '3Blue1Brown - Search - linear algebra',
            'description': 'md5:e1384e8a133307dd10edee76e875d62f',
            'uploader': '3Blue1Brown',
            'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw',
        },
    }, {
        'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
        'only_matching': True,
    }, {
        'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
        'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
        'info_dict': {
            'title': '29C3: Not my department',
            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
            'uploader': 'Christiaan008',
            'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
            'description': 'md5:a14dc1a8ef8307a9807fe136a0660268',
        },
        'playlist_count': 96,
    }, {
        'note': 'Large playlist',
        'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
        'info_dict': {
            'title': 'Uploads from Cauchemar',
            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
            'uploader': 'Cauchemar',
            'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
        },
        'playlist_mincount': 1123,
    }, {
        'note': 'even larger playlist, 8832 videos',
        'url': 'http://www.youtube.com/user/NASAgovVideo/videos',
        'only_matching': True,
    }, {
        'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
        'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
        'info_dict': {
            'title': 'Uploads from Interstellar Movie',
            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
            'uploader': 'Interstellar Movie',
            'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
        },
        'playlist_mincount': 21,
    }, {
        'note': 'Playlist with "show unavailable videos" button',
        'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
        'info_dict': {
            'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
            'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
            'uploader': 'Phim Siêu Nhân Nhật Bản',
            'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
        },
        'playlist_mincount': 200,
    }, {
        'note': 'Playlist with unavailable videos in page 7',
        'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w',
        'info_dict': {
            'title': 'Uploads from BlankTV',
            'id': 'UU8l9frL61Yl5KFOl87nIm2w',
            'uploader': 'BlankTV',
            'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w',
        },
        'playlist_mincount': 1000,
    }, {
        'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844',
        'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
        'info_dict': {
            'title': 'Data Analysis with Dr Mike Pound',
            'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
            'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA',
            'uploader': 'Computerphile',
            'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487',
        },
        'playlist_mincount': 11,
    }, {
        'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
        'only_matching': True,
    }, {
        'note': 'Playlist URL that does not actually serve a playlist',
        'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
        'info_dict': {
            'id': 'FqZTN594JQw',
            'ext': 'webm',
            'title': "Smiley's People 01 detective, Adventure Series, Action",
            'uploader': 'STREEM',
            'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
            'upload_date': '20150526',
            'license': 'Standard YouTube License',
            'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
            'categories': ['People & Blogs'],
            'tags': list,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video is not available.',
        'add_ie': [YoutubeIE.ie_key()],
    }, {
        'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
        'info_dict': {
            'id': '3yImotZU3tw',  # This will keep changing
            'ext': 'mp4',
            'title': compat_str,
            'uploader': 'Sky News',
            'uploader_id': 'skynews',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews',
            'upload_date': r're:\d{8}',
            'description': compat_str,
            'categories': ['News & Politics'],
            'tags': list,
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '],
    }, {
        'url': 'https://www.youtube.com/user/TheYoungTurks/live',
        'info_dict': {
            'id': 'a48o2S1cPoo',
            'ext': 'mp4',
            'title': 'The Young Turks - Live Main Show',
            'uploader': 'The Young Turks',
            'uploader_id': 'TheYoungTurks',
            'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
            'upload_date': '20150715',
            'license': 'Standard YouTube License',
            'description': 'md5:438179573adcdff3c97ebb1ee632b891',
            'categories': ['News & Politics'],
            'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
            'like_count': int,
            'dislike_count': int,
        },
        'params': {
            'skip_download': True,
        },
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
        'only_matching': True,
    }, {
        'note': 'A channel that is not live. Should raise error',
        'url': 'https://www.youtube.com/user/numberphile/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/trending',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/library',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/history',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/subscriptions',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/feed/watch_later',
        'only_matching': True,
    }, {
        'note': 'Recommended - redirects to home page.',
        'url': 'https://www.youtube.com/feed/recommended',
        'only_matching': True,
    }, {
        'note': 'inline playlist with not always working continuations',
        'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/course',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/zsecurity',
        'only_matching': True,
    }, {
        'url': 'http://www.youtube.com/NASAgovVideo/videos',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/TheYoungTurks/live',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/hashtag/cctv9',
        'info_dict': {
            'id': 'cctv9',
            'title': '#cctv9',
        },
        'playlist_mincount': 350,
    }, {
        'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU',
        'only_matching': True,
    }, {
        'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist',
        'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'only_matching': True
    }, {
        'note': '/browse/ should redirect to /channel/',
        'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng',
        'only_matching': True
    }, {
        'note': 'VLPL, should redirect to playlist?list=PL...',
        'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'info_dict': {
            'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
            'uploader': 'NoCopyrightSounds',
            'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
            'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'title': 'NCS Releases',
        },
        'playlist_mincount': 166,
    }, {
        'note': 'Topic, should redirect to playlist?list=UU...',
        'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
        'info_dict': {
            'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'title': 'Uploads from Royalty Free Music - Topic',
            'uploader': 'Royalty Free Music - Topic',
        },
        'expected_warnings': [
            'A channel/user page was given',
            'The URL does not have a videos tab',
        ],
        'playlist_mincount': 101,
    }, {
        'note': 'Topic without a UU playlist',
        'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
        'info_dict': {
            'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
            'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
        },
        'expected_warnings': [
            'A channel/user page was given',
            'The URL does not have a videos tab',
            'Falling back to channel URL',
        ],
        'playlist_mincount': 9,
    }, {
        'note': 'Youtube music Album',
        'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE',
        'info_dict': {
            'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
            'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
        },
        'playlist_count': 50,
    }, {
        'note': 'unlisted single video playlist',
        'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
        'info_dict': {
            'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q',
            'uploader': 'colethedj',
            'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf',
            'title': 'yt-dlp unlisted playlist test',
            'availability': 'unlisted'
        },
        'playlist_count': 1,
    }, {
        'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
        'url': 'https://www.youtube.com/feed/recommended',
        'info_dict': {
            'id': 'recommended',
            'title': 'recommended',
        },
        'playlist_mincount': 50,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'API Fallback: /videos tab, sorted by oldest first',
        'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid',
        'info_dict': {
            'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
            'title': 'Cody\'sLab - Videos',
            'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa',
            'uploader': 'Cody\'sLab',
            'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw',
        },
        'playlist_mincount': 650,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }, {
        'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
        'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
        'info_dict': {
            'id': 'UU9ALqqC4aIeG5iDs7i90Bfw',
            'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw',
            'title': 'Uploads from Royalty Free Music - Topic',
            'uploader': 'Royalty Free Music - Topic',
        },
        'expected_warnings': [
            'A channel/user page was given',
            'The URL does not have a videos tab',
        ],
        'playlist_mincount': 101,
        'params': {
            'skip_download': True,
            'extractor_args': {'youtubetab': {'skip': ['webpage']}}
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for any URL that YoutubeIE accepts; otherwise defer to the inherited suitable()."""
        return False if YoutubeIE.suitable(url) else super(
            YoutubeTabIE, cls).suitable(url)

    def _real_extract(self, url):
        """
        Unsmuggle `url`, flag music URLs in the smuggled data, run the actual
        extraction and pass the smuggled data on to the URL of every entry.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        if self.is_music_url(url):
            smuggled_data['is_music_url'] = True
        info_dict = self.__real_extract(url, smuggled_data)
        # Results without (non-empty) entries, e.g. plain url results, are returned untouched
        if info_dict.get('entries'):
            info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
        return info_dict

    # Splits a URL into `pre` (the part matched by _VALID_URL), `tab` (a "/word"
    # segment that is only looked for when the channel_type group matched) and
    # `post` (everything that follows)
    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)

    def __real_extract(self, url, smuggled_data):
        """
        Normalize `url`, download its data and dispatch on the kind of page found.

        Returns the result of _extract_from_tabs or _extract_from_playlist, or a
        url result pointing to YoutubeIE (single video) or YoutubeTabIE (resolved
        music album). Raises ExtractorError when the page cannot be recognized,
        an album cannot be resolved or a /live tab did not redirect to a video.
        """
        item_id = self._match_id(url)
        # All further processing is done against www.youtube.com, whatever host was given
        url = compat_urlparse.urlunparse(
            compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
        compat_opts = self.get_param('compat_opts', [])

        def get_mobj(url):
            # Named groups of _url_re for url, with unmatched (None) groups replaced by ''
            mobj = self._url_re.match(url).groupdict()
            mobj.update((k, '') for k, v in mobj.items() if v is None)
            return mobj

        mobj = get_mobj(url)
        # Youtube returns incomplete data if tabname is not lower case
        pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
        if is_channel:
            if smuggled_data.get('is_music_url'):
                if item_id[:2] == 'VL':
                    # Youtube music VL channels have an equivalent playlist
                    item_id = item_id[2:]
                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
                elif item_id[:2] == 'MP':
                    # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                    mdata = self._extract_tab_endpoint(
                        'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
                    murl = traverse_obj(
                        mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=compat_str)
                    if not murl:
                        raise ExtractorError('Failed to resolve album to playlist.')
                    return self.url_result(murl, ie=YoutubeTabIE.ie_key())
                elif mobj['channel_type'] == 'browse':
                    # Youtube music /browse/ should be changed to /channel/
                    pre = 'https://www.youtube.com/channel/%s' % item_id
        if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
            # Home URLs should redirect to /videos/
            self.report_warning(
                'A channel/user page was given. All the channel\'s videos will be downloaded. '
                'To download only the videos in the home page, add a "/featured" to the URL')
            tab = '/videos'

        # Rebuild the (possibly rewritten) URL and re-parse it
        url = ''.join((pre, tab, post))
        mobj = get_mobj(url)

        # Handle both video/playlist URLs
        qs = parse_qs(url)
        video_id = qs.get('v', [None])[0]
        playlist_id = qs.get('list', [None])[0]

        if not video_id and mobj['not_channel'].startswith('watch'):
            if not playlist_id:
                # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                raise ExtractorError('Unable to recognize tab page')
            # Common mistake: https://www.youtube.com/watch?list=playlist_id
            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
            mobj = get_mobj(url)

        if video_id and playlist_id:
            # With --no-playlist, hand the single video over to YoutubeIE instead of extracting the playlist
            if self.get_param('noplaylist'):
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
                return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))

        data, ytcfg = self._extract_data(url, item_id)

        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            selected_tab = self._extract_selected_tab(tabs)
            tab_name = selected_tab.get('title', '')
            if 'no-youtube-channel-redirect' not in compat_opts:
                if mobj['tab'] == '/live':
                    # Live tab should have redirected to the video
                    raise ExtractorError('The channel is not currently live', expected=True)
                # /videos was requested, but a differently titled tab is the selected one
                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
                    if not mobj['not_channel'] and item_id[:2] == 'UC':
                        # Topic channels don't have /videos. Use the equivalent playlist instead
                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
                        pl_id = 'UU%s' % item_id[2:]
                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
                        try:
                            # All four names are rebound together, so on failure data, ytcfg, item_id and url keep their channel values
                            data, ytcfg, item_id, url = *self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True), pl_id, pl_url
                        except ExtractorError:
                            self.report_warning('The playlist gave error. Falling back to channel URL')
                    else:
                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))

        self.write_debug('Final URL: %s' % url)

        # YouTube sometimes provides a button to reload playlist with unavailable videos.
        if 'no-youtube-unavailable-videos' not in compat_opts:
            # Keep the current data when the reload returns nothing
            data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
        self._extract_and_report_alerts(data, only_once=True)
        # data may have been replaced above, so the tabs are looked up again
        tabs = try_get(
            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
        if tabs:
            return self._extract_from_tabs(item_id, ytcfg, data, tabs)

        playlist = try_get(
            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
        if playlist:
            return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)

        # Neither tabs nor a playlist: fall back to the video in the response, then to the one from the URL
        video_id = try_get(
            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
            compat_str) or video_id
        if video_id:
            if mobj['tab'] != '/live':  # live tab is expected to redirect to video
                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
            return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)

        raise ExtractorError('Unable to recognize tab page')
4383
4384
class YoutubePlaylistIE(InfoExtractor):
    # Accepts bare playlist IDs as well as youtube / youtubekids / invidious
    # URLs that carry a `list=` query parameter.  The actual extraction is
    # delegated to YoutubeTabIE (see _real_extract).
    IE_NAME = 'youtube:playlist'
    IE_DESC = 'YouTube playlists'
    _VALID_URL = r'''(?x)(?:
                        (?:https?://)?
                        (?:\w+\.)?
                        (?:
                            (?:
                                youtube(?:kids)?\.com|
                                %(invidious)s
                            )
                            /.*?\?.*?\blist=
                        )?
                        (?P<id>%(playlist_id)s)
                     )''' % {
        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,
        'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
    }
    _TESTS = [
        {
            'note': 'issue #673',
            'url': 'PLBB231211A4F62143',
            'info_dict': {
                'title': '[OLD]Team Fortress 2 (Class-based LP)',
                'id': 'PLBB231211A4F62143',
                'uploader': 'Wickydoo',
                'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
                'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
            },
            'playlist_mincount': 29,
        },
        {
            'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
            'info_dict': {
                'title': 'YDL_safe_search',
                'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
            },
            'playlist_count': 2,
            'skip': 'This playlist is private',
        },
        {
            'note': 'embedded',
            'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
            'playlist_count': 4,
            'info_dict': {
                'title': 'JODA15',
                'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
                'uploader': 'milan',
                'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
            },
        },
        {
            'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
            'playlist_mincount': 654,
            'info_dict': {
                'title': '2018 Chinese New Singles (11/6 updated)',
                'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
                'uploader': 'LBK',
                'uploader_id': 'UC21nz3_MesPLqtDqwdvnoxA',
                'description': 'md5:da521864744d60a198e3a88af4db0d9d',
            },
        },
        {
            'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
            'only_matching': True,
        },
        {
            # music album playlist
            'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
            'only_matching': True,
        },
    ]

    @classmethod
    def suitable(cls, url):
        """Tell whether this extractor should handle `url`.

        URLs already claimed by YoutubeTabIE, and URLs whose query string has
        a non-empty `v` parameter, are rejected; anything else is decided by
        the inherited `suitable`.
        """
        if YoutubeTabIE.suitable(url):
            return False
        # NOTE(review): parse_qs is also imported at module level; this local
        # import presumably keeps the method self-contained when its source is
        # reused outside this module - confirm before hoisting it.
        from ..utils import parse_qs
        first_video_id = parse_qs(url).get('v', [None])[0]
        if first_video_id:
            return False
        return super(YoutubePlaylistIE, cls).suitable(url)

    def _real_extract(self, url):
        """Rewrite the input as a /playlist URL and hand it to YoutubeTabIE."""
        playlist_id = self._match_id(url)
        music_flag = YoutubeBaseInfoExtractor.is_music_url(url)
        # Reuse the incoming query string when there is one; a bare playlist ID
        # has none, so fall back to a single `list` parameter.
        query = parse_qs(url) or {'list': playlist_id}
        playlist_url = update_url_query('https://www.youtube.com/playlist', query)
        if music_flag:
            playlist_url = smuggle_url(playlist_url, {'is_music_url': True})
        return self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4470
4471
class YoutubeYtBeIE(InfoExtractor):
    # Handles youtu.be short links that carry both a video ID and a `list=`
    # playlist ID, by redirecting to the equivalent youtube.com/watch URL.
    IE_DESC = 'youtu.be'
    _VALID_URL = (
        r'https?://youtu\.be/(?P<id>[0-9A-Za-z_-]{11})/*?.*?\blist=(?P<playlist_id>%(playlist_id)s)'
        % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})
    _TESTS = [
        {
            'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
            'info_dict': {
                'id': 'yeWKywCrFtk',
                'ext': 'mp4',
                'title': 'Small Scale Baler and Braiding Rugs',
                'uploader': 'Backus-Page House Museum',
                'uploader_id': 'backuspagemuseum',
                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
                'upload_date': '20161008',
                'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
                'categories': ['Nonprofits & Activism'],
                'tags': list,
                'like_count': int,
                'dislike_count': int,
            },
            'params': {
                'noplaylist': True,
                'skip_download': True,
            },
        },
        {
            'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the matching youtube.com/watch URL and pass it to YoutubeTabIE."""
        video_id, playlist_id = self._match_valid_url(url).group('id', 'playlist_id')
        watch_query = {
            'v': video_id,
            'list': playlist_id,
            'feature': 'youtu.be',
        }
        watch_url = update_url_query('https://www.youtube.com/watch', watch_query)
        return self.url_result(
            watch_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id)
4510
4511
class YoutubeYtUserIE(InfoExtractor):
    # Pseudo-URL extractor: "ytuser:<name>" is expanded to the user's
    # /videos page and handled by YoutubeTabIE.
    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
    _VALID_URL = r'ytuser:(?P<id>.+)'
    _TESTS = [
        {
            'url': 'ytuser:phihag',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Redirect `ytuser:<name>` to https://www.youtube.com/user/<name>/videos."""
        user_id = self._match_id(url)
        videos_url = 'https://www.youtube.com/user/%s/videos' % user_id
        return self.url_result(
            videos_url, ie=YoutubeTabIE.ie_key(), video_id=user_id)
4525
4526
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
    # Keyword extractor: ":ytfav" (and its spelling variants) redirects to the
    # `LL` playlist, which YoutubeTabIE then extracts.
    IE_NAME = 'youtube:favorites'
    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
    _LOGIN_REQUIRED = True
    _VALID_URL = r':ytfav(?:ou?rite)?s?'
    _TESTS = [
        {
            'url': ':ytfav',
            'only_matching': True,
        },
        {
            'url': ':ytfavorites',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Hand the fixed `list=LL` playlist URL over to YoutubeTabIE."""
        liked_url = 'https://www.youtube.com/playlist?list=LL'
        return self.url_result(liked_url, ie=YoutubeTabIE.ie_key())
4544
4545
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Search extractor configuration only; all behaviour comes from the two
    # base classes.
    IE_NAME = 'youtube:search'
    IE_DESC = 'YouTube search'
    _SEARCH_KEY = 'ytsearch'
    # Videos only
    _SEARCH_PARAMS = 'EgIQAQ%3D%3D'
    _TESTS = []
4552
4553
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    # Same as the plain search extractor, but with its own search key and
    # search parameters that sort the results by date.
    IE_NAME = f'{YoutubeSearchIE.IE_NAME}:date'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_KEY = 'ytsearchdate'
    # Videos only, sorted by date
    _SEARCH_PARAMS = 'CAISAhAB'
4559
4560
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    # Handles youtube.com/results URLs: the query comes from `search_query`
    # (or `q`) and the optional `sp` parameter is forwarded to the search.
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
    _TESTS = [
        {
            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'youtube-dl test video',
                'title': 'youtube-dl test video',
            },
        },
        {
            'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
            'playlist_mincount': 5,
            'info_dict': {
                'id': 'python',
                'title': 'python',
            },
        },
        {
            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Run the search described by the URL and return it as a playlist.

        The query string doubles as both the playlist ID and its title.
        """
        params = parse_qs(url)
        # `search_query` wins over `q` when both are present.
        query = (params.get('search_query') or params.get('q'))[0]
        search_params = params.get('sp', (None,))[0]
        entries = self._search_results(query, search_params)
        return self.playlist_result(entries, query, query)
4589
4590
class YoutubeFeedsInfoExtractor(YoutubeTabIE):
    """
    Common parent of the feed extractors.

    Every subclass has to provide a _FEED_NAME attribute; it is used to build
    both IE_NAME and the https://www.youtube.com/feed/<name> URL that gets
    handed over to YoutubeTabIE.
    """
    _TESTS = []
    _LOGIN_REQUIRED = True

    @property
    def IE_NAME(self):
        return f'youtube:{self._FEED_NAME}'

    def _real_extract(self, url):
        # The matched input is not used; the target depends on _FEED_NAME only.
        feed_url = f'https://www.youtube.com/feed/{self._FEED_NAME}'
        return self.url_result(feed_url, ie=YoutubeTabIE.ie_key())
4607
4608
class YoutubeWatchLaterIE(InfoExtractor):
    # Keyword extractor: ":ytwatchlater" redirects to the `WL` playlist, which
    # YoutubeTabIE then extracts.
    IE_NAME = 'youtube:watchlater'
    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
    _VALID_URL = r':ytwatchlater'
    _TESTS = [
        {
            'url': ':ytwatchlater',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Hand the fixed `list=WL` playlist URL over to YoutubeTabIE."""
        watch_later_url = 'https://www.youtube.com/playlist?list=WL'
        return self.url_result(watch_later_url, ie=YoutubeTabIE.ie_key())
4621
4622
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "recommended" feed; matches the bare youtube.com
    # front page as well as the ":ytrec" / ":ytrecommended" keywords.
    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
    _FEED_NAME = 'recommended'
    # Unlike the feeds base class, this feed does not set the login requirement.
    _LOGIN_REQUIRED = False
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
    _TESTS = [
        {
            'url': ':ytrec',
            'only_matching': True,
        },
        {
            'url': ':ytrecommended',
            'only_matching': True,
        },
        {
            'url': 'https://youtube.com',
            'only_matching': True,
        },
    ]
4638
4639
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "subscriptions" feed, reached through the
    # ":ytsubs" keyword and its variants.
    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
    _FEED_NAME = 'subscriptions'
    _VALID_URL = r':ytsub(?:scription)?s?'
    _TESTS = [
        {
            'url': ':ytsubs',
            'only_matching': True,
        },
        {
            'url': ':ytsubscriptions',
            'only_matching': True,
        },
    ]
4651
4652
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
    # Feed extractor for the "history" feed, reached through the ":ythis" /
    # ":ythistory" keywords.
    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
    _FEED_NAME = 'history'
    _VALID_URL = r':ythis(?:tory)?'
    _TESTS = [
        {
            'url': ':ythistory',
            'only_matching': True,
        },
    ]
4661
4662
class YoutubeTruncatedURLIE(InfoExtractor):
    # Catches youtube.com URLs that end right after `watch?` (optionally with
    # one of a few non-video parameters) or that are bare attribution links.
    # Such URLs contain no video ID, so extraction always fails with a hint
    # about shell quoting.
    IE_NAME = 'youtube:truncated_url'
    IE_DESC = False  # Do not list
    _VALID_URL = r'''(?x)
        (?:https?://)?
        (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
        (?:watch\?(?:
            feature=[a-z_]+|
            annotation_id=annotation_[^&]+|
            x-yt-cl=[0-9]+|
            hl=[^&]*|
            t=[0-9]+
        )?
        |
            attribution_link\?a=[^&]+
        )
        $
    '''

    _TESTS = [{
        'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?feature=foo',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?hl=en-GB',
        'only_matching': True,
    }, {
        'url': 'https://www.youtube.com/watch?t=2372',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Always fail with an explanation of the likely quoting mistake.

        Raises:
            ExtractorError: unconditionally, flagged as expected.
        """
        # The example commands name this program (yt-dlp), so that the advice
        # can be followed as written.
        raise ExtractorError(
            'Did you forget to quote the URL? Remember that & is a meta '
            'character in most shells, so you want to put the URL in quotes, '
            'like  yt-dlp '
            '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
            ' or simply  yt-dlp BaW_jenozKc  .',
            expected=True)
4710
4711
class YoutubeClipIE(InfoExtractor):
    # Placeholder for youtube.com/clip/ URLs: warns that clips are unsupported
    # and passes the URL on to the 'Generic' extractor.
    IE_NAME = 'youtube:clip'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/clip/'

    def _real_extract(self, url):
        """Warn about missing clip support, then delegate the unchanged URL."""
        warning = 'YouTube clips are not currently supported. The entire video will be downloaded instead'
        self.report_warning(warning)
        return self.url_result(url, ie='Generic')
4720
4721
class YoutubeTruncatedIDIE(InfoExtractor):
    # Catches watch URLs whose `v` value is only 1-10 characters long and ends
    # the URL; extraction always fails with an explanatory error.
    IE_NAME = 'youtube:truncated_id'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'

    _TESTS = [
        {
            'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Always fail, reporting the incomplete ID and the offending URL.

        Raises:
            ExtractorError: unconditionally, flagged as expected.
        """
        video_id = self._match_id(url)
        message = f'Incomplete YouTube ID {video_id}. URL {url} looks truncated.'
        raise ExtractorError(message, expected=True)